diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 05:08:27 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 05:08:27 +0000 |
commit | a6d55a1df7dd680ed552bb11516bd9cfa4987eeb (patch) | |
tree | 963ff30204aa75d49afd4ab80365ce5ab54744b7 | |
parent | 57b8940af53bceb02dcbe41640d2d9e4de3c3210 (diff) | |
parent | 9ae8a3b36dc387f23a6f4b4a9d1f71c8fb4415e5 (diff) | |
download | trace-cmd-android14-mainline-os-statsd-release.tar.gz |
Snap for 10453563 from 9ae8a3b36dc387f23a6f4b4a9d1f71c8fb4415e5 to mainline-os-statsd-releaseaml_sta_341615000aml_sta_341511040aml_sta_341410000aml_sta_341311010aml_sta_341114000aml_sta_341111000aml_sta_341010020aml_sta_340912000aml_sta_340911000aml_net_341111030android14-mainline-os-statsd-release
Change-Id: I98989318693eb19f82f5baf7a194711087aa7a1f
141 files changed, 51538 insertions, 0 deletions
diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..59661532 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +.gitattributes export-ignore +.gitignore export-ignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..eb1b0dbe --- /dev/null +++ b/.gitignore @@ -0,0 +1,29 @@ +*.o +*.so +*.a +*.dat +*.data +*.patch +.*.d +*.orig +*.rej +.pc +*~ +*.pyc +*.swp +\#*\# +patches/ +tc_version.h +ks_version.h +ctracecmd_wrap.c +ctracecmdgui_wrap.c +tags +TAGS +cscope* +trace_python_dir +tracecmd_plugin_dir +libtracecmd.pc +build_prefix +build_install +build_libs_install +ltc_version.h diff --git a/Android.bp b/Android.bp new file mode 100644 index 00000000..112786b5 --- /dev/null +++ b/Android.bp @@ -0,0 +1,160 @@ +package { + default_applicable_licenses: ["external_trace-cmd_license"], +} + +// See: http://go/android-license-faq +license { + name: "external_trace-cmd_license", + + visibility: [":__subpackages__"], + + license_kinds: [ + "SPDX-license-identifier-GPL-2.0", + "SPDX-license-identifier-GPL-2.0-only", + "SPDX-license-identifier-GPL-2.0+", + "SPDX-license-identifier-GPL-2.0-or-later", + "SPDX-license-identifier-LGPL-2.1+", + ], + + license_text: [ + "LICENSE", + ], +} + +genrule { + name: "tc_version_header", + srcs: ["Makefile"], + out: ["tc_version.h"], + cmd: "(" + + "VERSION=$$(grep '\\bTC_VERSION =' <$(in) | awk '{ print $$3 }') " + + "&& PATCHLEVEL=$$(grep '\\bTC_PATCHLEVEL =' <$(in) | awk '{ print $$3 }') " + + "&& VERSION_CODE=$$(expr $${VERSION} \\* 256 + $${PATCHLEVEL}) " + + "&& EXTRAVERSION=$$(grep '\\bTC_EXTRAVERSION =' <$(in) | awk '{ print $$3 }') " + + "&& echo '/* This file is automatically generated. 
Do not modify */' " + + "&& echo \"#define VERSION_CODE $${VERSION_CODE}\" " + + "&& echo \"#define EXTRAVERSION $${EXTRAVERSION}\" " + + "&& echo '#define VERSION_STRING \"'$${VERSION}.$${PATCHLEVEL}.$${EXTRAVERSION}'\"' " + + "&& echo '#define FILE_VERSION ' " + + "&& echo '#define VERSION_GIT \"not-a-git-repo\"' " + + ") > $(out)", +} + +cc_library { + name: "libtracecmd", + + // Restrict visibility due to GPL license + visibility: [ + "//external/trace-cmd:__subpackages__", + ], + + local_include_dirs: [ + "lib/trace-cmd/include/private", + "lib/trace-cmd/include", + "include/trace-cmd", + "tracecmd/include", + "include", + ], + + export_include_dirs: [ + "lib/trace-cmd/include", + ], + + srcs: [ + "lib/trace-cmd/test.c", + "lib/trace-cmd/trace-blk-hack.c", + "lib/trace-cmd/trace-compress.c", + "lib/trace-cmd/trace-compress-zlib.c", + "lib/trace-cmd/trace-filter-hash.c", + "lib/trace-cmd/trace-ftrace.c", + "lib/trace-cmd/trace-hash.c", + "lib/trace-cmd/trace-hooks.c", + "lib/trace-cmd/trace-input.c", + "lib/trace-cmd/trace-msg.c", + "lib/trace-cmd/trace-output.c", + "lib/trace-cmd/trace-perf.c", + "lib/trace-cmd/trace-plugin.c", + "lib/trace-cmd/trace-recorder.c", + "lib/trace-cmd/trace-timesync.c", + "lib/trace-cmd/trace-timesync-kvm.c", + "lib/trace-cmd/trace-timesync-ptp.c", + "lib/trace-cmd/trace-util.c", + ], + + shared: { + shared_libs: [ + "libtraceevent", + "libtracefs", + "libz", + ], + export_shared_lib_headers: [ + "libtraceevent", + "libtracefs", + "libz", + ], + }, + + static: { + static_libs: [ + "libtraceevent", + "libtracefs", + "libz", + ], + export_static_lib_headers: [ + "libtraceevent", + "libtracefs", + "libz", + ], + }, + + generated_headers: ["tc_version_header"], + + export_generated_headers: ["tc_version_header"], + + cflags: [ + "-D__bswap_64=__swap64", + "-D_GNU_SOURCE", + "-DPERF", + "-DVSOCK", + "-Wno-unused-parameter", + "-Wno-macro-redefined", + "-Wno-unused-but-set-variable", + "-Wno-user-defined-warnings", + "-Wno-visibility", + 
"-Wno-pointer-arith", + ], + + c_std: "gnu99", +} + +cc_binary { + name: "trace-cmd", + + local_include_dirs: [ + "lib/trace-cmd/include/private", + "include/trace-cmd", + "tracecmd/include", + "include", + ], + + srcs: ["tracecmd/*.c"], + + static_libs: [ + "libtraceevent", + "libtracecmd", + "libtracefs", + ], + + static_executable: true, + + cflags: [ + "-D_GNU_SOURCE", + "-DNO_AUDIT", + "-DVSOCK", + "-Wno-unused-parameter", + "-Wno-macro-redefined", + "-Wno-visibility", + "-Wno-pointer-arith", + ], + + c_std: "gnu99", +} diff --git a/CODING_STYLE b/CODING_STYLE new file mode 100644 index 00000000..24fb10ec --- /dev/null +++ b/CODING_STYLE @@ -0,0 +1,287 @@ + +trace-cmd coding-style +====================== + +The coding style of trace-cmd and the tracing libraries (libtracefs and +libtraceevent) are very similar to the Linux kernel coding style: + + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/coding-style.rst + +Indentation +=========== + +Tabs are used for the start of indentation (the '\t' character), and should be +set to 8 characters. Spaces may be used at the end for continued lines where +having the start of text line up to braces in the previous line is not +divisible by 8. + +Max line width +============== + +All lines should not be more than 100 characters in length. + +This is a guide, as readability is more important than breaking lines up into a +hard limit. Ideally, strings should never be broken up except for where a new +line is added. + + printf("This is a line that may continue for a very long string.\n" + "This is another line, but after a new line\n"); + +But line breaks should not be: + + printf("This is a line that may continue for a very" + "long string.\n This is another line," + "but after a new line\n"); + +Not only is the above not as readable as the first version, it is not +even equivalent, because it is missing spaces between the line breaks. 
+For this reason, finish the string on the same line, even if that string +breaks the 100 character limit. + +Brackets and braces +=================== + +For all conditionals, the braces start on the same line: + + if (cond) { + } + +And the ending brace is at the same indentation as the conditional. + + while (cond) { + } + + do { + } while (cond); + + for (i = 0; i < 10; i++) { + } + +The same is true for structures: + + struct my_struct { + int field; + }; + +But for functions, the braces should start on the following line: + + void my_function(void) + { + } + + +It is also fine to not use braces for simple conditionals and loops. + + if (!x) + y = x; + else + y = 1; + + for (i = 0; i < 10; i++) + foo(i); + + while (getline(&line, &size, fp) > 0) + printf("%s", line); + +But any complex or multiline conditional or loop should have braces even if it +is allowed not to by the C language. + + if (x) { + for (i = 0; i < 10; i++) + foo(i); + } else { + foo(1); + } + +Notice above that even though the else portion is simple, it too has braces as +the else and if blocks should match. If one is required to have braces, they +both should have braces. + + +Spaces +====== + +A single space should be used between C commands and their starting +parenthesis. + + if (x) + for (i = 0; i < 10; i++) + while (getline(&line, &size, fp) > 0) + +There should be no space between function or macros and the starting +parenthesis. + + foo(x) + IS_VALID(y) + +This includes prototypes and declarations. + + void foo(int x) + +A space should be before and after assignment, comparison and algorithmic +signs. + + i = 0; + if (i < 10) + if (i == 5) + + y = i + 10; + + i += 5; + +For structures, use tabs to make all the fields line up nicely. 
+ + struct { + int foo; + int bar; + unsigned long long time; + }; + +Variable declarations +===================== + +The order of variables that are declared, should first keep the same types +together, but also should be ordered by their length such that the variables +are ordered in an "upside-down Christmas tree" fashion where the length gets +smaller. + + int tracecmd_count_cpus(void) + { + static int once; + char buf[1024]; + int cpus = 0; + char *pbuf; + size_t *pn; + FILE *fp; + size_t n; + int r; + +The above shows that the order is done by length, and in the above example it +also shows that "int cpus = 0;" is not grouped next to "int r;". As this is more +of a guideline and made to be more aesthetic to the eye of the reader, both the +above and the below are acceptable. + + int tracecmd_count_cpus(void) + { + static int once; + char buf[1024]; + char *pbuf; + size_t *pn; + FILE *fp; + size_t n; + int cpus = 0; + int r; + + +Unless variables are tightly related, it is expected that each variable be on +its own line and not grouped by type. That is, + + int r, cpus = 0; + +is to be discouraged, as the two variables are not related to each other. +But if you had a bunch of counters: + + int i, j, k; + +That would be fine, as the variables are all related as they are all for the +same purpose (arbitrary counters). The same may go with pointers: + + + char *begin, *end; + +Comments +======== + +Comments will use the "/* */" format and the C++ "//" style is discouraged. +If a comment is on one line, keep the "/*" and "*/" on the same line: + + /* This is a single line comment. */ + +If a comment spans more than one line, then have the "/*" on a separate line +before the comment and the "*/" on a separate line at the end of the comment, +and each line starts with a "*" where all the "*" line up with each other. + + /* + * This is a multi line comment, where all the '*' + * will line up, and the text is on a separate line + * as the start and end markers. 
+ */ + + +Function documentation +====================== + +All global functions (and especially any APIs) should have a function +description in the form of "kernel doc": + + https://www.kernel.org/doc/html/latest/doc-guide/kernel-doc.html + +The form is: + + /** + * function_name() - Brief description of function. + * @arg1: Describe the first argument. + * @arg2: Describe the second argument. + * One can provide multiple line descriptions + * for arguments. + * + * A longer description, with more discussion of the function function_name() + * that might be useful to those using or modifying it. Begins with an + * empty comment line, and may include additional embedded empty + * comment lines. + * + * The longer description may have multiple paragraphs. + * + * Context: Describes whether the function can sleep, what locks it takes, + * releases, or expects to be held. It can extend over multiple + * lines. + * Return: Describe the return value of function_name. + * + * The return value description can also have multiple paragraphs, and should + * be placed at the end of the comment block. + */ + +Structure layout +================ + +This is more about compaction than coding style. When creating structures, be +aware that if the fields are placed together without being sized by alignment, +that the compiler will create "holes" in them. + + struct { + int x; + char y; + unsigned long long f; + }; + +As int is 4 bytes in length, char is one byte, and unsigned long long is 8 +bytes. The compiler will try to naturally align them by their size, and will +include padding (holes) inside the structure to do so. The above is equivalent +to: + + struct { + int x; + char y; + char padding[3]; + unsigned long long f; + }; + +It is best to try to organize the structure where there are no holes within +them. 
+ + struct { + unsigned long long f; + int x; + char y; + }; + +The above is better formatting, even if there may be padding outside the +structure, but the compiler will still have more flexibility to utilize the +space outside the structure than what it can do within it. + +General +======= + +As stated, this is a guide and may not be strictly enforced. The goal is to +have consistent and readable code. In general, try to have the coding style +match the surrounding code. diff --git a/CONTRIBUTE b/CONTRIBUTE new file mode 100644 index 00000000..0440b186 --- /dev/null +++ b/CONTRIBUTE @@ -0,0 +1,103 @@ +If you would like to become part of the community and submit patches, here's how +to do so for trace-cmd. + +If you only want to report a bug, or suggest an enhancement, you may do +so at: + + https://bugzilla.kernel.org/buglist.cgi?component=Trace-cmd%2FKernelshark + +All development is done via a mailing list: + + http://vger.kernel.org/vger-lists.html#linux-trace-devel + +Patches should be sent to linux-trace-devel@vger.kernel.org + +Start by cloning the official repository: + + git clone git://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git + +Make your changes. When you are satisfied with them, commit them into git. +Here are some helpful hints for your git commits. + +1) When making changes, please follow the coding style defined by the file + called CODING_STYLE in this directory. + +2) Every commit should only do one thing. + That is, if your work requires some cleaning up of code, do that + clean up as a separate commit and not with your functional changes. + Find ways to take "steps" in modifying code. If you can break up + your changes in a series of steps, do so. + +3) The commit log should start with a title. 
Like the below + + trace-cmd: Add CONTRIBUTE file + + Even though this repo is for trace-cmd, start the topic with + "trace-cmd:" because the commits will end up as patches to a mailing + list that handles other tracing repos, differentiating them with the subject + is useful. You can be more specific as well. If the change only affects the + "record" command, you may start the title with "trace-cmd record:". + +4) The body of the commit (with a blank line from the title), should be self + contained, and explain why you are making the change. The title should hold + the "what" is changing, but the body contains the rationale for the change. + It should stand alone, and not state things like "See the next patch", + because when it is in git history, there's no knowing what the next patch + is. You can make statements like "This is needed for a <future-feature> + that will come later". Where "<future-feature>" is something that you are + working on and the current commit is one of the steps required to get there. + +5) Add your Developer Certificate of Origin (DCO) at the bottom of the commit + log. That is "Signed-off-by: Full Name <email>" where your full name is your + real name (no pseudonyms). Optionally, if you are making the change on + behalf of your company, you may also add your company name, if you are not + using your company's email. "Signed-off-by: Full Name (Company) <email>". + Please note, the DCO is your statement that you have the legal right to + make these changes for the project you are submitting to. + +You can use the Linux kernel "checkpatch.pl" script to help verify the formatting +of your patch: + + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/checkpatch.pl + +Please note that checkpatch.pl is a guide and not a hard rule. If it reports a +fix that makes the code harder to read, that fix can probably be ignored. 
+ + git format-patch --stdout HEAD~1..HEAD | ./checkpatch.pl + +Finally, you can use the git "send-email" functionality: + + git send-email --from='<your-email>' --to='linux-trace-devel@vger.kernel.org' HEAD~1..HEAD + +The above is for sending one patch. If you are sending more than one patch, also include +a cover letter: + + git send-email --cover-letter --annotate --from='<your-email>' --to='linux-trace-devel@vger.kernel.org' <first-commit>~1..HEAD + +If you receive feedback on your patches, and plan on sending another version, +please use the '-v' option to mark your patches as a new version. +For example, if you add "-v2" to the above commands, instead of having: +"[PATCH]" in the subject, it will have "[PATCH v2]", letting the reviewers know +that this is a new version. If you send another version, use "-v3" and so on. + +For more information about git send-email: + + https://git-scm.com/docs/git-send-email + +To keep track of the status of patches that have been submitted, check out: + + https://patchwork.kernel.org/project/linux-trace-devel/list/ + +If you would like to apply patches from the mailing list, you can use +the "b4" utility. + + $ pip install b4 + +Then from the mailing list archive, find a message id from a patch or patch +series. For example, to get the patch from: + + https://lore.kernel.org/linux-trace-devel/20210205173713.132051-1-tz.stoyanov@gmail.com/ + + $ b4 am -o - 20210205173713.132051-1-tz.stoyanov@gmail.com > /tmp/p.mbox + $ git am /tmp/p.mbox + diff --git a/COPYING b/COPYING new file mode 100644 index 00000000..9d46b791 --- /dev/null +++ b/COPYING @@ -0,0 +1,4 @@ +There are two main licenses that the tools in this directory are covered +under. For the applications themselves, they are covered under GPL-2.0 (see +LICENSES/GPL-2.0). As for the exported headers and libraries, they are covered +under LGPL-2.1 (see LICENSES/LGPL-2.1). 
diff --git a/COPYING.LIB b/COPYING.LIB new file mode 100644 index 00000000..da52742b --- /dev/null +++ b/COPYING.LIB @@ -0,0 +1,510 @@ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations +below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. 
These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. +^L + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. 
+ + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it +becomes a de-facto standard. To achieve this, non-free programs must +be allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. 
+ + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. +^L + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control +compilation and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. 
+ + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. 
+^L + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. 
+Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. +^L + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. 
Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at least + three years, to give the same user the materials specified in + Subsection 6a, above, for a charge no more than the cost of + performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. 
However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. +^L + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. 
However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. +^L + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply, and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License +may add an explicit geographical distribution limitation excluding those +countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. +^L + 14. 
If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. 
+ + END OF TERMS AND CONDITIONS +^L + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms +of the ordinary General Public License). + + To apply these terms, attach the following notices to the library. +It is safest to attach them to the start of each source file to most +effectively convey the exclusion of warranty; and each file should +have at least the "copyright" line and a pointer to where the full +notice is found. + + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or +your school, if any, to sign a "copyright disclaimer" for the library, +if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James + Random Hacker. 
+ + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + @@ -0,0 +1,47 @@ + +(Copied from the Linux Kernel's Documentation/process/submitting-patches.rst) + +Sign your work - the Developer's Certificate of Origin +------------------------------------------------------ + +The sign-off is a simple line at the end of the explanation for the +patch, which certifies that you wrote it or otherwise have the right to +pass it on as an open-source patch. The rules are pretty simple: if you +can certify the below: + +Developer's Certificate of Origin 1.1 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By making a contribution to this project, I certify that: + + (a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + + (b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + + (c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + + (d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. + +then you just add a line saying:: + + Signed-off-by: Random J Developer <random@developer.example.org> + +using your real name (sorry, no pseudonyms or anonymous contributions.) + +Some people also put extra tags at the end. 
They'll just be ignored for +now, but you can do this to mark internal company procedures or just +point out some special detail about the sign-off. diff --git a/Documentation/.gitignore b/Documentation/.gitignore new file mode 100644 index 00000000..8a38b2ea --- /dev/null +++ b/Documentation/.gitignore @@ -0,0 +1,3 @@ +*.[1-9] +*.m +*.html diff --git a/Documentation/Makefile b/Documentation/Makefile new file mode 100644 index 00000000..ec364916 --- /dev/null +++ b/Documentation/Makefile @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: GPL-2.0 + +doc_dir:=$(src)/Documentation + +export doc_dir + +SUBDIR += trace-cmd +SUBDIR += libtracecmd + +.PHONY: $(SUBDIR) + +DOCDIR = $(src)/Documentation +ASCIIDOC=asciidoc +ASCIIDOC_CONF = $(DOCDIR)/asciidoc.conf +ASCIIDOC_EXTRA = --unsafe -f $(ASCIIDOC_CONF) +ASCIIDOC_HTML = xhtml11 +MANPAGE_XSL = $(DOCDIR)/manpage-normal.xsl +XMLTO_EXTRA = +INSTALL?=install +RM ?= rm -f + +ASCIIDOC_INSTALLED := $(shell command -v $(ASCIIDOC) 2> /dev/null) +ifndef ASCIIDOC_INSTALLED + missing_tools += $(ASCIIDOC) +endif + +XMLTO=xmlto +XMLTO_INSTALLED := $(shell command -v $(XMLTO) 2> /dev/null) +ifndef XMLTO_INSTALLED + missing_tools += $(XMLTO) +endif + +# +# For asciidoc ... +# -7.1.2, no extra settings are needed. +# 8.0-, set ASCIIDOC8. +# + +# +# For docbook-xsl ... +# -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0) +# 1.69.0, no extra settings are needed? +# 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP? +# 1.71.1, no extra settings are needed? +# 1.72.0, set DOCBOOK_XSL_172. +# 1.73.0-, set ASCIIDOC_NO_ROFF +# + +# +# If you had been using DOCBOOK_XSL_172 in an attempt to get rid +# of 'the ".ft C" problem' in your generated manpages, and you +# instead ended up with weird characters around callouts, try +# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8). 
+# + +ifdef ASCIIDOC8 +ASCIIDOC_EXTRA += -a asciidoc7compatible +endif +ifdef DOCBOOK_XSL_172 +ASCIIDOC_EXTRA += -a libtracecmd-asciidoc-no-roff +MANPAGE_XSL = $(DOCDIR)/manpage-1.72.xsl +else + ifdef ASCIIDOC_NO_ROFF + # docbook-xsl after 1.72 needs the regular XSL, but will not + # pass-thru raw roff codes from asciidoc.conf, so turn them off. + ASCIIDOC_EXTRA += -a libtracecmd-asciidoc-no-roff + endif +endif +ifdef MAN_BOLD_LITERAL +XMLTO_EXTRA += -m $(DOCDIR)/manpage-bold-literal.xsl +endif +ifdef DOCBOOK_SUPPRESS_SP +XMLTO_EXTRA += -m $(DOCDIR)/manpage-suppress-sp.xsl +endif + +ifdef USE_ASCIIDOCTOR +ASCIIDOC = asciidoctor +ASCIIDOC_EXTRA = -a compat-mode +ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions +ASCIIDOC_HTML = xhtml5 +endif + +ifneq ($(findstring $(MAKEFLAGS),w),w) +PRINT_DIR = --no-print-directory +else # "make -w" +NO_SUBDIR = : +endif + +export ASCIIDOC ASCIIDOC_CONF ASCIIDOC_EXTRA ASCIIDOC_HTML +export MANPAGE_XSL +export XMLTO XMLTO_INSTALLED XMLTO_EXTRA +export missing_tools +export RM + +all: $(SUBDIR) +clean: $(SUBDIR) +install: $(SUBDIR) + +$(SUBDIR): + make -C $@ $(MAKECMDGOALS) + diff --git a/Documentation/README.PythonPlugin b/Documentation/README.PythonPlugin new file mode 100644 index 00000000..3de05647 --- /dev/null +++ b/Documentation/README.PythonPlugin @@ -0,0 +1,127 @@ + PYTHON PLUGIN DOCUMENTATION +============================= + +With the python plugin (make python-plugin) you can now +write plugins in python. The API exported by the python +plugin itself (written in C) allows you to access most +information about a record from python. + +To write a python plugin, put a new .py file into a new +~/.trace-cmd/python/ directory. + +The most basic python plugin is this: + +--- %< --- +def register(pevent): + pass +--- >% --- + +which obviously does nothing at all. 
+ +To register a callback, use the pevent.register_event_handler +function: + +--- %< --- +import tracecmd + +def my_event_handler(trace_seq, event): + pass + +def register(pevent): + pevent.register_event_handler("subsys", "event_name", + my_event_handler) +--- >% --- + + +There are four object types that you get, described below. + + tracecmd.PEvent +----------------- + +This is the class of the 'pevent' object above, +you get one of those via your register callback. +It has one method and one property: + * register_event_handler() - example above, to register + an event handler function + * file_endian - either '<' or '>' indicating + which endianness the file has, + to be used with struct.unpack() + + tracecmd.TraceSeq +------------------- + +This is the class of the 'trace_seq' parameter to your callback +function. It has only one method, puts(), to put data into the +buffer. Formatting must be done in python. + + tracecmd.Event +---------------------- + +This is the class of the 'event' parameter to your callback +function. Note that it doesn't just contain the format, but +also the event data. As such, you can do much with this, and +this is what you'll usually use. Each instance of this allows +access to record items via the dict protocol, and you can get +the items via its keys() methods. So for example, your +callback could be + +--- %< --- +def my_callback(trace_seq, event): + for fieldname in event.keys(): + field = event[fieldname] +--- >% --- + +Each field returned from the dict protocol is an instance of +the next (and last) class: + + tracecmd.Field +---------------------- + +This is an instance of a field, including its data. It affords +numerous use cases and is what you'll be using most. + + * If this is an integer field, i.e. 
1, 2, 4 or 8 bytes long, + you can convert it to the number contained, according to + the file's endianness, by simply casting it to a long: + + field = event['myint'] + value = long(field) + + * You can access the field's data, as field.data, and if the + data is really a "__data_loc" type that will be resolved + automatically. (If you don't know what this means, don't + worry about it and just use field.data) + + +This is it. It's pretty simple. A fully-featured plugin could +look like this: + +--- %< --- +def my_event_handler(trace_seq, event): + trace_seq.puts("myev: %u", long(event['myfield'])) + +def register(pevent): + pevent.register_event_handler("subsys", "event_name", + my_event_handler) +--- >% --- + + + Tips and tricks +----------------- + +Be familiar with the struct module and use it, always +checking endianness and potentially using pevent.file_endian. + + +If you need access to pevent in your callbacks, simply +pass it in yourself: + +--- %< --- +def my_event_handler(pevent, trace_seq, event): + pass + +def register(pevent): + pevent.register_event_handler("subsys", "event_name", + lambda *args: my_event_handler(pevent, *args) + ) +--- >% --- diff --git a/Documentation/asciidoc.conf b/Documentation/asciidoc.conf new file mode 100644 index 00000000..c15aa13b --- /dev/null +++ b/Documentation/asciidoc.conf @@ -0,0 +1,120 @@ +## linktep: macro +# +# Usage: linktep:command[manpage-section] +# +# Note, {0} is the manpage section, while {target} is the command. +# +# Show TEP link as: <command>(<section>); if section is defined, else just show +# the command. 
+ +[macros] +(?su)[\\]?(?P<name>linktep):(?P<target>\S*?)\[(?P<attrlist>.*?)\]= + +[attributes] +asterisk=* +plus=+ +caret=^ +startsb=[ +endsb=] +tilde=~ + +ifdef::backend-docbook[] +[linktep-inlinemacro] +{0%{target}} +{0#<citerefentry>} +{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>} +{0#</citerefentry>} +endif::backend-docbook[] + +ifdef::backend-docbook[] +ifndef::tep-asciidoc-no-roff[] +# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this. +# v1.72 breaks with this because it replaces dots not in roff requests. +[listingblock] +<example><title>{title}</title> +<literallayout> +ifdef::doctype-manpage[] + .ft C +endif::doctype-manpage[] +| +ifdef::doctype-manpage[] + .ft +endif::doctype-manpage[] +</literallayout> +{title#}</example> +endif::tep-asciidoc-no-roff[] + +ifdef::tep-asciidoc-no-roff[] +ifdef::doctype-manpage[] +# The following two small workarounds insert a simple paragraph after screen +[listingblock] +<example><title>{title}</title> +<literallayout> +| +</literallayout><simpara></simpara> +{title#}</example> + +[verseblock] +<formalpara{id? id="{id}"}><title>{title}</title><para> +{title%}<literallayout{id? 
id="{id}"}> +{title#}<literallayout> +| +</literallayout> +{title#}</para></formalpara> +{title%}<simpara></simpara> +endif::doctype-manpage[] +endif::tep-asciidoc-no-roff[] +endif::backend-docbook[] + +ifdef::doctype-manpage[] +ifdef::backend-docbook[] +[header] +template::[header-declarations] +<refentry> +<refmeta> +<refentrytitle>{mantitle}</refentrytitle> +<manvolnum>{manvolnum}</manvolnum> +<refmiscinfo class="source">libtracefs</refmiscinfo> +<refmiscinfo class="version">{libtracefs_version}</refmiscinfo> +<refmiscinfo class="manual">libtracefs Manual</refmiscinfo> +</refmeta> +<refnamediv> + <refname>{manname1}</refname> + <refname>{manname2}</refname> + <refname>{manname3}</refname> + <refname>{manname4}</refname> + <refname>{manname5}</refname> + <refname>{manname6}</refname> + <refname>{manname7}</refname> + <refname>{manname8}</refname> + <refname>{manname9}</refname> + <refname>{manname10}</refname> + <refname>{manname11}</refname> + <refname>{manname12}</refname> + <refname>{manname13}</refname> + <refname>{manname14}</refname> + <refname>{manname15}</refname> + <refname>{manname16}</refname> + <refname>{manname17}</refname> + <refname>{manname18}</refname> + <refname>{manname19}</refname> + <refname>{manname20}</refname> + <refname>{manname21}</refname> + <refname>{manname22}</refname> + <refname>{manname23}</refname> + <refname>{manname24}</refname> + <refname>{manname25}</refname> + <refname>{manname26}</refname> + <refname>{manname27}</refname> + <refname>{manname28}</refname> + <refname>{manname29}</refname> + <refname>{manname30}</refname> + <refpurpose>{manpurpose}</refpurpose> +</refnamediv> +endif::backend-docbook[] +endif::doctype-manpage[] + +ifdef::backend-xhtml11[] +[linktep-inlinemacro] +<a href="{target}.html">{target}{0?({0})}</a> +endif::backend-xhtml11[] diff --git a/Documentation/libtracecmd/Makefile b/Documentation/libtracecmd/Makefile new file mode 100644 index 00000000..48334525 --- /dev/null +++ 
b/Documentation/libtracecmd/Makefile @@ -0,0 +1,112 @@ +# SPDX-License-Identifier: GPL-2.0 + +# Include the utils +include $(src)/scripts/utils.mk + +# This Makefile and manpage XSL files were taken from libtracefs +# and modified for libtracecmd + +MAN3_TXT= \ + $(wildcard libtracecmd-*.txt) \ + libtracecmd.txt + +MAN_TXT = $(MAN3_TXT) +_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) +_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) +_DOC_MAN3=$(patsubst %.txt,%.m,$(MAN3_TXT)) + +MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML)) +MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML)) +DOC_MAN3=$(addprefix $(OUTPUT),$(_DOC_MAN3)) + +# Make the path relative to DESTDIR, not prefix +ifndef DESTDIR +prefix?=$(HOME) +endif +bindir?=$(prefix)/bin +htmldir?=$(prefix)/share/doc/libtracecmd-doc +pdfdir?=$(prefix)/share/doc/libtracecmd-doc +mandir?=$(prefix)/share/man +man3dir=$(mandir)/man3 + +ifdef USE_ASCIIDOCTOR +ASCIIDOC_EXTRA += -a mansource="libtracecmd" -a manmanual="libtracecmd Manual" +endif + +all: check-man-tools html man + +man: man3 +man3: $(DOC_MAN3) + +html: $(MAN_HTML) + +$(MAN_HTML) $(DOC_MAN3): $(ASCIIDOC_CONF) + +install: check-man-tools install-man install-html + +check-man-tools: +ifdef missing_tools + $(error "You need to install $(missing_tools) for man pages") +endif + +install-%.3: $(OUTPUT)%.3 + $(Q)$(call do_install_docs,$<,$(man3dir),644); + +do-install-man: man $(addprefix install-,$(wildcard $(OUTPUT)*.3)) + +install-man: man + $(Q)$(MAKE) -C . 
do-install-man + +install-%.txt: $(OUTPUT)%.html + $(Q)$(call do_install_docs,$<,$(htmldir),644); + +do-install-html: html $(addprefix install-,$(wildcard *.txt)) + +install-html: html do-install-html + +uninstall: uninstall-man uninstall-html + +uninstall-man: + $(Q)$(RM) $(addprefix $(DESTDIR)$(man3dir)/,$(DOC_MAN3)) + +uninstall-html: + $(Q)$(RM) $(addprefix $(DESTDIR)$(htmldir)/,$(MAN_HTML)) + +ifdef missing_tools + DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed) +else + DO_INSTALL_MAN = do-install-man +endif + +CLEAN_FILES = \ + $(MAN_XML) $(addsuffix +,$(MAN_XML)) \ + $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \ + $(DOC_MAN3) *.3 *.m + +clean: + $(Q) $(RM) $(CLEAN_FILES) + +ifdef USE_ASCIIDOCTOR +$(OUTPUT)%.m : $(OUTPUT)%.txt + $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ + $(ASCIIDOC) -b manpage -d manpage \ + $(ASCIIDOC_EXTRA) -alibtracecmd_version=$(LIBTRACECMD_VERSION) -o $@+ $< && \ + mv $@+ $@ +endif + +$(OUTPUT)%.m : $(OUTPUT)%.xml + $(QUIET_XMLTO)$(RM) $@ && \ + $(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<; \ + touch $@ + +$(OUTPUT)%.xml : %.txt + $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ + $(ASCIIDOC) -b docbook -d manpage \ + $(ASCIIDOC_EXTRA) -alibtracecmd_version=$(LIBTRACECMD_VERSION) -o $@+ $< && \ + mv $@+ $@ + +$(MAN_HTML): $(OUTPUT)%.html : %.txt + $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ + $(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \ + $(ASCIIDOC_EXTRA) -alibtracecmd_version=$(LIBTRACECMD_VERSION) -o $@+ $< && \ + mv $@+ $@ diff --git a/Documentation/libtracecmd/libtracecmd-files.txt b/Documentation/libtracecmd/libtracecmd-files.txt new file mode 100644 index 00000000..2de5d6df --- /dev/null +++ b/Documentation/libtracecmd/libtracecmd-files.txt @@ -0,0 +1,169 @@ +libtracecmd(3) +============= + +NAME +---- +tracecmd_open, tracecmd_open_fd, tracecmd_open_head, tracecmd_init_data, +tracecmd_close - Open and close a trace file. 
+ +SYNOPSIS +-------- +[verse] +-- +*#include <trace-cmd.h>* + +struct tracecmd_input pass:[*]*tracecmd_open*(const char pass:[*]_file_, int _flags_); +struct tracecmd_input pass:[*]*tracecmd_open_fd*(int _fd_, int _flags_); +struct tracecmd_input pass:[*]*tracecmd_open_head*(const char pass:[*]_file_, int _flags_); +int *tracecmd_init_data*(struct tracecmd_input pass:[*]_handle_); +void *tracecmd_close*(struct tracecmd_input pass:[*]_handle_); +-- + +DESCRIPTION +----------- +This set of APIs can be used to open and close a trace file recorded by +_trace-cmd(1)_ and containing tracing information from ftrace, the official +Linux kernel tracer. The opened file is represented by a _tracecmd_input_ +structure; all other library APIs that work with the file require a pointer +to the structure. The APIs for opening a trace file have a _flags_ input +parameter, which controls how the file will be opened and parsed. The _flags_ +parameter is a combination of these options: + + TRACECMD_FL_LOAD_NO_PLUGINS - Do not load any plugins + TRACECMD_FL_LOAD_NO_SYSTEM_PLUGINS - Do not load system wide plugins, load only "local only" + plugins from user's home directory. + +The _tracecmd_open()_ function opens a given trace _file_, parses the +metadata headers from the file, allocates and initializes a _tracecmd_input_ +handler structure representing the file. It also initializes the handler +for reading trace data from the file. The returned handler is ready to be +used with _tracecmd_read__ APIs. + +The _tracecmd_open_fd()_ function does the same as _tracecmd_open()_, but +works with a file descriptor to a trace file, opened for reading. + +The _tracecmd_open_head()_ function is the same as _tracecmd_open()_, but +does not initialize the handler for reading trace data. It reads and parses +the metadata headers only. The _tracecmd_init_data()_ should be used before +using the _tracecmd_read__ APIs.
+ +The _tracecmd_init_data()_ function initializes a _handle_, allocated with +_tracecmd_open_head()_, for reading trace data from the file associated with +it. This API must be called before any of the _tracecmd_read__ APIs. + +The _tracecmd_close()_ function frees a _handle_, pointer to tracecmd_input +structure, previously allocated with _tracecmd_open()_, _tracecmd_open_fd()_ +or _tracecmd_open_head()_ APIs. + +RETURN VALUE +------------ +The _tracecmd_open()_, _tracecmd_open_fd()_ and _tracecmd_open_head()_ +functions return a pointer to tracecmd_input structure or NULL in case of +an error. The returned structure must be freed with _tracecmd_close()_. +Note that if _tracecmd_open_fd()_ is used to allocate a tracecmd_input handler, +when _tracecmd_close()_ is called to close it, that fd will be closed also. + +The _tracecmd_init_data()_ function returns -1 in case of an error or +0 otherwise. + +EXAMPLE +------- +[source,c] +-- +There are two different use patterns for opening and reading trace data from +a trace file, which can be used depending on the use case. + +1. Open and initialise the trace file in a single step: + +#include <trace-cmd.h> +... +struct tracecmd_input *handle = tracecmd_open("trace.dat", 0); + if (!handle) { + /* Failed to open trace.dat file */ + } +... + /* Read tracing data from the file, using the handle */ +... + tracecmd_close(handle); +... +int fd; + fd = open("trace.dat", O_RDONLY); + if (fd < 0) { + /* Failed to open trace file for reading */ + } + handle = tracecmd_open_fd(fd, 0); + if (!handle) { + close(fd); + /* Failed to initialise handler for reading the trace file */ + } +... + /* Read tracing data from the file, using the handle */ +... + tracecmd_close(handle); +... + +2. Open and initialise the trace file in two steps. This allows performing +some processing based on metadata, read from the file, before initialising +the trace data for reading.
An example of such a use case is when opening multiple +trace files recorded in the same trace session. In that case timestamps of all +trace events must be adjusted based on the information from the file's metadata +and before reading the trace data. + +#include <trace-cmd.h> +... +struct tracecmd_input *handle = tracecmd_open_head("trace.dat", 0); + if (!handle) { + /* Failed to open trace.dat file */ + } +... + /* do some processing, before initialising the trace data for reading */ +... + if (tracecmd_init_data(handle) < 0) { + /* Failed to initialize handle for reading the trace data */ + } +... + /* Read tracing data from the file, using the handle */ +... + tracecmd_close(handle); +... +-- +FILES +----- +[verse] +-- +*trace-cmd.h* + Header file to include in order to have access to the library APIs. +*-ltracecmd* + Linker switch to add when building a program that uses the library. +-- + +SEE ALSO +-------- +_libtracefs(3)_, +_libtraceevent(3)_, +_trace-cmd(1)_, +_trace-cmd.dat(5)_ + +AUTHOR +------ +[verse] +-- +*Steven Rostedt* <rostedt@goodmis.org> +*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com> +-- +REPORTING BUGS +-------------- +Report bugs to <linux-trace-devel@vger.kernel.org> + +LICENSE +------- +libtracecmd is Free Software licensed under the GNU LGPL 2.1 + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). diff --git a/Documentation/libtracecmd/libtracecmd-instances.txt b/Documentation/libtracecmd/libtracecmd-instances.txt new file mode 100644 index 00000000..df8fdc4e --- /dev/null +++ b/Documentation/libtracecmd/libtracecmd-instances.txt @@ -0,0 +1,129 @@ +libtracecmd(3) +============= + +NAME +---- +tracecmd_buffer_instances, tracecmd_buffer_instance_name, tracecmd_buffer_instance_handle +- Read tracing instances from a trace file.
+ +SYNOPSIS +-------- +[verse] +-- +*#include <trace-cmd.h>* + +int *tracecmd_buffer_instances*(struct tracecmd_input pass:[*]_handle_); +const char pass:[*]*tracecmd_buffer_instance_name*(struct tracecmd_input pass:[*]_handle_, int _indx_); +struct tracecmd_input pass:[*]*tracecmd_buffer_instance_handle*(struct tracecmd_input pass:[*]_handle_, int _indx_); +-- + +DESCRIPTION +----------- +This set of APIs can be used to get information and read tracing data +from tracing instances stored in a trace file. + +The _tracecmd_buffer_instances()_ function gets the number of tracing +instances recorded in a trace file. The top instance is not counted. +The _handle_ is a tracecmd_input handler returned by +_tracecmd_open_head()_. + +The _tracecmd_buffer_instance_name()_ function gets the name of the +tracing instance with the given index _indx_, recorded in a trace file. +The _indx_ is a number in the interval [0 .. count-1], where count +is the number returned by _tracecmd_buffer_instances()_. The _handle_ +is a tracecmd_input handler returned by _tracecmd_open_head()_. + +The _tracecmd_buffer_instance_handle()_ function allocates and initializes a +tracecmd_input handle, associated with the trace instance with index +_indx_ from a trace file. The _handle_ is a tracecmd_input handler +returned by _tracecmd_open_head()_. The _indx_ is a number in the +interval [0 .. count-1], where count is the number returned by +_tracecmd_buffer_instances()_. + +RETURN VALUE +------------ +The _tracecmd_buffer_instances()_ function returns the number of tracing +instances recorded in a trace file. + +The _tracecmd_buffer_instance_name()_ function returns a string, the name +of a tracing instance, or NULL in case of an error. The string must *not* +be freed. + +The _tracecmd_buffer_instance_handle()_ function returns a pointer to +a newly allocated tracecmd_input handler or NULL in case of an error. 
The +returned handler must be closed by _tracecmd_close()(3)_. + +EXAMPLE +------- +[source,c] +-- +#include <trace-cmd.h> +... +struct tracecmd_input *handle = tracecmd_open_head("trace.dat"); + if (!handle) { + /* Failed to open trace.dat file */ + } +... +int num = tracecmd_buffer_instances(handle); + + while (num > 0) { + struct tracecmd_input *h; + const char *name; + + /* Valid instance indexes are in the interval [0 .. count-1] */ + name = tracecmd_buffer_instance_name(handle, num - 1); + if (!name) { + /* Failed to get name of instance num - 1 */ + } + h = tracecmd_buffer_instance_handle(handle, num - 1); + if (!h) { + /* Failed to initialize handler for instance num - 1 */ + } + + ... + tracecmd_close(h); + num--; + } +... + tracecmd_close(handle); + +-- +FILES +----- +[verse] +-- +*trace-cmd.h* + Header file to include in order to have access to the library APIs. +*-ltracecmd* + Linker switch to add when building a program that uses the library. +-- + +SEE ALSO +-------- +_libtracefs(3)_, +_libtraceevent(3)_, +_trace-cmd(1)_, +_trace-cmd.dat(5)_ + +AUTHOR +------ +[verse] +-- +*Steven Rostedt* <rostedt@goodmis.org> +*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com> +-- +REPORTING BUGS +-------------- +Report bugs to <linux-trace-devel@vger.kernel.org> + +LICENSE +------- +libtracecmd is Free Software licensed under the GNU LGPL 2.1 + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). 
diff --git a/Documentation/libtracecmd/libtracecmd-log.txt b/Documentation/libtracecmd/libtracecmd-log.txt new file mode 100644 index 00000000..de5c2776 --- /dev/null +++ b/Documentation/libtracecmd/libtracecmd-log.txt @@ -0,0 +1,78 @@ +libtracecmd(3) +============= + +NAME +---- +tracecmd_set_loglevel - Set log level of the library + +SYNOPSIS +-------- +[verse] +-- +*#include <trace-cmd.h>* + +int *tracecmd_set_loglevel*(enum tep_loglevel _level_); +-- + +DESCRIPTION +----------- +The _tracecmd_set_loglevel()_ function sets the level of the library logs that will be printed on +the console. See _libtraceevent(3)_ for a detailed description of the log levels. Setting the log +level to a specific value means that logs from the previous levels will be printed too. For example, +_TEP_LOG_WARNING_ will print any logs with severity _TEP_LOG_WARNING_, _TEP_LOG_ERROR_ and +_TEP_LOG_CRITICAL_. The default log level is _TEP_LOG_CRITICAL_. When a new level is set, it is +also propagated to libtracefs and libtraceevent. + +EXAMPLE +------- +[source,c] +-- +#include <trace-cmd.h> +... +tracecmd_set_loglevel(TEP_LOG_ALL); +... +/* call libtracecmd, libtracefs or libtraceevent APIs and observe any logs they produce */ +... +tracecmd_set_loglevel(TEP_LOG_CRITICAL); +-- + +FILES +----- +[verse] +-- +*trace-cmd.h* + Header file to include in order to have access to the library APIs. +*-ltracecmd* + Linker switch to add when building a program that uses the library. 
+-- + +SEE ALSO +-------- +_libtracefs(3)_, +_libtraceevent(3)_, +_trace-cmd(1)_ +_trace-cmd.dat(5)_ + +AUTHOR +------ +[verse] +-- +*Steven Rostedt* <rostedt@goodmis.org> +*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com> +-- +REPORTING BUGS +-------------- +Report bugs to <linux-trace-devel@vger.kernel.org> + +LICENSE +------- +libtracecmd is Free Software licensed under the GNU LGPL 2.1 + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2021 VMware, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). diff --git a/Documentation/libtracecmd/libtracecmd-peer.txt b/Documentation/libtracecmd/libtracecmd-peer.txt new file mode 100644 index 00000000..2e3232c6 --- /dev/null +++ b/Documentation/libtracecmd/libtracecmd-peer.txt @@ -0,0 +1,137 @@ +libtracecmd(3) +============= + +NAME +---- +tracecmd_get_traceid, tracecmd_get_guest_cpumap - Manage trace session with multiple trace peers, +recorded in multiple trace files. + +SYNOPSIS +-------- +[verse] +-- +*#include <trace-cmd.h>* + +unsigned long long *tracecmd_get_traceid*(struct tracecmd_input pass:[*]_handle_); +int *tracecmd_get_guest_cpumap*(struct tracecmd_input pass:[*]_handle_, unsigned long long _trace_id_, const char pass:[*]pass:[*]_name_, int pass:[*]_vcpu_count_, const int pass:[*]pass:[*]_cpu_pid_); +-- + +DESCRIPTION +----------- +This set of APIs can be used to manage a trace session with multiple trace +peers, for example, tracing both a host and one or more guest virtual machines. +The trace data of each peer from the session is recorded in separate trace files. +Information about peers from the session is stored in the metadata of each +trace file. These APIs use that information to extract and synchronize +the trace data. + +The _tracecmd_get_traceid()_ function returns the trace ID stored in +the trace file metadata associated with _handle_. 
Each peer from a trace +session has an ID unique for that peer and that trace session only. +This ID is used to match multiple trace files recorded in the same trace +session. + +The _tracecmd_get_guest_cpumap()_ function gets the mapping of guest +virtual CPUs (VCPU) to the host process that represents those VCPUs and is +stored in the metadata of the trace file associated with _handle_. This +information is gathered during a host-guest trace session and is stored +in the host trace file. The _trace_id_ parameter is the trace ID of the guest +in this particular trace session. If a guest with that ID was part of that +session, its VCPU to host process mapping is in the host trace file and the +information is returned in _name_, _vcpu_count_ and _cpu_pid_ parameters. +The _name_ parameter contains the name of the guest, the _vcpu_count_ contains +the count of VCPUs of that guest and the _cpu_pid_ array contains the VCPU to +host process mapping. The array is of size _vcpu_count_ where the index is VCPU +and the value is the process ID (PID) of the host process, running that VCPU. +The _name_, _vcpu_count_ and _cpu_pid_ values must *not* be freed. + +RETURN VALUE +------------ +The _tracecmd_get_traceid()_ function returns a 64-bit trace ID. + +The _tracecmd_get_guest_cpumap()_ function returns -1 in case of +an error or 0 otherwise. If 0 is returned, then the _name_, _vcpu_count_ +and _cpu_pid_ parameters contain the requested information. + +EXAMPLE +------- +[source,c] +-- +#include <trace-cmd.h> +... 
+struct tracecmd_input *host = tracecmd_open("trace.dat"); + if (!host) { + /* Failed to open host trace file */ + } + +struct tracecmd_input *guest1 = tracecmd_open_head("trace-Guest1.dat"); + if (!guest1) { + /* Failed to open guest1 trace file */ + } +struct tracecmd_input *guest2 = tracecmd_open_head("trace-Guest2.dat"); + if (!guest2) { + /* Failed to open guest2 trace file */ + } + +unsigned long long guest_id_1 = tracecmd_get_traceid(guest1); +unsigned long long guest_id_2 = tracecmd_get_traceid(guest2); +const int *cpu_pid_1, *cpu_pid_2; +int vcount_1, vcount_2; +const char *name_1, *name_2; + + if (!tracecmd_get_guest_cpumap(host, guest_id_1, &name_1, &vcount_1, &cpu_pid_1)) { + /* The Host and a guest1 with name_1 are part of the same trace session. + * Got guest1 VCPU to host PID mapping. + */ + } + if (!tracecmd_get_guest_cpumap(host, guest_id_2, &name_2, &vcount_2, &cpu_pid_2)) { + /* The Host and a guest2 with name_2 are part of the same trace session. + * Got guest2 VCPU to host PID mapping. + */ + } +... + tracecmd_close(guest1); + tracecmd_close(guest2); + tracecmd_close(host); + +-- +FILES +----- +[verse] +-- +*trace-cmd.h* + Header file to include in order to have access to the library APIs. +*-ltracecmd* + Linker switch to add when building a program that uses the library. +-- + +SEE ALSO +-------- +_libtracefs(3)_, +_libtraceevent(3)_, +_trace-cmd(1)_, +_trace-cmd.dat(5)_ + +AUTHOR +------ +[verse] +-- +*Steven Rostedt* <rostedt@goodmis.org> +*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com> +-- +REPORTING BUGS +-------------- +Report bugs to <linux-trace-devel@vger.kernel.org> + +LICENSE +------- +libtracecmd is Free Software licensed under the GNU LGPL 2.1 + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). 
diff --git a/Documentation/libtracecmd/libtracecmd-record.txt b/Documentation/libtracecmd/libtracecmd-record.txt new file mode 100644 index 00000000..aa1a4a66 --- /dev/null +++ b/Documentation/libtracecmd/libtracecmd-record.txt @@ -0,0 +1,138 @@ +libtracecmd(3) +============= + +NAME +---- +tracecmd_read_cpu_first, tracecmd_read_data, tracecmd_read_at, +tracecmd_free_record, tracecmd_get_tep - Read recorded events from a trace file. + +SYNOPSIS +-------- +[verse] +-- +*#include <trace-cmd.h>* + +struct tep_record pass:[*]*tracecmd_read_cpu_first*(struct tracecmd_input pass:[*]_handle_, int _cpu_); +struct tep_record pass:[*]*tracecmd_read_data*(struct tracecmd_input pass:[*]_handle_, int _cpu_); +struct tep_record pass:[*]*tracecmd_read_at*(struct tracecmd_input pass:[*]_handle_, unsigned long long _offset_, int pass:[*]_cpu_); +void *tracecmd_free_record*(struct tep_record pass:[*]_record_); +struct tep_handle pass:[*]*tracecmd_get_tep*(struct tracecmd_input pass:[*]_handle_); +-- + +DESCRIPTION +----------- +This set of APIs can be used to read tracing data from a trace file opened +with _tracecmd_open()(3)_, _tracecmd_open_fd()(3)_ or _tracecmd_open_head()(3)_. + +The _tracecmd_read_cpu_first()_ function reads the first trace record +for a given _cpu_ from a trace file associated with _handle_. The returned +record must be freed with _tracecmd_free_record()_. + +The _tracecmd_read_data()_ function reads the next trace record for +a given _cpu_ from a trace file associated with _handle_ and increments +the read location pointer, so that the next call to _tracecmd_read_data()_ +will not read the same record again. The returned record must be freed +with _tracecmd_free_record()_. + +The _tracecmd_read_at()_ function reads a trace record from a specific +_offset_ within the file associated with _handle_. The CPU on which the +recorded event occurred is stored in the _cpu_. The function does not +change the current read location pointer. 
The returned record must be +freed with _tracecmd_free_record()_. + +The _tracecmd_free_record()_ function frees a _record_ returned by any +of the _tracecmd_read__ APIs. + +The _tracecmd_get_tep()_ function returns a tep context for a given +_handle_. + +RETURN VALUE +------------ +The _tracecmd_read_cpu_first()_, _tracecmd_read_data()_ and +_tracecmd_read_at()_ functions return a pointer to struct tep_record or +NULL in case of an error. The returned record must be freed with +_tracecmd_free_record()_. + +The _tracecmd_get_tep()_ function returns a pointer to tep context or +NULL if there is no tep context for the given _handle_. The returned +tep pointer must *not* be freed. + +EXAMPLE +------- +[source,c] +-- +#include <trace-cmd.h> +... +struct tracecmd_input *handle = tracecmd_open("trace.dat"); + if (!handle) { + /* Failed to open trace.dat file */ + } +... +unsigned long long offset = 0; +struct tep_record *rec; +int cpu = 0; + rec = tracecmd_read_cpu_first(handle, cpu); + while (rec) { + ... + if ( /* some interesting record noticed */) { + /* store the offset of the interesting record */ + offset = rec->offset; + } + ... + tracecmd_free_record(rec); + rec = tracecmd_read_data(handle, cpu); + } + ... + if (offset) { + rec = tracecmd_read_at(handle, offset, &cpu); + if (rec) { + /* Got record at offset on cpu */ + ... + tracecmd_free_record(rec); + } + } + +... + tracecmd_close(handle); + +-- +FILES +----- +[verse] +-- +*trace-cmd.h* + Header file to include in order to have access to the library APIs. +*-ltracecmd* + Linker switch to add when building a program that uses the library. 
+-- + +SEE ALSO +-------- +_libtracefs(3)_, +_libtraceevent(3)_, +_trace-cmd(1)_ +_trace-cmd.dat(5)_ + +AUTHOR +------ +[verse] +-- +*Steven Rostedt* <rostedt@goodmis.org> +*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com> +-- +REPORTING BUGS +-------------- +Report bugs to <linux-trace-devel@vger.kernel.org> + +LICENSE +------- +libtracecmd is Free Software licensed under the GNU LGPL 2.1 + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). diff --git a/Documentation/libtracecmd/libtracecmd.txt b/Documentation/libtracecmd/libtracecmd.txt new file mode 100644 index 00000000..dc528ce0 --- /dev/null +++ b/Documentation/libtracecmd/libtracecmd.txt @@ -0,0 +1,86 @@ +libtracecmd(3) +============= + +NAME +---- +libtracecmd - trace-cmd library APIs + +SYNOPSIS +-------- +[verse] +-- +*#include <trace-cmd.h>* + +Open and close trace file: + struct tracecmd_input pass:[*]*tracecmd_open*(const char pass:[*]_file_, int _flags_); + struct tracecmd_input pass:[*]*tracecmd_open_fd*(int _fd_, int _flags_); + struct tracecmd_input pass:[*]*tracecmd_open_head*(const char pass:[*]_file_, int _flags_); + void *tracecmd_close*(struct tracecmd_input pass:[*]_handle_); + +Read tracing records from a trace file: + int *tracecmd_init_data*(struct tracecmd_input pass:[*]_handle_); + struct tep_record pass:[*]*tracecmd_read_cpu_first*(struct tracecmd_input pass:[*]_handle_, int _cpu_); + struct tep_record pass:[*]*tracecmd_read_data*(struct tracecmd_input pass:[*]_handle_, int _cpu_); + struct tep_record pass:[*]*tracecmd_read_at*(struct tracecmd_input pass:[*]_handle_, unsigned long long _offset_, int pass:[*]_cpu_); + void *tracecmd_free_record*(struct tep_record pass:[*]_record_); + struct tep_handle pass:[*]*tracecmd_get_tep*(struct tracecmd_input pass:[*]_handle_); + +Read tracing instances from a trace file: + int 
*tracecmd_buffer_instances*(struct tracecmd_input pass:[*]_handle_); + const char pass:[*]*tracecmd_buffer_instance_name*(struct tracecmd_input pass:[*]_handle_, int _indx_); + struct tracecmd_input pass:[*]*tracecmd_buffer_instance_handle*(struct tracecmd_input pass:[*]_handle_, int _indx_); + +Get tracing peer information from a trace file: + unsigned long long *tracecmd_get_traceid*(struct tracecmd_input pass:[*]_handle_); + int *tracecmd_get_guest_cpumap*(struct tracecmd_input pass:[*]_handle_, unsigned long long _trace_id_, const char pass:[*]pass:[*]_name_, int pass:[*]_vcpu_count_, const int pass:[*]pass:[*]_cpu_pid_); + +Control library logs: + int *tracecmd_set_loglevel*(enum tep_loglevel _level_); +-- + +DESCRIPTION +----------- +The libtracecmd(3) library provides APIs to read, parse and write +_trace-cmd.dat(5)_ files, recorded with _trace-cmd(1)_ application and containing +tracing information from ftrace, the official Linux kernel tracer. + +FILES +----- +[verse] +-- +*trace-cmd.h* + Header file to include in order to have access to the library APIs. +*-ltracecmd* + Linker switch to add when building a program that uses the library. +-- + +SEE ALSO +-------- +_libtraceevent(3)_, +_libtracefs(3)_, +_trace-cmd(1)_, +_trace-cmd.dat(5)_ + +AUTHOR +------ +[verse] +-- +*Steven Rostedt* <rostedt@goodmis.org> +*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com> +-- +REPORTING BUGS +-------------- +Report bugs to <linux-trace-devel@vger.kernel.org> + +LICENSE +------- +libtracecmd is Free Software licensed under the GNU LGPL 2.1 + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). 
diff --git a/Documentation/manpage-1.72.xsl b/Documentation/manpage-1.72.xsl new file mode 100644 index 00000000..b4d315cb --- /dev/null +++ b/Documentation/manpage-1.72.xsl @@ -0,0 +1,14 @@ +<!-- manpage-1.72.xsl: + special settings for manpages rendered from asciidoc+docbook + handles peculiarities in docbook-xsl 1.72.0 --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<xsl:import href="manpage-base.xsl"/> + +<!-- these are the special values for the roff control characters + needed for docbook-xsl 1.72.0 --> +<xsl:param name="git.docbook.backslash">▓</xsl:param> +<xsl:param name="git.docbook.dot" >⌂</xsl:param> + +</xsl:stylesheet> diff --git a/Documentation/manpage-base.xsl b/Documentation/manpage-base.xsl new file mode 100644 index 00000000..a264fa61 --- /dev/null +++ b/Documentation/manpage-base.xsl @@ -0,0 +1,35 @@ +<!-- manpage-base.xsl: + special formatting for manpages rendered from asciidoc+docbook --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<!-- these params silence some output from xmlto --> +<xsl:param name="man.output.quietly" select="1"/> +<xsl:param name="refentry.meta.get.quietly" select="1"/> + +<!-- convert asciidoc callouts to man page format; + git.docbook.backslash and git.docbook.dot params + must be supplied by another XSL file or other means --> +<xsl:template match="co"> + <xsl:value-of select="concat( + $git.docbook.backslash,'fB(', + substring-after(@id,'-'),')', + $git.docbook.backslash,'fR')"/> +</xsl:template> +<xsl:template match="calloutlist"> + <xsl:value-of select="$git.docbook.dot"/> + <xsl:text>sp </xsl:text> + <xsl:apply-templates/> + <xsl:text> </xsl:text> +</xsl:template> +<xsl:template match="callout"> + <xsl:value-of select="concat( + $git.docbook.backslash,'fB', + substring-after(@arearefs,'-'), + '. 
',$git.docbook.backslash,'fR')"/> + <xsl:apply-templates/> + <xsl:value-of select="$git.docbook.dot"/> + <xsl:text>br </xsl:text> +</xsl:template> + +</xsl:stylesheet> diff --git a/Documentation/manpage-bold-literal.xsl b/Documentation/manpage-bold-literal.xsl new file mode 100644 index 00000000..608eb5df --- /dev/null +++ b/Documentation/manpage-bold-literal.xsl @@ -0,0 +1,17 @@ +<!-- manpage-bold-literal.xsl: + special formatting for manpages rendered from asciidoc+docbook --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<!-- render literal text as bold (instead of plain or monospace); + this makes literal text easier to distinguish in manpages + viewed on a tty --> +<xsl:template match="literal"> + <xsl:value-of select="$git.docbook.backslash"/> + <xsl:text>fB</xsl:text> + <xsl:apply-templates/> + <xsl:value-of select="$git.docbook.backslash"/> + <xsl:text>fR</xsl:text> +</xsl:template> + +</xsl:stylesheet> diff --git a/Documentation/manpage-normal.xsl b/Documentation/manpage-normal.xsl new file mode 100644 index 00000000..a48f5b11 --- /dev/null +++ b/Documentation/manpage-normal.xsl @@ -0,0 +1,13 @@ +<!-- manpage-normal.xsl: + special settings for manpages rendered from asciidoc+docbook + handles anything we want to keep away from docbook-xsl 1.72.0 --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<xsl:import href="manpage-base.xsl"/> + +<!-- these are the normal values for the roff control characters --> +<xsl:param name="git.docbook.backslash">\</xsl:param> +<xsl:param name="git.docbook.dot" >.</xsl:param> + +</xsl:stylesheet> diff --git a/Documentation/manpage-suppress-sp.xsl b/Documentation/manpage-suppress-sp.xsl new file mode 100644 index 00000000..a63c7632 --- /dev/null +++ b/Documentation/manpage-suppress-sp.xsl @@ -0,0 +1,21 @@ +<!-- manpage-suppress-sp.xsl: + special settings for manpages rendered from asciidoc+docbook + handles erroneous, inline .sp in manpage output of 
some + versions of docbook-xsl --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<!-- attempt to work around spurious .sp at the tail of the line + that some versions of docbook stylesheets seem to add --> +<xsl:template match="simpara"> + <xsl:variable name="content"> + <xsl:apply-templates/> + </xsl:variable> + <xsl:value-of select="normalize-space($content)"/> + <xsl:if test="not(ancestor::authorblurb) and + not(ancestor::personblurb)"> + <xsl:text> </xsl:text> + </xsl:if> +</xsl:template> + +</xsl:stylesheet> diff --git a/Documentation/trace-cmd/Makefile b/Documentation/trace-cmd/Makefile new file mode 100644 index 00000000..1568af53 --- /dev/null +++ b/Documentation/trace-cmd/Makefile @@ -0,0 +1,132 @@ +# SPDX-License-Identifier: GPL-2.0 + +# Include the utils +include $(src)/scripts/utils.mk + +# This Makefile and manpage XSL files were taken from libtracefs +# and modified for libtracecmd + +MAN1_TXT= \ + $(wildcard trace-cmd*.1.txt) + +MAN5_TXT= \ + $(wildcard trace-cmd*.5.txt) + +MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) +_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) +_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) +_DOC_MAN1=$(patsubst %.1.txt,%.1,$(MAN1_TXT)) +_DOC_MAN5=$(patsubst %.5.txt,%.5,$(MAN5_TXT)) + +MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML)) +MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML)) +DOC_MAN1=$(addprefix $(OUTPUT),$(_DOC_MAN1)) +DOC_MAN5=$(addprefix $(OUTPUT),$(_DOC_MAN5)) + +# Make the path relative to DESTDIR, not prefix +ifndef DESTDIR +prefix?=$(HOME) +endif +bindir?=$(prefix)/bin +htmldir?=$(prefix)/share/doc/trace-cmd +pdfdir?=$(prefix)/share/doc/trace-cmd +mandir?=$(prefix)/share/man +man1dir=$(mandir)/man1 +man5dir=$(mandir)/man5 + +ifdef USE_ASCIIDOCTOR +ASCIIDOC_EXTRA += -a mansource="libtracecmd" -a manmanual="libtracecmd Manual" +endif + +all: check-man-tools html man + +man: man1 man5 +man1: $(DOC_MAN1) +man5: $(DOC_MAN5) + +html: $(MAN_HTML) + +$(MAN_HTML) $(DOC_MAN1) $(DOC_MAN5): $(ASCIIDOC_CONF) + 
+install: check-man-tools install-man install-html + +check-man-tools: +ifdef missing_tools + $(error "You need to install $(missing_tools) for man pages") +endif + +install-%.1: $(OUTPUT)%.1 + $(Q)$(call do_install_docs,$<,$(man1dir),644); + +install-%.5: $(OUTPUT)%.5 + $(Q)$(call do_install_docs,$<,$(man5dir),644); + +do-install-man: man $(addprefix install-,$(wildcard $(OUTPUT)*.1)) \ + $(addprefix install-,$(wildcard $(OUTPUT)*.5)) + +install-man: man + $(Q)$(MAKE) -C . do-install-man + +install-%.txt: $(OUTPUT)%.html + $(Q)$(call do_install_docs,$<,$(htmldir),644); + +do-install-html: html $(addprefix install-,$(wildcard *.txt)) + +install-html: html do-install-html + +uninstall: uninstall-man uninstall-html + +uninstall-man: + $(Q)$(RM) $(addprefix $(DESTDIR)$(man1dir)/,$(DOC_MAN1)) + $(Q)$(RM) $(addprefix $(DESTDIR)$(man5dir)/,$(DOC_MAN5)) + +uninstall-html: + $(Q)$(RM) $(addprefix $(DESTDIR)$(htmldir)/,$(MAN_HTML)) + +ifdef missing_tools + DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed) +else + DO_INSTALL_MAN = do-install-man +endif + +CLEAN_FILES = \ + $(MAN_XML) $(addsuffix +,$(MAN_XML)) \ + $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \ + $(DOC_MAN1) $(DOC_MAN5) *.1 *.5 + +clean: + $(Q) $(RM) $(CLEAN_FILES) + +ifdef USE_ASCIIDOCTOR +$(OUTPUT)%.1 : $(OUTPUT)%.1.txt + $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ + $(ASCIIDOC) -b manpage -d manpage \ + $(ASCIIDOC_EXTRA) -atracecmd_version=$(TRACECMD_VERSION) -o $@+ $< && \ + mv $@+ $@ + +$(OUTPUT)%.5 : $(OUTPUT)%.5.txt + $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ + $(ASCIIDOC) -b manpage -d manpage \ + $(ASCIIDOC_EXTRA) -atracecmd_version=$(TRACECMD_VERSION) -o $@+ $< && \ + mv $@+ $@ +endif + +$(OUTPUT)%.1 : $(OUTPUT)%.1.xml + $(QUIET_XMLTO)$(RM) $@ && \ + $(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<; \ + +$(OUTPUT)%.5 : $(OUTPUT)%.5.xml + $(QUIET_XMLTO)$(RM) $@ && \ + $(XMLTO) -o $(OUTPUT). 
-m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<; \ + +$(OUTPUT)%.xml : %.txt + $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ + $(ASCIIDOC) -b docbook -d manpage \ + $(ASCIIDOC_EXTRA) -atracecmd_version=$(TRACECMD_VERSION) -o $@+ $< && \ + mv $@+ $@ + +$(MAN_HTML): $(OUTPUT)%.html : %.txt + $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ + $(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \ + $(ASCIIDOC_EXTRA) -atracecmd_version=$(TRACECMD_VERSION) -o $@+ $< && \ + mv $@+ $@ diff --git a/Documentation/trace-cmd/trace-cmd-agent.1.txt b/Documentation/trace-cmd/trace-cmd-agent.1.txt new file mode 100644 index 00000000..f247d41d --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-agent.1.txt @@ -0,0 +1,62 @@ +TRACE-CMD-AGENT(1) +================== + +NAME +---- +trace-cmd-agent - Run as an agent on a machine (to be controlled by another machine) + +SYNOPSIS +-------- +*trace-cmd agent* ['OPTIONS'] + +DESCRIPTION +----------- +The trace-cmd(1) agent listens over a vsocket (for virtual machines) or a TCP port +for connections to control the tracing of the machine. The agent will then start +tracing on the local machine and pass the data to the controlling connection. + +OPTIONS +------- +*-N* 'client':: + Listen over TCP instead of a vsocket. Must pass in a client host name or IP address + to allow connection to. It will only connect to the specified client. Note, any process + on that client can control the agent. + + *This is a very insecure setting. Only use on a trusted network* + *Only use if the client machine is totally trusted* + +*-p* 'port':: + This option will specify the port to listen to. + +*-D*:: + This option causes trace-cmd agent to go into a daemon mode and run in + the background. + +*--verbose*[='level']:: + Set the log level. Supported log levels are "none", "critical", "error", "warning", + "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log + level to a specific value enables all logs from that and all previous levels. 
+ The level will default to "info" if one is not specified. + + Example: enable all critical, error and warning logs + + trace-cmd agent --verbose=warning + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), +trace-cmd-split(1), trace-cmd-list(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). diff --git a/Documentation/trace-cmd/trace-cmd-check-events.1.txt b/Documentation/trace-cmd/trace-cmd-check-events.1.txt new file mode 100644 index 00000000..debab6c5 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-check-events.1.txt @@ -0,0 +1,55 @@ +TRACE-CMD-CHECK_EVENTS(1) +========================= + +NAME +---- +trace-cmd-check-events - parse the event formats on the local system + +SYNOPSIS +-------- +*trace-cmd check-events* ['OPTIONS'] + +DESCRIPTION +----------- +The trace-cmd(1) check-events parses format strings for all the events on the +local system. It returns whether all the format strings can be parsed +correctly. It will load plugins unless specified otherwise. + +This is useful to check for any trace event format strings which may contain +some internal kernel function references which cannot be decoded outside of +the kernel. This may mean that either the unparsed format strings of the trace +events need to be changed or that a plugin needs to be created to parse them. + +OPTIONS +------- +*-N* - Don't load plugins + +*--verbose*[='level']:: + Set the log level. Supported log levels are "none", "critical", "error", "warning", + "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log + level to a specific value enables all logs from that and all previous levels. 
+ The level will default to "info" if one is not specified. + + Example: enable all critical, error and warning logs + + trace-cmd check-events --verbose=warning + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-stop(1), +trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), +trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-start(1) + +AUTHOR +------ +Written by Vaibhav Nagarnaik, <vnagarnaik@google.com> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2011 Google, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). + diff --git a/Documentation/trace-cmd/trace-cmd-clear.1.txt b/Documentation/trace-cmd/trace-cmd-clear.1.txt new file mode 100644 index 00000000..74236960 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-clear.1.txt @@ -0,0 +1,51 @@ +TRACE-CMD-CLEAR(1) +================= + +NAME +---- +trace-cmd-clear - clear the Ftrace buffer. + +SYNOPSIS +-------- +*trace-cmd clear* ['OPTIONS'] + +DESCRIPTION +----------- +The *trace-cmd(1) clear* clears the content of the Ftrace ring buffer. + +OPTIONS +------- +*-B* 'buffer-name':: + If the kernel supports multiple buffers, this will clear only the given + buffer. It does not affect any other buffers. This may be used multiple + times to specify different buffers. The top level buffer will not be + cleared if this option is given. + +*-a*:: + Clear all existing buffers, including the top level one. + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), +trace-cmd-list(1), trace-cmd-listen(1) + +AUTHOR +------ +[verse] +-- +*Steven Rostedt* <rostedt@goodmis.org>, author of *trace-cmd*. +*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. 
+-- +REPORTING BUGS +-------------- +Report bugs to <linux-trace-devel@vger.kernel.org> + +LICENSE +------- +trace-cmd is Free Software licensed under the terms of the +GNU Public License (GPL). + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
\ No newline at end of file diff --git a/Documentation/trace-cmd/trace-cmd-convert.1.txt b/Documentation/trace-cmd/trace-cmd-convert.1.txt new file mode 100644 index 00000000..7c13cf3d --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-convert.1.txt @@ -0,0 +1,65 @@ +TRACE-CMD-CONVERT(1) +=================== + +NAME +---- +trace-cmd-convert - convert trace files + +SYNOPSIS +-------- +*trace-cmd convert* ['OPTIONS'] ['output-file'] + +DESCRIPTION +----------- +The trace-cmd(1) convert command converts a trace file. It reads the input file and copies the data +into an output file. The output file may be in a different format, depending on the command line +arguments. + +OPTIONS +------- +*-i* 'input-file':: + By default, trace-cmd convert will read the file 'trace.dat'. But the *-i* + option opens up the given 'input-file' instead. + +*-o* 'out-file':: + The name of the output file, this parameter is mandatory. Note, the output file may also be + specified as the last item on the command line. + +*--file-version*:: + Desired version of the output file. Supported versions are 6 or 7. + +*--compression*:: + Compression of the trace output file, one of these strings can be passed: + + 'any' - auto select the best available compression algorithm + + 'none' - do not compress the trace file + + 'name' - the name of the desired compression algorithm. Available algorithms can be listed with + trace-cmd list -c + +*--help*:: + Print usage information. + +EXAMPLES +-------- + +# trace-cmd convert --compression any trace_compress.dat + +SEE ALSO +-------- +trace-cmd(1), trace-cmd.dat(5) + +AUTHOR +------ +*Steven Rostedt* <rostedt@goodmis.org>, author of *trace-cmd*. +*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2021 VMware. Free use of this software is granted under +the terms of the GNU Public License (GPL). 
 diff --git a/Documentation/trace-cmd/trace-cmd-dump.1.txt b/Documentation/trace-cmd/trace-cmd-dump.1.txt new file mode 100644 index 00000000..9c95244b --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-dump.1.txt @@ -0,0 +1,142 @@ +TRACE-CMD-DUMP(1) +=================== + +NAME +---- +trace-cmd-dump - show meta data from a trace file created by trace-cmd record + +SYNOPSIS +-------- +*trace-cmd dump* ['OPTIONS'] ['input-file'] + +DESCRIPTION +----------- +The trace-cmd(1) dump command will display the meta data from a trace file +created by trace-cmd record. + +OPTIONS +------- +*-i* 'input-file':: + By default, trace-cmd dump will read the file 'trace.dat'. But the *-i* + option opens up the given 'input-file' instead. Note, the input file may + also be specified as the last item on the command line. +*-v*, *--validate*:: + Check if the input file is a valid trace file, created by trace-cmd. +*--summary*:: + Print a meta data summary - initial format and a short description of each + file section. This is the default action, if no arguments are specified. +*--head-page*:: + Print the header page information, stored in the file. +*--head-event*:: + Print the event header information, stored in the file. +*--ftrace-events*:: + Print formats of ftrace specific events. +*--systems*:: + Print information of event systems, stored in the file - name and number of + events for each system. +*--events*:: + Print formats of all events, stored in the file. +*--kallsyms*:: + Print information of the mapping of function addresses to the function names. +*--printk*:: + Print trace_printk() format strings, stored in the file. +*--cmd-lines*:: + Print the mapping of PIDs to process names. +*--options*:: + Print all options, stored in the file. +*--flyrecord*:: + Print the offset and the size of tracing data per each CPU. +*--clock*:: + Print the trace clock, used for timestamp of the tracing events, stored in the file. +*--all*:: + Print all meta data from the file. 
+*--help*:: + Print usage information. +*--verbose*[='level']:: + Set the log level. Supported log levels are "none", "critical", "error", "warning", + "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log + level to specific value enables all logs from that and all previous levels. + The level will default to "info" if one is not specified. + + Example: enable all critical, error and warning logs + + trace-cmd dump --verbose=warning + +EXAMPLES +-------- + +# trace-cmd dump --summary -i trace.dat + + Tracing meta data in file trace.dat: + [Initial format] + 6 [Version] + 0 [Little endian] + 8 [Bytes in a long] + 4096 [Page size, bytes] + [Header info, 205 bytes] + [Header event, 205 bytes] + [Ftrace format, 15 events] + [Events format, 2 systems] + [Kallsyms, 7144493 bytes] + [Trace printk, 2131 bytes] + [Saved command lines, 117 bytes] + 8 [CPUs with tracing data] + [12 options] + [Flyrecord tracing data] +------------------------------------------ + +# trace-cmd dump --flyrecord -i trace.dat + [Flyrecord tracing data] + 7176192 0 [offset, size of cpu 0] + 7176192 0 [offset, size of cpu 1] + 7176192 0 [offset, size of cpu 2] + 7176192 4096 [offset, size of cpu 3] + 7180288 4096 [offset, size of cpu 4] + 7184384 0 [offset, size of cpu 5] + 7184384 0 [offset, size of cpu 6] + 7184384 0 [offset, size of cpu 7] +------------------------------------------ + +# trace-cmd dump --summary --systems -i trace.dat + + Tracing meta data in file trace.dat: + [Initial format] + 6 [Version] + 0 [Little endian] + 8 [Bytes in a long] + 4096 [Page size, bytes] + [Header info, 205 bytes] + [Header event, 205 bytes] + [Ftrace format, 15 events] + [Events format, 3 systems] + sched 23 [system, events] + irq 5 [system, events] + kvm 70 [system, events] + [Kallsyms, 7144493 bytes] + [Trace printk, 2131 bytes] + [Saved command lines, 157 bytes] + 8 [CPUs with tracing data] + [11 options] + [Flyrecord tracing data] 
+------------------------------------------ + +# trace-cmd dump --validate -i trace.dat +File trace.dat is a valid trace-cmd file +------------------------------------------ +SEE ALSO +-------- +trace-cmd(1), trace-cmd.dat(1) + +AUTHOR +------ +*Steven Rostedt* <rostedt@goodmis.org>, author of *trace-cmd*. +*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). diff --git a/Documentation/trace-cmd/trace-cmd-extract.1.txt b/Documentation/trace-cmd/trace-cmd-extract.1.txt new file mode 100644 index 00000000..776da6e1 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-extract.1.txt @@ -0,0 +1,98 @@ +TRACE-CMD-EXTRACT(1) +==================== + +NAME +---- +trace-cmd-extract - extract out the data from the Ftrace Linux tracer. + +SYNOPSIS +-------- +*trace-cmd extract ['OPTIONS']* + +DESCRIPTION +----------- +The trace-cmd(1) extract is usually used after 'trace-cmd-start(1)' and +'trace-cmd-stop(1)'. It can be used after the Ftrace tracer has been started +manually through the Ftrace pseudo file system. + +The extract command creates a trace.dat file that can be used by +'trace-cmd-report(1)' to read from. It reads the kernel internal ring buffer +to produce the trace.dat file. + +OPTIONS +------- +*-p* 'plugin':: + Although *extract* does not start any traces, some of the plugins require + just reading the output in ASCII format. These are the latency tracers, + since the latency tracers have a separate internal buffer. The plugin + option is therefore only necessary for the 'wakeup', 'wakeup-rt', + 'irqsoff', 'preemptoff' and 'preemptirqsoff' plugins. + + Without this option, the extract command will extract from the internal + Ftrace buffers. 
+ +*-O* 'option':: + If a latency tracer is being extracted, and the *-p* option is used, then + there are some Ftrace options that can change the format. This will update + those options before extracting. To see the list of options see + 'trace-cmd-list'. To enable an option, write its name, to disable the + option prepend the characters 'no' to it. For example: 'noprint-parent' + will disable the 'print-parent' option that prints the parent function in + printing a function event. + +*-o* 'outputfile':: + By default, the extract command will create a 'trace.dat' file. This + option will change where the file is written to. + +*-s*:: + Extract from the snapshot buffer (if the kernel supports it). + +*--date*:: + This is the same as the trace-cmd-record(1) --date option, but it + does cause the extract routine to disable all tracing. That is, + the end of the extract will perform something similar to trace-cmd-reset(1). + +*-B* 'buffer-name':: + If the kernel supports multiple buffers, this will extract the trace for + only the given buffer. It does not affect any other buffer. This may be + used multiple times to specify different buffers. When this option is + used, the top level instance will not be extracted unless *-t* is given. + +*-a*:: + Extract all existing buffer instances. When this option is used, the + top level instance will not be extracted unless *-t* is given. + +*-t*:: + Extracts the top level instance buffer. Without the *-B* or *-a* option + this is the same as the default. But if *-B* or *-a* is used, this is + required if the top level instance buffer should also be extracted. + +*--verbose*[='level']:: + Set the log level. Supported log levels are "none", "critical", "error", "warning", + "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log + level to specific value enables all logs from that and all previous levels. + The level will default to "info" if one is not specified. 
+ + Example: enable all critical, error and warning logs + + trace-cmd extract --verbose=warning + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-stop(1), trace-cmd-reset(1), trace-cmd-split(1), +trace-cmd-list(1), trace-cmd-listen(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). + diff --git a/Documentation/trace-cmd/trace-cmd-hist.1.txt b/Documentation/trace-cmd/trace-cmd-hist.1.txt new file mode 100644 index 00000000..169f8d7b --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-hist.1.txt @@ -0,0 +1,50 @@ +TRACE-CMD-HIST(1) +================= + +NAME +---- +trace-cmd-hist - show histogram of events in trace.dat file + +SYNOPSIS +-------- +*trace-cmd hist* ['OPTIONS']['input-file'] + +DESCRIPTION +----------- +The trace-cmd(1) hist displays a histogram form from the trace.dat file. +Instead of showing the events as they were ordered, it creates a histogram +that can be displayed per task or for all tasks where the most common +events appear first. It uses the function tracer and call stacks that it +finds to try to put together a call graph of the events. + +OPTIONS +------- +*-i* 'input-file':: + By default, trace-cmd hist will read the file 'trace.dat'. But the *-i* + option open up the given 'input-file' instead. Note, the input file may + also be specified as the last item on the command line. + +*-P*:: + To compact all events and show the call graphs by ignoring tasks + and different PIDs, add the *-P* to do so. Instead of showing the + task name, it will group all chains together and show "<all pids>". 
+ +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), +trace-cmd-split(1), trace-cmd-listen(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). + diff --git a/Documentation/trace-cmd/trace-cmd-list.1.txt b/Documentation/trace-cmd/trace-cmd-list.1.txt new file mode 100644 index 00000000..b77e3460 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-list.1.txt @@ -0,0 +1,95 @@ +TRACE-CMD-LIST(1) +================= + +NAME +---- +trace-cmd-list - list available plugins, events or options for Ftrace. + +SYNOPSIS +-------- +*trace-cmd list* ['OPTIONS'] + +DESCRIPTION +----------- +The trace-cmd(1) list displays the available plugins, events or Ftrace options +that are configured on the current machine. If no option is given, then it +lists all plugins, event systems, events and Ftrace options to standard output. + +OPTIONS +------- +*-e* ['regex']:: + This option will list the available events that are enabled on the + local system. + + It takes an optional argument that uses 'regcomp(3)' expressions to search. + + trace-cmd list -e '^sys.*' + +*-F*:: + Used with *-e* 'regex' to show the fields of the event. + +*--full*:: + Used with *-F* which will show the "print fmt" of the event along with the fields. + +*-l*:: + Used with *-e* 'regex' to show the filters of those events. + +*-R*:: + Used with *-e* 'regex' to show the triggers of those events. + +*-s*:: + This option will list the available event systems. + +*-t*:: + This option will list the available tracers that are enabled on the + local system. + +*-p*:: + Same as *-t* and only for legacy purposes. 
+ +*-o*:: + This option will list the available Ftrace options that are configured on + the local system. + +*-f* ['regex']:: + This option will list the available filter functions. These are the list of + functions on the system that you can trace, or filter on. + It takes an optional argument that uses 'regcomp(3)' expressions to search. + + trace-cmd list -f '^sched.*' + +*-P*:: + List the plugin files that get loaded on trace-cmd report. + +*-O*:: + List plugin options that can be used by trace-cmd report *-O* option. + +*-B*:: + List defined buffer instances (sub buffers). + +*-C*:: + List defined clocks that can be used with trace-cmd record -C. + The one in brackets ([]) is the active clock. + +*-c*:: + List the available trace file compression algorithms. + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), +trace-cmd-split(1), trace-cmd-listen(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). + diff --git a/Documentation/trace-cmd/trace-cmd-listen.1.txt b/Documentation/trace-cmd/trace-cmd-listen.1.txt new file mode 100644 index 00000000..7c6093ba --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-listen.1.txt @@ -0,0 +1,71 @@ +TRACE-CMD-LISTEN(1) +=================== + +NAME +---- +trace-cmd-listen - listen for incoming connection to record tracing. + +SYNOPSIS +-------- +*trace-cmd listen* -p 'port' ['OPTIONS'] + +DESCRIPTION +----------- +The trace-cmd(1) listen sets up a port to listen on, waiting for connections +from other hosts that run 'trace-cmd-record(1)' with the *-N* option. When a +connection is made, and the remote host sends data, it will create a file +called 'trace.HOST:PORT.dat'.
 Where HOST is the name of the remote host, and +PORT is the port that the remote host used to connect with. + +OPTIONS +------- +*-p* 'port':: + This option will specify the port to listen to. + +*-D*:: + This option causes trace-cmd listen to go into a daemon mode and run in + the background. + +*-V*:: + Listen on a vsocket instead. This is useful for tracing between host and + guest VMs. + +*-d* 'dir':: + This option specifies a directory to write the data files into. + +*-o* 'filename':: + This option overrides the default 'trace' in the 'trace.HOST:PORT.dat' that + is created when a remote host connects. + +*-l* 'filename':: + This option writes the output messages to a log file instead of standard output. + +*--verbose*[='level']:: + Set the log level. Supported log levels are "none", "critical", "error", "warning", + "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log + level to specific value enables all logs from that and all previous levels. + The level will default to "info" if one is not specified. + + Example: enable all critical, error and warning logs + + trace-cmd listen --verbose=warning + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), +trace-cmd-split(1), trace-cmd-list(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). 
+ diff --git a/Documentation/trace-cmd/trace-cmd-mem.1.txt b/Documentation/trace-cmd/trace-cmd-mem.1.txt new file mode 100644 index 00000000..90e430b8 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-mem.1.txt @@ -0,0 +1,74 @@ +TRACE-CMD-MEM(1) +================ + +NAME +---- +trace-cmd-mem - show memory usage of certain kmem events + +SYNOPSIS +-------- +*trace-cmd mem* ['OPTIONS']['input-file'] + +DESCRIPTION +----------- +The trace-cmd(1) mem requires a trace-cmd record that enabled the following +events: + + kmalloc + kmalloc_node + kfree + kmem_cache_alloc + kmem_cache_alloc_node + kmem_cache_free + +It then reads the amount requested and the amount freed as well as the +functions that called the allocation. It then reports the final amount +of bytes requested and allocated, along with the total amount allocated +and requested, as well as the max allocation and requested during the run. +It reports the amount of wasted bytes (allocated - requested) that was +not freed, as well as the max wasted amount during the run. The list is +sorted by descending order of wasted bytes after the run. + + Function Waste Alloc req TotAlloc TotReq MaxAlloc MaxReq MaxWaste + -------- ----- ----- --- -------- ------ -------- ------ -------- + rb_allocate_cpu_buffer 768 2304 1536 2304 1536 2304 1536 768 + alloc_pipe_info 400 1152 752 1152 752 1152 752 400 + instance_mkdir 252 544 292 544 292 544 292 252 + __d_alloc 215 1086560 1086345 1087208 1086993 1086560 1086345 215 + get_empty_filp 72 2304 2232 4864 4712 4864 4712 152 + mm_alloc 40 960 920 960 920 960 920 40 + prepare_creds 32 192 160 1728 1440 1728 1440 288 + tracing_buffers_open 8 32 24 32 24 32 24 8 + do_brk 0 0 0 368 368 368 368 0 + journal_add_journal_head 0 6048 6048 6048 6048 6048 6048 0 + journal_start 0 0 0 1224 1224 48 48 0 + __rb_allocate_pages 0 3289856 3289856 3289856 3289856 3289856 3289856 0 + anon_vma_alloc 0 0 0 936 936 864 864 0 + [...] 
+ +OPTIONS +------- +*-i* 'input-file':: + By default, trace-cmd mem will read the file 'trace.dat'. But the *-i* + option opens up the given 'input-file' instead. Note, the input file may + also be specified as the last item on the command line. + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-hist(1), +trace-cmd-split(1), trace-cmd-listen(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2013 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). + diff --git a/Documentation/trace-cmd/trace-cmd-options.1.txt b/Documentation/trace-cmd/trace-cmd-options.1.txt new file mode 100644 index 00000000..bcdf0533 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-options.1.txt @@ -0,0 +1,35 @@ +TRACE-CMD-OPTIONS(1) +==================== + +NAME +---- +trace-cmd-options - list available options from trace-cmd plugins + +SYNOPSIS +-------- +*trace-cmd options* + +DESCRIPTION +----------- +The trace-cmd(1) options command will examine all the trace-cmd plugins +that are used by *trace-cmd report(1)* and list them. + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-start(1), trace-cmd-stop(1), +trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), +trace-cmd-list(1), trace-cmd-listen(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2011 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). 
+ diff --git a/Documentation/trace-cmd/trace-cmd-profile.1.txt b/Documentation/trace-cmd/trace-cmd-profile.1.txt new file mode 100644 index 00000000..078ae9e0 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-profile.1.txt @@ -0,0 +1,686 @@ +TRACE-CMD-PROFILE(1) +==================== + +NAME +---- +trace-cmd-profile - profile tasks running live + +SYNOPSIS +-------- +*trace-cmd profile ['OPTIONS']* ['command'] + +DESCRIPTION +----------- +The trace-cmd(1) profile will start tracing just like trace-cmd-record(1), +with the *--profile* option, except that it does not write to a file, +but instead, it will read the events as they happen and will update the accounting +of the events. When the trace is finished, it will report the results just like +trace-cmd-report(1) would do with its *--profile* option. In other words, +the profile command does the work of trace-cmd record --profile, and trace-cmd +report --profile without having to record the data to disk, in between. + +The advantage of using the profile command is that the profiling can be done +over a long period of time where recording all events would take up too much +disk space. + +This will enable several events as well as the function graph tracer +with a depth of one (if the kernel supports it). This is to show where +tasks enter and exit the kernel and how long they were in the kernel. + +To disable calling function graph, use the *-p* option to enable another +tracer. To not enable any tracer, use *-p nop*. + +All timings are currently in nanoseconds. + +OPTIONS +------- +These are the same as trace-cmd-record(1) with the *--profile* option. + +*-p* 'tracer':: + Set a tracer plugin to run instead of function graph tracing set to + depth of 1. To not run any tracer, use *-p nop*. + +*-S*:: + Only enable the tracer or events specified on the command line. 
+ With this option, the function_graph tracer is not enabled, nor are + any events (like sched_switch), unless they are specifically specified + on the command line (i.e. -p function -e sched_switch -e sched_wakeup) + +*-G*:: + Set interrupt (soft and hard) events as global (associated to CPU + instead of tasks). + +*-o* 'file':: + Write the output of the profile to 'file'. This supersedes *--stderr* + +*-H* 'event-hooks':: + Add custom event matching to connect any two events together. Format is: + [<start_system>:]<start_event>,<start_match>[,<start_pid>]/ + [<end_system>:]<end_event>,<end_match>[,<flags>] + + The start_system:start_event (start_system is optional), is the event that + starts the timing. + + start_match is the field in the start event that is to match with the + end_match in the end event. + + start_pid is optional, as matches are attached to the tasks that run + the events, if another field should be used to find that task, then + it is specified with start_pid. + + end_system:end_event is the event that ends the timing (end_system is + optional). + + end_match is the field in the end event that will match the start event field + start_match. + + flags are optional and can be the following (case insensitive): + + p : The two events are pinned to the same CPU (start and end happen + on the same CPU always). + + s : The event should have a stack traced with it (enable stack tracing + for the start event). + + g : The event is global (not associated to a task). start_pid is + not applicable with this flag. + +*--stderr*:: + Redirect the output to stderr. The output of the command being executed + is not changed. This allows watching the command execute and saving the + output of the profile to another file. + +*--verbose*[='level']:: + Set the log level. Supported log levels are "none", "critical", "error", "warning", + "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6".
Setting the log + level to specific value enables all logs from that and all previous levels. + The level will default to "info" if one is not specified. + + Example: enable all critical, error and warning logs + + trace-cmd profile --verbose=warning + +EXAMPLES +-------- + + --- +# trace-cmd profile -F sleep 1 + [..] +task: sleep-1121 + Event: sched_switch:R (2) Total: 234559 Avg: 117279 Max: 129886 Min:104673 + | + + ftrace_raw_event_sched_switch (0xffffffff8109f310) + 100% (2) time:234559 max:129886 min:104673 avg:117279 + __schedule (0xffffffff816c1e81) + preempt_schedule (0xffffffff816c236e) + ___preempt_schedule (0xffffffff81351a59) + | + + unmap_single_vma (0xffffffff81198c05) + | 55% (1) time:129886 max:129886 min:0 avg:129886 + | stop_one_cpu (0xffffffff8110909a) + | sched_exec (0xffffffff810a119b) + | do_execveat_common.isra.31 (0xffffffff811de528) + | do_execve (0xffffffff811dea8c) + | SyS_execve (0xffffffff811ded1e) + | return_to_handler (0xffffffff816c8458) + | stub_execve (0xffffffff816c6929) + | + + unmap_single_vma (0xffffffff81198c05) + 45% (1) time:104673 max:104673 min:0 avg:104673 + unmap_vmas (0xffffffff81199174) + exit_mmap (0xffffffff811a1f5b) + mmput (0xffffffff8107699a) + flush_old_exec (0xffffffff811ddb75) + load_elf_binary (0xffffffff812287df) + search_binary_handler (0xffffffff811dd3e0) + do_execveat_common.isra.31 (0xffffffff811de8bd) + do_execve (0xffffffff811dea8c) + SyS_execve (0xffffffff811ded1e) + return_to_handler (0xffffffff816c8458) + stub_execve (0xffffffff816c6929) + + + + + Event: sched_switch:S (1) Total: 1000513242 Avg: 1000513242 Max: 1000513242 Min:1000513242 + | + + ftrace_raw_event_sched_switch (0xffffffff8109f310) + 100% (1) time:1000513242 max:1000513242 min:0 avg:1000513242 + __schedule (0xffffffff816c1e81) + schedule (0xffffffff816c23b9) + do_nanosleep (0xffffffff816c4f1c) + hrtimer_nanosleep (0xffffffff810dcd86) + SyS_nanosleep (0xffffffff810dcea6) + return_to_handler (0xffffffff816c8458) + tracesys_phase2 
(0xffffffff816c65b0) + + + + Event: sched_wakeup:1121 (1) Total: 43405 Avg: 43405 Max: 43405 Min:43405 + | + + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960) + 100% (1) time:43405 max:43405 min:0 avg:43405 + ttwu_do_wakeup (0xffffffff810a01a2) + ttwu_do_activate.constprop.122 (0xffffffff810a0236) + try_to_wake_up (0xffffffff810a3ec3) + wake_up_process (0xffffffff810a4057) + hrtimer_wakeup (0xffffffff810db772) + __run_hrtimer (0xffffffff810dbd91) + hrtimer_interrupt (0xffffffff810dc6b7) + local_apic_timer_interrupt (0xffffffff810363e7) + smp_trace_apic_timer_interrupt (0xffffffff816c8c6a) + trace_apic_timer_interrupt (0xffffffff816c725a) + finish_task_switch (0xffffffff8109c3a4) + __schedule (0xffffffff816c1e01) + schedule (0xffffffff816c23b9) + ring_buffer_wait (0xffffffff811323a3) + wait_on_pipe (0xffffffff81133d93) + tracing_buffers_splice_read (0xffffffff811350b0) + do_splice_to (0xffffffff8120476f) + SyS_splice (0xffffffff81206c1f) + tracesys_phase2 (0xffffffff816c65b0) + + + Event: func: sys_nanosleep() (1) Total: 1000598016 Avg: 1000598016 Max: 1000598016 Min:1000598016 + Event: func: sys_munmap() (1) Total: 14300 Avg: 14300 Max: 14300 Min:14300 + Event: func: sys_arch_prctl() (1) Total: 571 Avg: 571 Max: 571 Min:571 + Event: func: sys_mprotect() (4) Total: 14382 Avg: 3595 Max: 7196 Min:2190 + Event: func: SyS_read() (1) Total: 2640 Avg: 2640 Max: 2640 Min:2640 + Event: func: sys_close() (5) Total: 4001 Avg: 800 Max: 1252 Min:414 + Event: func: sys_newfstat() (3) Total: 11684 Avg: 3894 Max: 10206 Min:636 + Event: func: SyS_open() (3) Total: 23615 Avg: 7871 Max: 10535 Min:4743 + Event: func: sys_access() (1) Total: 5924 Avg: 5924 Max: 5924 Min:5924 + Event: func: SyS_mmap() (8) Total: 39153 Avg: 4894 Max: 12354 Min:1518 + Event: func: smp_trace_apic_timer_interrupt() (1) Total: 10298 Avg: 10298 Max: 10298 Min:10298 + Event: func: SyS_brk() (4) Total: 2407 Avg: 601 Max: 1564 Min:206 + Event: func: do_notify_resume() (2) Total: 4095 Avg: 2047 Max: 
2521 Min:1574 + Event: func: sys_execve() (5) Total: 1625251 Avg: 325050 Max: 1605698 Min:3570 + | + + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960) + 100% (1) time:1605698 max:1605698 min:0 avg:1605698 + ttwu_do_wakeup (0xffffffff810a01a2) + ttwu_do_activate.constprop.122 (0xffffffff810a0236) + try_to_wake_up (0xffffffff810a3ec3) + wake_up_process (0xffffffff810a4057) + cpu_stop_queue_work (0xffffffff81108df8) + stop_one_cpu (0xffffffff8110909a) + sched_exec (0xffffffff810a119b) + do_execveat_common.isra.31 (0xffffffff811de528) + do_execve (0xffffffff811dea8c) + SyS_execve (0xffffffff811ded1e) + return_to_handler (0xffffffff816c8458) + stub_execve (0xffffffff816c6929) + stub_execve (0xffffffff816c6929) + + + Event: func: syscall_trace_enter_phase2() (38) Total: 21544 Avg: 566 Max: 1066 Min:329 + Event: func: syscall_trace_enter_phase1() (38) Total: 9202 Avg: 242 Max: 376 Min:150 + Event: func: __do_page_fault() (53) Total: 257672 Avg: 4861 Max: 27745 Min:458 + | + + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960) + 100% (1) time:27745 max:27745 min:0 avg:27745 + ttwu_do_wakeup (0xffffffff810a01a2) + ttwu_do_activate.constprop.122 (0xffffffff810a0236) + try_to_wake_up (0xffffffff810a3ec3) + default_wake_function (0xffffffff810a4002) + autoremove_wake_function (0xffffffff810b50fd) + __wake_up_common (0xffffffff810b4958) + __wake_up (0xffffffff810b4cb8) + rb_wake_up_waiters (0xffffffff8112f126) + irq_work_run_list (0xffffffff81157d0f) + irq_work_run (0xffffffff81157d5e) + smp_trace_irq_work_interrupt (0xffffffff810082fc) + trace_irq_work_interrupt (0xffffffff816c7aaa) + return_to_handler (0xffffffff816c8458) + trace_do_page_fault (0xffffffff810478b2) + trace_page_fault (0xffffffff816c7dd2) + + + Event: func: syscall_trace_leave() (38) Total: 26145 Avg: 688 Max: 1264 Min:381 + Event: func: __sb_end_write() (1) Total: 373 Avg: 373 Max: 373 Min:373 + Event: func: fsnotify() (1) Total: 598 Avg: 598 Max: 598 Min:598 + Event: func: 
__fsnotify_parent() (1) Total: 286 Avg: 286 Max: 286 Min:286 + Event: func: mutex_unlock() (2) Total: 39636 Avg: 19818 Max: 39413 Min:223 + Event: func: smp_trace_irq_work_interrupt() (6) Total: 236459 Avg: 39409 Max: 100671 Min:634 + | + + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960) + 100% (4) time:234348 max:100671 min:38745 avg:58587 + ttwu_do_wakeup (0xffffffff810a01a2) + ttwu_do_activate.constprop.122 (0xffffffff810a0236) + try_to_wake_up (0xffffffff810a3ec3) + default_wake_function (0xffffffff810a4002) + autoremove_wake_function (0xffffffff810b50fd) + __wake_up_common (0xffffffff810b4958) + __wake_up (0xffffffff810b4cb8) + rb_wake_up_waiters (0xffffffff8112f126) + irq_work_run_list (0xffffffff81157d0f) + irq_work_run (0xffffffff81157d5e) + smp_trace_irq_work_interrupt (0xffffffff810082fc) + return_to_handler (0xffffffff816c8458) + trace_irq_work_interrupt (0xffffffff816c7aaa) + | + + ftrace_return_to_handler (0xffffffff81140840) + | 84% (3) time:197396 max:100671 min:38745 avg:65798 + | return_to_handler (0xffffffff816c846d) + | trace_page_fault (0xffffffff816c7dd2) + | + + ftrace_return_to_handler (0xffffffff81140840) + 16% (1) time:36952 max:36952 min:0 avg:36952 + ftrace_graph_caller (0xffffffff816c8428) + mutex_unlock (0xffffffff816c3f75) + rb_simple_write (0xffffffff81133142) + vfs_write (0xffffffff811d7727) + SyS_write (0xffffffff811d7acf) + tracesys_phase2 (0xffffffff816c65b0) + + + + + Event: sys_enter:35 (1) Total: 1000599765 Avg: 1000599765 Max: 1000599765 Min:1000599765 + Event: sys_enter:11 (1) Total: 55025 Avg: 55025 Max: 55025 Min:55025 + Event: sys_enter:158 (1) Total: 1584 Avg: 1584 Max: 1584 Min:1584 + Event: sys_enter:10 (4) Total: 18359 Avg: 4589 Max: 8764 Min:2933 + Event: sys_enter:0 (1) Total: 4223 Avg: 4223 Max: 4223 Min:4223 + Event: sys_enter:3 (5) Total: 9948 Avg: 1989 Max: 2606 Min:1203 + Event: sys_enter:5 (3) Total: 15530 Avg: 5176 Max: 11840 Min:1405 + Event: sys_enter:2 (3) Total: 28002 Avg: 9334 Max: 12035 
Min:5656 + Event: sys_enter:21 (1) Total: 7814 Avg: 7814 Max: 7814 Min:7814 + Event: sys_enter:9 (8) Total: 49583 Avg: 6197 Max: 14137 Min:2362 + Event: sys_enter:12 (4) Total: 108493 Avg: 27123 Max: 104079 Min:922 + Event: sys_enter:59 (5) Total: 1631608 Avg: 326321 Max: 1607529 Min:4563 + Event: page_fault_user:0x398d86b630 (1) + Event: page_fault_user:0x398d844de0 (1) + Event: page_fault_user:0x398d8d9020 (1) + Event: page_fault_user:0x1d37008 (1) + Event: page_fault_user:0x7f0b89e91074 (1) + Event: page_fault_user:0x7f0b89d98ed0 (1) + Event: page_fault_user:0x7f0b89ec8950 (1) + Event: page_fault_user:0x7f0b89d83644 (1) + Event: page_fault_user:0x7f0b89d622a8 (1) + Event: page_fault_user:0x7f0b89d5a560 (1) + Event: page_fault_user:0x7f0b89d34010 (1) + Event: page_fault_user:0x1d36008 (1) + Event: page_fault_user:0x398d900510 (1) + Event: page_fault_user:0x398dbb3ae8 (1) + Event: page_fault_user:0x398d87f490 (1) + Event: page_fault_user:0x398d8eb660 (1) + Event: page_fault_user:0x398d8bd730 (1) + Event: page_fault_user:0x398d9625d9 (1) + Event: page_fault_user:0x398d931810 (1) + Event: page_fault_user:0x398dbb7114 (1) + Event: page_fault_user:0x398d837610 (1) + Event: page_fault_user:0x398d89e860 (1) + Event: page_fault_user:0x398d8f23b0 (1) + Event: page_fault_user:0x398dbb4510 (1) + Event: page_fault_user:0x398dbad6f0 (1) + Event: page_fault_user:0x398dbb1018 (1) + Event: page_fault_user:0x398d977b37 (1) + Event: page_fault_user:0x398d92eb60 (1) + Event: page_fault_user:0x398d8abff0 (1) + Event: page_fault_user:0x398dbb0d30 (1) + Event: page_fault_user:0x398dbb6c24 (1) + Event: page_fault_user:0x398d821c50 (1) + Event: page_fault_user:0x398dbb6c20 (1) + Event: page_fault_user:0x398d886350 (1) + Event: page_fault_user:0x7f0b90125000 (1) + Event: page_fault_user:0x7f0b90124740 (1) + Event: page_fault_user:0x7f0b90126000 (1) + Event: page_fault_user:0x398d816230 (1) + Event: page_fault_user:0x398d8002b8 (1) + Event: page_fault_user:0x398dbb0b40 (1) + Event: 
page_fault_user:0x398dbb2880 (1) + Event: page_fault_user:0x7f0b90141cc6 (1) + Event: page_fault_user:0x7f0b9013b85c (1) + Event: page_fault_user:0x7f0b90127000 (1) + Event: page_fault_user:0x606e70 (1) + Event: page_fault_user:0x7f0b90144010 (1) + Event: page_fault_user:0x7fffcb31b038 (1) + Event: page_fault_user:0x606da8 (1) + Event: page_fault_user:0x400040 (1) + Event: page_fault_user:0x398d222218 (1) + Event: page_fault_user:0x398d015120 (1) + Event: page_fault_user:0x398d220ce8 (1) + Event: page_fault_user:0x398d220b80 (1) + Event: page_fault_user:0x7fffcb2fcff8 (1) + Event: page_fault_user:0x398d001590 (1) + Event: page_fault_user:0x398d838490 (1) + Event: softirq_raise:RCU (3) Total: 252931 Avg: 84310 Max: 243288 Min:4639 + Event: softirq_raise:SCHED (2) Total: 241249 Avg: 120624 Max: 239076 Min:2173 + | + + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960) + 100% (1) time:239076 max:239076 min:0 avg:239076 + ttwu_do_wakeup (0xffffffff810a01a2) + ttwu_do_activate.constprop.122 (0xffffffff810a0236) + try_to_wake_up (0xffffffff810a3ec3) + default_wake_function (0xffffffff810a4002) + autoremove_wake_function (0xffffffff810b50fd) + __wake_up_common (0xffffffff810b4958) + __wake_up (0xffffffff810b4cb8) + rb_wake_up_waiters (0xffffffff8112f126) + irq_work_run_list (0xffffffff81157d0f) + irq_work_run (0xffffffff81157d5e) + smp_trace_irq_work_interrupt (0xffffffff810082fc) + trace_irq_work_interrupt (0xffffffff816c7aaa) + irq_exit (0xffffffff8107dd66) + smp_trace_apic_timer_interrupt (0xffffffff816c8c7a) + trace_apic_timer_interrupt (0xffffffff816c725a) + prepare_ftrace_return (0xffffffff8103d4fd) + ftrace_graph_caller (0xffffffff816c8428) + mem_cgroup_begin_page_stat (0xffffffff811cfd25) + page_remove_rmap (0xffffffff811a4fc5) + stub_execve (0xffffffff816c6929) + unmap_single_vma (0xffffffff81198b1c) + unmap_vmas (0xffffffff81199174) + exit_mmap (0xffffffff811a1f5b) + mmput (0xffffffff8107699a) + flush_old_exec (0xffffffff811ddb75) + load_elf_binary 
(0xffffffff812287df) + search_binary_handler (0xffffffff811dd3e0) + do_execveat_common.isra.31 (0xffffffff811de8bd) + do_execve (0xffffffff811dea8c) + SyS_execve (0xffffffff811ded1e) + return_to_handler (0xffffffff816c8458) + + + Event: softirq_raise:HI (3) Total: 72472 Avg: 24157 Max: 64186 Min:3430 + Event: softirq_entry:RCU (2) Total: 3191 Avg: 1595 Max: 1788 Min:1403 + | + + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960) + 100% (1) time:1788 max:1788 min:0 avg:1788 + ttwu_do_wakeup (0xffffffff810a01a2) + ttwu_do_activate.constprop.122 (0xffffffff810a0236) + try_to_wake_up (0xffffffff810a3ec3) + default_wake_function (0xffffffff810a4002) + autoremove_wake_function (0xffffffff810b50fd) + __wake_up_common (0xffffffff810b4958) + __wake_up (0xffffffff810b4cb8) + rb_wake_up_waiters (0xffffffff8112f126) + irq_work_run_list (0xffffffff81157d0f) + irq_work_run (0xffffffff81157d5e) + smp_trace_irq_work_interrupt (0xffffffff810082fc) + trace_irq_work_interrupt (0xffffffff816c7aaa) + irq_work_queue (0xffffffff81157e95) + ring_buffer_unlock_commit (0xffffffff8113039f) + __buffer_unlock_commit (0xffffffff811367d5) + trace_buffer_unlock_commit (0xffffffff811376a2) + ftrace_event_buffer_commit (0xffffffff81146d5f) + ftrace_raw_event_sched_process_exec (0xffffffff8109c511) + do_execveat_common.isra.31 (0xffffffff811de9a3) + do_execve (0xffffffff811dea8c) + SyS_execve (0xffffffff811ded1e) + return_to_handler (0xffffffff816c8458) + stub_execve (0xffffffff816c6929) + + + Event: softirq_entry:SCHED (2) Total: 2289 Avg: 1144 Max: 1350 Min:939 + Event: softirq_entry:HI (3) Total: 180146 Avg: 60048 Max: 178969 Min:499 + | + + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960) + 100% (1) time:178969 max:178969 min:0 avg:178969 + ttwu_do_wakeup (0xffffffff810a01a2) + ttwu_do_activate.constprop.122 (0xffffffff810a0236) + try_to_wake_up (0xffffffff810a3ec3) + wake_up_process (0xffffffff810a4057) + wake_up_worker (0xffffffff8108de74) + insert_work (0xffffffff8108fca6) 
+ __queue_work (0xffffffff8108fe12) + delayed_work_timer_fn (0xffffffff81090088) + call_timer_fn (0xffffffff810d8f89) + run_timer_softirq (0xffffffff810da8a1) + __do_softirq (0xffffffff8107d8fa) + irq_exit (0xffffffff8107dd66) + smp_trace_apic_timer_interrupt (0xffffffff816c8c7a) + trace_apic_timer_interrupt (0xffffffff816c725a) + prepare_ftrace_return (0xffffffff8103d4fd) + ftrace_graph_caller (0xffffffff816c8428) + mem_cgroup_begin_page_stat (0xffffffff811cfd25) + page_remove_rmap (0xffffffff811a4fc5) + stub_execve (0xffffffff816c6929) + unmap_single_vma (0xffffffff81198b1c) + unmap_vmas (0xffffffff81199174) + exit_mmap (0xffffffff811a1f5b) + mmput (0xffffffff8107699a) + flush_old_exec (0xffffffff811ddb75) + load_elf_binary (0xffffffff812287df) + search_binary_handler (0xffffffff811dd3e0) + do_execveat_common.isra.31 (0xffffffff811de8bd) + do_execve (0xffffffff811dea8c) + SyS_execve (0xffffffff811ded1e) + return_to_handler (0xffffffff816c8458) + --- + +The above uses *-F* to follow the sleep task. It filters only on events +that pertain to sleep. Note, in order to follow forks, you need to also +include the *-c* flag. + +Other tasks will appear in the profile as well if events reference more +than one task (like sched_switch and sched_wakeup do: the "prev_pid" and +"next_pid" of sched_switch, and the "common_pid" and "pid" of sched_wakeup). + +Stack traces are attached to events that are related to them. + +Taking a look at the above output: + + Event: sched_switch:R (2) Total: 234559 Avg: 117279 Max: 129886 Min:104673 + +This shows that the task was preempted (it's in the running 'R' state). +It was preempted twice '(2)' for a total of 234,559 nanoseconds, with an average +preempt time of 117,279 ns, a maximum of 129,886 ns and a minimum of 104,673 ns.
+ +The tree shows where it was preempted: + + + | + + ftrace_raw_event_sched_switch (0xffffffff8109f310) + 100% (2) time:234559 max:129886 min:104673 avg:117279 + __schedule (0xffffffff816c1e81) + preempt_schedule (0xffffffff816c236e) + ___preempt_schedule (0xffffffff81351a59) + | + + unmap_single_vma (0xffffffff81198c05) + | 55% (1) time:129886 max:129886 min:0 avg:129886 + | stop_one_cpu (0xffffffff8110909a) + | sched_exec (0xffffffff810a119b) + | do_execveat_common.isra.31 (0xffffffff811de528) + | do_execve (0xffffffff811dea8c) + | SyS_execve (0xffffffff811ded1e) + | return_to_handler (0xffffffff816c8458) + | stub_execve (0xffffffff816c6929) + | + + unmap_single_vma (0xffffffff81198c05) + 45% (1) time:104673 max:104673 min:0 avg:104673 + unmap_vmas (0xffffffff81199174) + exit_mmap (0xffffffff811a1f5b) + mmput (0xffffffff8107699a) + flush_old_exec (0xffffffff811ddb75) + load_elf_binary (0xffffffff812287df) + search_binary_handler (0xffffffff811dd3e0) + do_execveat_common.isra.31 (0xffffffff811de8bd) + do_execve (0xffffffff811dea8c) + SyS_execve (0xffffffff811ded1e) + return_to_handler (0xffffffff816c8458) + stub_execve (0xffffffff816c6929) + + + Event: sched_switch:S (1) Total: 1000513242 Avg: 1000513242 Max: 1000513242 Min:10005132 + +This shows that the task was scheduled out in the INTERRUPTIBLE state once +for a total of 1,000,513,242 ns (~1s), which makes sense as the task was a +"sleep 1". + +After the schedule events, the function events are shown. By default the +profiler will use the function graph tracer if the depth setting is supported +by the kernel. It will set the depth to one which will only trace the first +function that enters the kernel. It will also record the amount of time +it was in the kernel. 
+ + Event: func: sys_nanosleep() (1) Total: 1000598016 Avg: 1000598016 Max: 1000598016 Min:1000598016 + Event: func: sys_munmap() (1) Total: 14300 Avg: 14300 Max: 14300 Min:14300 + Event: func: sys_arch_prctl() (1) Total: 571 Avg: 571 Max: 571 Min:571 + Event: func: sys_mprotect() (4) Total: 14382 Avg: 3595 Max: 7196 Min:2190 + Event: func: SyS_read() (1) Total: 2640 Avg: 2640 Max: 2640 Min:2640 + Event: func: sys_close() (5) Total: 4001 Avg: 800 Max: 1252 Min:414 + Event: func: sys_newfstat() (3) Total: 11684 Avg: 3894 Max: 10206 Min:636 + Event: func: SyS_open() (3) Total: 23615 Avg: 7871 Max: 10535 Min:4743 + Event: func: sys_access() (1) Total: 5924 Avg: 5924 Max: 5924 Min:5924 + Event: func: SyS_mmap() (8) Total: 39153 Avg: 4894 Max: 12354 Min:1518 + Event: func: smp_trace_apic_timer_interrupt() (1) Total: 10298 Avg: 10298 Max: 10298 Min:10298 + Event: func: SyS_brk() (4) Total: 2407 Avg: 601 Max: 1564 Min:206 + Event: func: do_notify_resume() (2) Total: 4095 Avg: 2047 Max: 2521 Min:1574 + Event: func: sys_execve() (5) Total: 1625251 Avg: 325050 Max: 1605698 Min:3570 + + +The count of times the event was hit is always shown in parentheses, e.g. '(5)'. + +The function graph trace may produce too much overhead as it is still +triggering (just not tracing) on all functions. To limit functions just to +system calls (not interrupts), add the following option: + + -l 'sys_*' -l 'SyS_*' + +To disable function graph tracing totally, use: + + -p nop + +To use function tracing instead (note, this will not record timings, but just +the count of times a function is hit): + + -p function + + +Following the functions are the events that are recorded.
+ + + Event: sys_enter:35 (1) Total: 1000599765 Avg: 1000599765 Max: 1000599765 Min:1000599765 + Event: sys_enter:11 (1) Total: 55025 Avg: 55025 Max: 55025 Min:55025 + Event: sys_enter:158 (1) Total: 1584 Avg: 1584 Max: 1584 Min:1584 + Event: sys_enter:10 (4) Total: 18359 Avg: 4589 Max: 8764 Min:2933 + Event: sys_enter:0 (1) Total: 4223 Avg: 4223 Max: 4223 Min:4223 + Event: sys_enter:3 (5) Total: 9948 Avg: 1989 Max: 2606 Min:1203 + Event: sys_enter:5 (3) Total: 15530 Avg: 5176 Max: 11840 Min:1405 + Event: sys_enter:2 (3) Total: 28002 Avg: 9334 Max: 12035 Min:5656 + Event: sys_enter:21 (1) Total: 7814 Avg: 7814 Max: 7814 Min:7814 + Event: sys_enter:9 (8) Total: 49583 Avg: 6197 Max: 14137 Min:2362 + Event: sys_enter:12 (4) Total: 108493 Avg: 27123 Max: 104079 Min:922 + Event: sys_enter:59 (5) Total: 1631608 Avg: 326321 Max: 1607529 Min:4563 + +These are the raw system call events, with the raw system call ID after +the "sys_enter:" For example, "59" is execve(2). Why did it execute 5 times? +Looking at a strace of this run, we can see: + + execve("/usr/lib64/ccache/sleep", ["sleep", "1"], [/* 27 vars */] <unfinished ...> + <... execve resumed> ) = -1 ENOENT (No such file or directory) + execve("/usr/local/sbin/sleep", ["sleep", "1"], [/* 27 vars */] <unfinished ...> + <... execve resumed> ) = -1 ENOENT (No such file or directory) + execve("/usr/local/bin/sleep", ["sleep", "1"], [/* 27 vars */] <unfinished ...> + <... execve resumed> ) = -1 ENOENT (No such file or directory) + execve("/usr/sbin/sleep", ["sleep", "1"], [/* 27 vars */] <unfinished ...> + <... execve resumed> ) = -1 ENOENT (No such file or directory) + execve("/usr/bin/sleep", ["sleep", "1"], [/* 27 vars */] <unfinished ...> + <... execve resumed> ) = 0 + +It attempted to execve the "sleep" command for each path in $PATH until it found +one. + +The page_fault_user events show what userspace address took a page fault. 
+ + Event: softirq_raise:RCU (3) Total: 252931 Avg: 84310 Max: 243288 Min:4639 + Event: softirq_raise:SCHED (2) Total: 241249 Avg: 120624 Max: 239076 Min:2173 + | + + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960) + 100% (1) time:239076 max:239076 min:0 avg:239076 + ttwu_do_wakeup (0xffffffff810a01a2) + ttwu_do_activate.constprop.122 (0xffffffff810a0236) + try_to_wake_up (0xffffffff810a3ec3) + default_wake_function (0xffffffff810a4002) + autoremove_wake_function (0xffffffff810b50fd) + __wake_up_common (0xffffffff810b4958) + __wake_up (0xffffffff810b4cb8) + rb_wake_up_waiters (0xffffffff8112f126) + irq_work_run_list (0xffffffff81157d0f) + irq_work_run (0xffffffff81157d5e) + smp_trace_irq_work_interrupt (0xffffffff810082fc) + trace_irq_work_interrupt (0xffffffff816c7aaa) + irq_exit (0xffffffff8107dd66) + +The timings for the softirq_raise events measure the time it took from the raised +softirq to the time it executed. + +The timings for the softirq_entry events measure the time the softirq took to +execute. + +The stack traces for the softirqs (and possibly other events) are used when +an event has a stack attached to it. This can happen if the profile ran +more stacks than just the sched events, or when events are dropped and +stacks + + +To have full control of what gets traced, use the *-S* option that will have +trace-cmd not enable any events or the function_graph tracer. Only the events +listed on the command line are shown. + +If only the time of kmalloc is needed to be seen, and where it was recorded, +using the *-S* option and enabling function_graph and stack tracing for just +the function needed will give the profile of only that function. 
+ + --- +# trace-cmd profile -S -p function_graph -l '*kmalloc*' -l '*kmalloc*:stacktrace' sleep 1 +task: sshd-11786 + Event: func: __kmalloc_reserve.isra.59() (2) Total: 149684 Avg: 74842 Max: 75598 Min:74086 + | + + __alloc_skb (0xffffffff815a8917) + | 67% (2) time:149684 max:75598 min:74086 avg:74842 + | __kmalloc_node_track_caller (0xffffffff811c6635) + | __kmalloc_reserve.isra.59 (0xffffffff815a84ac) + | return_to_handler (0xffffffff816c8458) + | sk_stream_alloc_skb (0xffffffff81604ea1) + | tcp_sendmsg (0xffffffff8160592c) + | inet_sendmsg (0xffffffff8162fed1) + | sock_aio_write (0xffffffff8159f9fc) + | do_sync_write (0xffffffff811d694a) + | vfs_write (0xffffffff811d7825) + | SyS_write (0xffffffff811d7adf) + | system_call_fastpath (0xffffffff816c63d2) + | + + __alloc_skb (0xffffffff815a8917) + 33% (1) time:74086 max:74086 min:74086 avg:74086 + __alloc_skb (0xffffffff815a8917) + sk_stream_alloc_skb (0xffffffff81604ea1) + tcp_sendmsg (0xffffffff8160592c) + inet_sendmsg (0xffffffff8162fed1) + sock_aio_write (0xffffffff8159f9fc) + do_sync_write (0xffffffff811d694a) + vfs_write (0xffffffff811d7825) + SyS_write (0xffffffff811d7adf) + system_call_fastpath (0xffffffff816c63d2) + [..] +--- + +To watch the command run but save the output of the profile to a file, +use --stderr and redirect stderr to a file: + +# trace-cmd profile --stderr cyclictest -p 80 -n -t1 2> profile.out + +Or simply use *-o*: + +# trace-cmd profile -o profile.out cyclictest -p 80 -n -t1 + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-stop(1), trace-cmd-reset(1), trace-cmd-split(1), +trace-cmd-list(1), trace-cmd-listen(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2014 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL).
+ diff --git a/Documentation/trace-cmd/trace-cmd-record.1.txt b/Documentation/trace-cmd/trace-cmd-record.1.txt new file mode 100644 index 00000000..6b8e3b4a --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-record.1.txt @@ -0,0 +1,519 @@ +TRACE-CMD-RECORD(1) +=================== + +NAME +---- +trace-cmd-record - record a trace from the Ftrace Linux internal tracer + +SYNOPSIS +-------- +*trace-cmd record* ['OPTIONS'] ['command'] + +DESCRIPTION +----------- +The trace-cmd(1) record command will set up the Ftrace Linux kernel tracer to +record the specified plugins or events that happen while the 'command' +executes. If no command is given, then it will record until the user hits +Ctrl-C. + +The record command of trace-cmd will set up the Ftrace tracer to start tracing +the various events or plugins that are given on the command line. It will then +create a number of tracing processes (one per CPU) that will start recording +from the kernel ring buffer straight into temporary files. When the command is +complete (or Ctrl-C is hit) all the files will be combined into a trace.dat +file that can later be read (see trace-cmd-report(1)). + +OPTIONS +------- +*-p* 'tracer':: + Specify a tracer. Tracers usually do more than just trace an event. + Common tracers are: *function*, *function_graph*, *preemptirqsoff*, + *irqsoff*, *preemptoff* and *wakeup*. A tracer must be supported by the + running kernel. To see a list of available tracers, see trace-cmd-list(1). + +*-e* 'event':: + Specify an event to trace. Various static trace points have been added to + the Linux kernel. They are grouped by subsystem where you can enable all + events of a given subsystem or specify specific events to be enabled. The + 'event' is of the format "subsystem:event-name". You can also just specify + the subsystem without the ':event-name' or the event-name without the + "subsystem:". 
Using "-e sched_switch" will enable the "sched_switch" event + whereas "-e sched" will enable all events under the "sched" subsystem. + + The 'event' can also contain glob expressions. That is, "*stat*" will + select all events (or subsystems) that have the characters "stat" in their + names. + + The keyword 'all' can be used to enable all events. + +*-a*:: + Every event that is being recorded has its output format file saved + in the output file to be able to display it later. But if other + events are enabled in the trace without trace-cmd's knowledge, the + formats of those events will not be recorded and trace-cmd report will + not be able to display them. If this is the case, then specify the + *-a* option and the format for all events in the system will be saved. + +*-T*:: + Enable a stacktrace on each event. For example: + + <idle>-0 [003] 58549.289091: sched_switch: kworker/0:1:0 [120] R ==> trace-cmd:2603 [120] + <idle>-0 [003] 58549.289092: kernel_stack: <stack trace> +=> schedule (ffffffff814b260e) +=> cpu_idle (ffffffff8100a38c) +=> start_secondary (ffffffff814ab828) + +*--func-stack*:: + Enable a stack trace on all functions. Note this is only applicable + for the "function" plugin tracer, and will only take effect if the + -l option is used and succeeds in limiting functions. If the function + tracer is not filtered, and the stack trace is enabled, you can + livelock the machine. + +*-f* 'filter':: + Specify a filter for the previous event. This must come after a *-e*. This + will filter what events get recorded based on the content of the event. + Filtering is passed to the kernel directly so what filtering is allowed + may depend on what version of the kernel you have. Basically, it will + let you use C notation to check if an event should be processed or not. + +---------------------------------------- + ==, >=, <=, >, <, &, |, && and || +---------------------------------------- + + The above are usually safe to use to compare fields.
+ +*--no-filter*:: + Do not filter out the trace-cmd threads. By default, the threads are + filtered out to not be traced by events. This option will have the trace-cmd + threads also be traced. + +*-R* 'trigger':: + Specify a trigger for the previous event. This must come after a *-e*. + This will add a given trigger to the given event. To only enable the trigger + and not the event itself, place the event after the *-v* option. + + See Documentation/trace/events.txt in the Linux kernel source for more + information on triggers. + +*-v*:: + This will cause all events specified after it on the command line to not + be traced. This is useful for selecting a subsystem to be traced but to + leave out various events. For example: "-e sched -v -e "\*stat\*"" will + enable all events in the sched subsystem except those that have "stat" in + their names. + + Note: the *-v* option was taken from the way grep(1) inverts the following + matches. + +*-F*:: + This will filter only the executable that is given on the command line. If + no command is given, then it will filter itself (pretty pointless). + Using *-F* will let you trace only events that are caused by the given + command. + +*-P* 'pid':: + Similar to *-F* but lets you specify a process ID to trace. + +*-c*:: + Used with either *-F* (or *-P* if the kernel supports it) to trace the process' + children too. + +*--user*:: + Execute the specified *command* as the given user. + +*-C* 'clock':: + Set the trace clock to "clock". + + Use trace-cmd(1) list -C to see what clocks are available. + +*-o* 'output-file':: + By default, trace-cmd record will create a 'trace.dat' file. You can + specify a different file to write to with the *-o* option. + +*-l* 'function-name':: + This will limit the 'function' and 'function_graph' tracers to only trace + the given function name. More than one *-l* may be specified on the + command line to trace more than one function. This supports both full + regex(3) parsing and basic glob parsing.
If the filter has only alphanumeric, + '_', '*', '?' and '.' characters, then it will be parsed as a basic glob. + To force it to be a regex, prefix the filter with '^' or append '$' to it + and it will then be parsed as a regex. + +*-g* 'function-name':: + This option is for the function_graph plugin. It will graph the given + function. That is, it will only trace the function and all functions that + it calls. You can have more than one *-g* on the command line. + +*-n* 'function-name':: + This has the opposite effect of *-l*. The function given with the *-n* + option will not be traced. This takes precedence, that is, if you include + the same function for both *-n* and *-l*, it will not be traced. + +*-d*:: + Some tracer plugins, like the latency tracers, enable the function tracer + by default. This option prevents the function tracer from being + enabled at start up. + +*-D*:: + The option *-d* will try to use the function-trace option to disable the + function tracer (if available), otherwise it defaults to the proc file: + /proc/sys/kernel/ftrace_enabled, but will not touch it if the function-trace + option is available. The *-D* option will disable both the ftrace_enabled + proc file as well as the function-trace option if it exists. + + Note, this disables function tracing for all users, which includes users + outside of ftrace tracers (stack_tracer, perf, etc). + +*-O* 'option':: + Ftrace has various options that can be enabled or disabled. This allows + you to set them. Prepending the text 'no' to an option disables it. + For example: "-O nograph-time" will disable the "graph-time" Ftrace + option. + +*-s* 'interval':: + The processes that trace-cmd creates to record from the ring buffer need + to wake up to do the recording. Setting the 'interval' to zero will cause + the processes to wake up every time new data is written into the buffer.
+ But since Ftrace is recording kernel activity, the act of these processes + going back to sleep may cause new events to be written into the ring buffer which will + wake the process back up. This will needlessly add extra data into the + ring buffer. + + The 'interval' metric is microseconds. The default is set to 1000 (1 ms). + This is the time each recording process will sleep before waking up to + record any new data that was written to the ring buffer. + +*-r* 'priority':: + The priority to run the capture threads at. In a busy system the trace + capturing threads may be starved and events can be lost. This increases + the priority of those threads to the real time (FIFO) priority. + But use this option with care, it can also change the behaviour of + the system being traced. + +*-b* 'size':: + This sets the ring buffer size to 'size' kilobytes. Because the Ftrace + ring buffer is per CPU, this size is the size of each per CPU ring buffer + inside the kernel. Using "-b 10000" on a machine with 4 CPUs will make + Ftrace have a total buffer size of 40 Megs. + +*-B* 'buffer-name':: + If the kernel supports multiple buffers, this will add a buffer with + the given name. If the buffer name already exists, that buffer is just + reset and will not be deleted at the end of record execution. If the + buffer is created, it will be removed at the end of execution (unless + *-k* is set, or the 'start' command was used). + + After a buffer name is stated, all events added after that will be + associated with that buffer. If no buffer is specified, or an event + is specified before a buffer name, it will be associated with the + main (toplevel) buffer. + + trace-cmd record -e sched -B block -e block -B time -e timer sleep 1 + + The above will enable all sched events in the main buffer. It will + then create a 'block' buffer instance and enable all block events within + that buffer. A 'time' buffer instance is created and all timer events + will be enabled within that buffer.
+ +*-m* 'size':: + The max size in kilobytes that a per cpu buffer should be. Note, due + to rounding to page size, the number may not be totally correct. + Also, this is performed by switching between two buffers that are half + the given size thus the output may not be of the given size even if + much more was written. + + Use this to prevent running out of disk space for long runs. + +*-M* 'cpumask':: + Set the cpumask to trace. It only affects the last buffer instance + given. If supplied before any buffer instance, then it affects the + main buffer. The value supplied must be a hex number. + + trace-cmd record -p function -M c -B events13 -e all -M 5 + + If the -M is left out, then the mask stays the same. To enable all + CPUs, pass in a value of '-1'. + +*-k*:: + By default, when trace-cmd is finished tracing, it will reset the buffers + and disable all the tracing that it enabled. This option keeps trace-cmd + from disabling the tracer and resetting the buffer. This option is useful for + debugging trace-cmd. + + Note: usually trace-cmd will set the "tracing_on" file back to what it + was before it was called. This option will leave that file set to zero. + +*-i*:: + By default, if an event is listed that trace-cmd does not find, it + will exit with an error. This option will just ignore events that are + listed on the command line but are not found on the system. + +*-N* 'host:port':: + If another machine is running "trace-cmd listen", this option is used to + have the data sent to that machine with UDP packets. Instead of writing + to an output file, the data is sent off to a remote box. This is ideal for + embedded machines with little storage, or having a single machine that + will keep all the data in a single repository.
+ + Note: This option is not supported with latency tracer plugins: + wakeup, wakeup_rt, irqsoff, preemptoff and preemptirqsoff + +*-V* 'cid:port':: + If recording on a guest VM and the host is running *trace-cmd listen* with + the *-V* option as well, or if this is recording on the host, and a guest + is running *trace-cmd listen* with the *-V* option, then connect to the + listener (the same as connecting with the *-N* option via the network). + This has the same limitations as the *-N* option above with respect to + latency tracer plugins. + +*-t*:: + This option is used with *-N*, when there's a need to send the live data + with TCP packets instead of UDP. Although TCP is not nearly as fast as + sending the UDP packets, it may be needed if the network is not that + reliable, the amount of data is not that intensive, and a guarantee is + needed that all traced information is transferred successfully. + +*-q* | *--quiet*:: + For use with recording an application. Suppresses normal output + (except for errors) to allow only the application's output to be displayed. + +*--date*:: + With the *--date* option, "trace-cmd" will write timestamps into the + trace buffer after it has finished recording. It will then map the + timestamp to gettimeofday which will allow wall time output from the + timestamps reading the created 'trace.dat' file. + +*--max-graph-depth* 'depth':: + Set the maximum depth the function_graph tracer will trace into a function. + A value of one will only show where userspace enters the kernel but not any + functions called in the kernel. The default is zero, which means no limit. + +*--cmdlines-size* 'size':: + Set the number of entries the kernel tracing file "saved_cmdlines" can + contain. This file is a circular buffer which stores the mapping between + cmdlines and PIDs. If full, it leads to unresolved cmdlines ("<...>") within + the trace. The kernel default value is 128.
+ +*--module* 'module':: + Filter a module's name in function tracing. It is equivalent to adding + ':mod:module' after all other functions being filtered. If no other function + filter is listed, then all of the module's functions will be included in the filter. + + '--module snd' is equivalent to '-l :mod:snd' + + '--module snd -l "*jack*"' is equivalent to '-l "*jack*:mod:snd"' + + '--module snd -n "*"' is equivalent to '-n :mod:snd' + +*--proc-map*:: + Save the traced process address map into the trace.dat file. The traced + processes can be specified using the option *-P*, or as a given 'command'. + +*--profile*:: + With the *--profile* option, "trace-cmd" will enable tracing that can + be used with the trace-cmd-report(1) --profile option. If a tracer *-p* is + not set, and function graph depth is supported by the kernel, then + the function_graph tracer will be enabled with a depth of one (only + show where userspace enters into the kernel). It will also enable + various tracepoints with stack tracing such that the report can show + where tasks have been blocked for the longest time. + + See trace-cmd-profile(1) for more details and examples. + +*-G*:: + Set interrupt (soft and hard) events as global (associated to CPU + instead of tasks). Only works for --profile. + +*-H* 'event-hooks':: + Add custom event matching to connect any two events together. When not + used with *--profile*, it will save the parameter and this will be + used by trace-cmd report --profile, too. That is: + + trace-cmd record -H hrtimer_expire_entry,hrtimer/hrtimer_expire_exit,hrtimer,sp + trace-cmd report --profile + + Will profile hrtimer_expire_entry and hrtimer_expire_exit times. + + See trace-cmd-profile(1) for format. + +*-S*:: (for --profile only) + Only enable the tracer or events specified on the command line. + With this option, the function_graph tracer is not enabled, nor are + any events (like sched_switch), unless they are specifically specified + on the command line (i.e.
-p function -e sched_switch -e sched_wakeup) + +*--ts-offset offset*:: + Add an offset for the timestamp in the trace.dat file. This will add an + offset option into the trace.dat file such that a trace-cmd report will + offset all the timestamps of the events by the given offset. The offset + is in raw units. That is, if the event timestamps are in nanoseconds + the offset will also be in nanoseconds even if the displayed units are + in microseconds. + +*--tsync-interval*:: + Set the loop interval, in ms, for timestamps synchronization with guests: + If a negative number is specified, timestamps synchronization is disabled. + If 0 is specified, no loop is performed - timestamps offset is calculated only twice, + at the beginning and at the end of the trace. + Timestamps synchronization with guests works only if there is support for VSOCK. + +*--tsc2nsec*:: + Convert the current clock to nanoseconds, using tsc multiplier and shift from the Linux + kernel's perf interface. This option does not change the trace clock, just assumes that + the tsc multiplier and shift are applicable for the selected clock. You may use the + "-C tsc2nsec" clock, if not sure what clock to select. + +*--stderr*:: + Have output go to stderr instead of stdout, but the output of the command + executed will not be changed. This is useful if you want to monitor the + output of the command being executed, but not see the output from trace-cmd. + +*--poll*:: + Waiting for data to be available on the trace ring-buffers may trigger + IPIs. This might generate unacceptable trace noise when tracing low latency + or real time systems. The poll option forces trace-cmd to use O_NONBLOCK. + Traces are extracted by busy waiting, which will hog the CPUs, so only use + when really needed. + +*--name*:: + Give a specific name for the current agent being processed. Used after *-A* to + give the guest being traced a name. Useful when using the vsocket ID instead of + a name of the guest.
+ +*--verbose*[='level']:: + Set the log level. Supported log levels are "none", "critical", "error", "warning", + "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log + level to specific value enables all logs from that and all previous levels. + The level will default to "info" if one is not specified. + + Example: enable all critical, error and warning logs + + trace-cmd record --verbose=warning + +*--file-version*:: + Desired version of the output file. Supported versions are 6 or 7. + +*--compression*:: + Compression of the trace output file, one of these strings can be passed: + + 'any' - auto select the best available compression algorithm + + 'none' - do not compress the trace file + + 'name' - the name of the desired compression algorithms. Available algorithms can be listed with + trace-cmd list -c + +EXAMPLES +-------- + +The basic way to trace all events: + +------------------------------ + # trace-cmd record -e all ls > /dev/null + # trace-cmd report + trace-cmd-13541 [003] 106260.693809: filemap_fault: address=0x128122 offset=0xce + trace-cmd-13543 [001] 106260.693809: kmalloc: call_site=81128dd4 ptr=0xffff88003dd83800 bytes_req=768 bytes_alloc=1024 gfp_flags=GFP_KERNEL|GFP_ZERO + ls-13545 [002] 106260.693809: kfree: call_site=810a7abb ptr=0x0 + ls-13545 [002] 106260.693818: sys_exit_write: 0x1 + + +------------------------------ + + + +To use the function tracer with sched switch tracing: + +------------------------------ + # trace-cmd record -p function -e sched_switch ls > /dev/null + # trace-cmd report + ls-13587 [002] 106467.860310: function: hrtick_start_fair <-- pick_next_task_fair + ls-13587 [002] 106467.860313: sched_switch: prev_comm=trace-cmd prev_pid=13587 prev_prio=120 prev_state=R ==> next_comm=trace-cmd next_pid=13583 next_prio=120 + trace-cmd-13585 [001] 106467.860314: function: native_set_pte_at <-- __do_fault + trace-cmd-13586 [003] 106467.860314: function: up_read <-- do_page_fault + 
ls-13587 [002] 106467.860317: function: __phys_addr <-- schedule + trace-cmd-13585 [001] 106467.860318: function: _raw_spin_unlock <-- __do_fault + ls-13587 [002] 106467.860320: function: native_load_sp0 <-- __switch_to + trace-cmd-13586 [003] 106467.860322: function: down_read_trylock <-- do_page_fault + + +------------------------------ + +Here is a nice way to find what interrupts have the highest latency: +------------------------------------------ + # trace-cmd record -p function_graph -e irq_handler_entry -l do_IRQ sleep 10 + # trace-cmd report + <idle>-0 [000] 157412.933969: funcgraph_entry: | do_IRQ() { + <idle>-0 [000] 157412.933974: irq_handler_entry: irq=48 name=eth0 + <idle>-0 [000] 157412.934004: funcgraph_exit: + 36.358 us | } + <idle>-0 [000] 157413.895004: funcgraph_entry: | do_IRQ() { + <idle>-0 [000] 157413.895011: irq_handler_entry: irq=48 name=eth0 + <idle>-0 [000] 157413.895026: funcgraph_exit: + 24.014 us | } + <idle>-0 [000] 157415.891762: funcgraph_entry: | do_IRQ() { + <idle>-0 [000] 157415.891769: irq_handler_entry: irq=48 name=eth0 + <idle>-0 [000] 157415.891784: funcgraph_exit: + 22.928 us | } + <idle>-0 [000] 157415.934869: funcgraph_entry: | do_IRQ() { + <idle>-0 [000] 157415.934874: irq_handler_entry: irq=48 name=eth0 + <idle>-0 [000] 157415.934906: funcgraph_exit: + 37.512 us | } + <idle>-0 [000] 157417.888373: funcgraph_entry: | do_IRQ() { + <idle>-0 [000] 157417.888381: irq_handler_entry: irq=48 name=eth0 + <idle>-0 [000] 157417.888398: funcgraph_exit: + 25.943 us | } + + +------------------------------------------ + +An example of the profile: +------------------------------------------ + # trace-cmd record --profile sleep 1 + # trace-cmd report --profile --comm sleep +task: sleep-21611 + Event: sched_switch:R (1) Total: 99442 Avg: 99442 Max: 99442 Min:99442 + <stack> 1 total:99442 min:99442 max:99442 avg=99442 + => ftrace_raw_event_sched_switch (0xffffffff8105f812) + => __schedule (0xffffffff8150810a) + => preempt_schedule 
(0xffffffff8150842e) + => ___preempt_schedule (0xffffffff81273354) + => cpu_stop_queue_work (0xffffffff810b03c5) + => stop_one_cpu (0xffffffff810b063b) + => sched_exec (0xffffffff8106136d) + => do_execve_common.isra.27 (0xffffffff81148c89) + => do_execve (0xffffffff811490b0) + => SyS_execve (0xffffffff811492c4) + => return_to_handler (0xffffffff8150e3c8) + => stub_execve (0xffffffff8150c699) + Event: sched_switch:S (1) Total: 1000506680 Avg: 1000506680 Max: 1000506680 Min:1000506680 + <stack> 1 total:1000506680 min:1000506680 max:1000506680 avg=1000506680 + => ftrace_raw_event_sched_switch (0xffffffff8105f812) + => __schedule (0xffffffff8150810a) + => schedule (0xffffffff815084b8) + => do_nanosleep (0xffffffff8150b22c) + => hrtimer_nanosleep (0xffffffff8108d647) + => SyS_nanosleep (0xffffffff8108d72c) + => return_to_handler (0xffffffff8150e3c8) + => tracesys_phase2 (0xffffffff8150c304) + Event: sched_wakeup:21611 (1) Total: 30326 Avg: 30326 Max: 30326 Min:30326 + <stack> 1 total:30326 min:30326 max:30326 avg=30326 + => ftrace_raw_event_sched_wakeup_template (0xffffffff8105f653) + => ttwu_do_wakeup (0xffffffff810606eb) + => ttwu_do_activate.constprop.124 (0xffffffff810607c8) + => try_to_wake_up (0xffffffff8106340a) +------------------------------------------ + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), +trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), +trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-profile(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). 
+ diff --git a/Documentation/trace-cmd/trace-cmd-report.1.txt b/Documentation/trace-cmd/trace-cmd-report.1.txt new file mode 100644 index 00000000..aad8ab51 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-report.1.txt @@ -0,0 +1,518 @@ +TRACE-CMD-REPORT(1) +=================== + +NAME +---- +trace-cmd-report - show in ASCII a trace created by trace-cmd record + +SYNOPSIS +-------- +*trace-cmd report* ['OPTIONS'] ['input-file'] + +DESCRIPTION +----------- +The trace-cmd(1) report command will output a human readable report of a trace +created by trace-cmd record. + +OPTIONS +------- +*-i* 'input-file':: + By default, trace-cmd report will read the file 'trace.dat'. But the *-i* + option opens up the given 'input-file' instead. Note, the input file may + also be specified as the last item on the command line. + +*-e*:: + This outputs the endianness of the file. trace-cmd report is smart enough + to be able to read big endian files on little endian machines, and vice + versa. + +*-f*:: + This outputs the list of all functions that have been mapped in the trace.dat file. + Note, this list may contain functions that may not appear in the trace, as + it is the list of mappings to translate function addresses into function names. + +*-P*:: + This outputs the list of "trace_printk()" data. The raw trace data points + to static pointers in the kernel. This must be stored in the trace.dat + file. + +*-E*:: + This lists the possible events in the file (but this list is not + necessarily the list of events in the file). + +*--events*:: + This will list the event formats that are stored in the trace.dat file. + +*--event* regex:: + This will print events that match the given regex. If a colon is specified, + then the characters before the colon will be used to match the system and + the characters after the colon will match the event.
+ + trace-cmd report --event sys:read + + The above will only match events where the system name contains "sys" + and the event name contains "read". + + trace-cmd report --event read + + The above will match all events that contain "read" in their name. Also it + may list all events of a system that contains "read" as well. + +*--check-events*:: + This will parse the event format strings that are stored in the trace.dat + file and return whether the formats can be parsed correctly. It will load + plugins unless *-N* is specified. + +*-t*:: + Print the full timestamp. The timestamps in the data file are usually + recorded to the nanosecond. But the default display of the timestamp + is only to the microsecond. To see the full timestamp, add the *-t* option. + +*-F* 'filter':: + Add a filter to limit what events are displayed. The format of the filter + is: + +------------------------------------------ + <events> ':' <filter> + <events> = SYSTEM'/'EVENT | SYSTEM | EVENT | <events> ',' <events> + <filter> = EVENT_FIELD <op> <value> | <filter> '&&' <filter> | + <filter> '||' <filter> | '(' <filter> ')' | '!' <filter> + <op> = '==' | '!=' | '>=' | '<=' | '>' | '<' | '&' | '|' | '^' | + '+' | '-' | '*' | '/' | '%' + <value> = NUM | STRING | EVENT_FIELD +------------------------------------------ + + SYSTEM is the name of the system to filter on. If the EVENT is left out, + then it applies to all events under the SYSTEM. If only one string is used + without the '/' to delimit between SYSTEM and EVENT, then the filter + will be applied to all systems and events that match the given string. + + Whitespace is ignored, such that "sched:next_pid==123" is equivalent to + "sched : next_pid == 123". + + STRING is defined with single or double quotes (single quote must end with + single quote, and double with double). Whitespace within quotes is not + ignored. + + The representation of a SYSTEM or EVENT may also be a regular expression + as defined by 'regcomp(3)'.
+ + The EVENT_FIELD is the name of the field of an event that is being + filtered. If the event does not contain the EVENT_FIELD, that part of the + equation will be considered false. + +------------------------------------------ + -F 'sched : bogus == 1 || common_pid == 2' +------------------------------------------ + + The "bogus == 1" will always evaluate to FALSE because no event has a + field called "bogus", but the "common_pid == 2" will still be evaluated + since all events have the field "common_pid". Any "sched" event that was + traced by the process with the PID of 2 will be shown. + + Note, the EVENT_FIELD is the field name as shown by an event's format + (as displayed with *--events*), and not what is found in the output. + If the output shows "ID:foo" but the field that "foo" belongs to was + called "name" in the event format, then "name" must be used in the filter. + The same is true about values. If the value that is displayed is converted + to a string symbol, the filter checks the original value and not the + value displayed. For example, to filter on all tasks that were in the + running state at a context switch: + +------------------------------------------ + -F 'sched/sched_switch : prev_state==0' +------------------------------------------ + + Although the output displays 'R', having 'prev_state=="R"' will not work. + + Note: You can also specify 'COMM' as an EVENT_FIELD. This will use the + task name (or comm) of the record to compare. For example, to filter out + all of the "trace-cmd" tasks: + +------------------------------------------ + -F '.*:COMM != "trace-cmd"' +------------------------------------------ + +*-I*:: + Do not print events where the HARDIRQ latency flag is set. + This will filter out most events that are from interrupt context. + Note, it may not filter out function traced functions that are + in interrupt context but were called before the kernel "in interrupt" + flag was set.
+ +*-S*:: + Do not print events where the SOFTIRQ latency flag is set. + This will filter out most events that are from soft interrupt context. + +*-v*:: + This causes the following filters of *-F* to filter out the matching + events. + +------------------------------------------ + -v -F 'sched/sched_switch : prev_state == 0' +------------------------------------------ + + Will not display any sched_switch events that have a prev_state of 0. + Removing the *-v* will only print out those events. + +*-T*:: + Test the filters of -F. After processing a filter string, the + resulting filter will be displayed for each event. This is useful + for using a filter for more than one event where a field may not + exist in all events. Also it can be used to make sure there are no + misspelled event field names, as they will simply be ignored. + *-T* is ignored if *-F* is not specified. + +*-V*:: + Show verbose messages (see *--verbose* but only for the numbers) + +*-L*:: + This will not load system wide plugins. It loads "local only". That is + what it finds in the ~/.trace-cmd/plugins directory. + +*-N*:: + This will not load any plugins. + +*-n* 'event-re':: + This will cause all events that match the option to ignore any registered + handler (by the plugins) to print the event. The normal event will be printed + instead. The 'event-re' is a regular expression as defined by 'regcomp(3)'. + +*--profile*:: + With the *--profile* option, "trace-cmd report" will process all the events + first, and then output a format showing where tasks have spent their time + in the kernel, as well as where they are blocked the most, and where wake up + latencies are. + + See trace-cmd-profile(1) for more details and examples. + +*-G*:: + Set interrupt (soft and hard) events as global (associated to CPU + instead of tasks). Only works for --profile. + +*-H* 'event-hooks':: + Add custom event matching to connect any two events together. + + See trace-cmd-profile(1) for format. 
+ +*-R*:: + This will show the events in "raw" format. That is, it will ignore the event's + print formatting and just print the contents of each field. + +*-r* 'event-re':: + This will cause all events that match the option to print their raw fields. + The 'event-re' is a regular expression as defined by 'regcomp(3)'. + +*-l*:: + This adds a "latency output" format. Information about interrupts being + disabled, soft irq being disabled, the "need_resched" flag being set, + preempt count, and big kernel lock are all being recorded with every + event. But the default display does not show this information. This option + will display this information with 6 characters. When one of the + fields is zero or N/A a \'.\' is shown. + +------------------------------------------ + <idle>-0 0d.h1. 106467.859747: function: ktime_get <-- tick_check_idle +------------------------------------------ + + The 0d.h1. denotes this information. The first character is never a '.' + and represents what CPU the trace was recorded on (CPU 0). The 'd' denotes + that interrupts were disabled. The 'h' means that this was called inside + an interrupt handler. The '1' means that the preemption disabled count + (preempt_count) was set to one. The two '.'s are the "need_resched" flag and + kernel lock counter. If the "need_resched" flag is set, then that character would be a + 'N'. + +*-w*:: + If both the 'sched_switch' and 'sched_wakeup' events are enabled, then + this option will report the latency between the time the task was first + woken, and the time it was scheduled in. + +*-q*:: + Quiet non critical warnings. + +*-O*:: + Pass options to the trace-cmd plugins that are loaded. + + -O plugin:var=value + + The 'plugin:' and '=value' are optional. Value may be left off for options + that are boolean. If the 'plugin:' is left off, then any variable that matches + in all plugins will be set.
+ + Example: -O fgraph:tailprint + +*--cpu* <cpu list>:: + List of CPUs, separated by "," or ":", used for filtering the events. + A range of CPUs can be specified using "cpuX-cpuY" notation, where all CPUs + in the range between cpuX and cpuY will be included in the list. The order + of CPUs in the list must be from lower to greater. + + Example: "--cpu 0,3" - show events from CPUs 0 and 3 + "--cpu 2-4" - show events from CPUs 2, 3 and 4 + +*--cpus*:: + List the CPUs that have data in the trace file then exit. + +*--stat*:: + If the trace.dat file recorded the final stats (output at the end of record) + the *--stat* option can be used to retrieve them. + +*--uname*:: + If the trace.dat file recorded uname during the run, this will retrieve that + information. + +*--version*:: + If the trace.dat file recorded the version of the executable used to create + it, report that version. + +*--ts-offset* offset:: + Add (or subtract if negative) an offset for all timestamps of the previous + data file specified with *-i*. This is useful to merge sort multiple trace.dat + files where the difference in the timestamp is known. For example if a trace + is done on a virtual guest, and another trace is done on the host. If the + host timestamp is 1000 units ahead of the guest, the following can be done: + + trace-cmd report -i host.dat --ts-offset -1000 -i guest.dat + + This will subtract 1000 timestamp units from all the host events as it merges + with the guest.dat events. Note, the offset is in the raw units recorded in + the trace. If the units are nanoseconds, the addition (or subtraction) from + the offset will be nanoseconds even if the displayed units are microseconds. + +*--ts2secs* HZ:: + Convert the current clock source into a second (nanosecond resolution) + output. When using clocks like x86-tsc, if the frequency is known, + by passing in the clock frequency, this will convert the time to seconds.
+ + This option affects any trace.dat file given with *-i* preceding it. + If this option comes before any *-i* option, then that value becomes + the default conversion for all other trace.dat files. If another + --ts2secs option appears after a *-i* trace.dat file, then that option + will override the default value. + + Example: On a 3.4 GHz machine + + trace-cmd record -p function -C x86-tsc + + trace-cmd report --ts2secs 3400000000 + + The report will convert the cycles timestamps into a readable second + display. The default display resolution is microseconds, unless *-t* + is used. + + The value of --ts-offset must still be in the raw timestamp units, even + with this option. The offset will be converted as well. + +*--ts-diff*:: + Show the time differences between events. The difference will appear in + parentheses just after the timestamp. + +*--ts-check*:: + Make sure no timestamp goes backwards, and if it does, print out a warning + message of the fact. + +*--nodate*:: + Ignore converting the timestamps to the date set by *trace-cmd record*(1) --date option. + +*--raw-ts*:: + Display raw timestamps, without any corrections. + +*--align-ts*:: + Display timestamps aligned to the first event. + +*--verbose*[='level']:: + Set the log level. Supported log levels are "none", "crit", "err", "warn", + "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log + level to a specific value enables all logs from that and all previous levels. + The level will default to "info" if one is not specified.
+ + Example: enable all critical, error and warning logs + + trace-cmd report --verbose=warning + +EXAMPLES +-------- + +Using a trace.dat file that was created with: + +------------------------------------------ + # trace-cmd record -p function -e all sleep 5 + + +------------------------------------------ + +The default report shows: + +------------------------------------------ + # trace-cmd report + trace-cmd-16129 [002] 158126.498411: function: __mutex_unlock_slowpath <-- mutex_unlock + trace-cmd-16131 [000] 158126.498411: kmem_cache_alloc: call_site=811223c5 ptr=0xffff88003ecf2b40 bytes_req=272 bytes_alloc=320 gfp_flags=GFP_KERNEL|GFP_ZERO + trace-cmd-16130 [003] 158126.498411: function: do_splice_to <-- sys_splice + sleep-16133 [001] 158126.498412: function: inotify_inode_queue_event <-- vfs_write + trace-cmd-16129 [002] 158126.498420: lock_release: 0xffff88003f1fa4f8 &sb->s_type->i_mutex_key + trace-cmd-16131 [000] 158126.498421: function: security_file_alloc <-- get_empty_filp + sleep-16133 [001] 158126.498422: function: __fsnotify_parent <-- vfs_write + trace-cmd-16130 [003] 158126.498422: function: rw_verify_area <-- do_splice_to + trace-cmd-16131 [000] 158126.498424: function: cap_file_alloc_security <-- security_file_alloc + trace-cmd-16129 [002] 158126.498425: function: syscall_trace_leave <-- int_check_syscall_exit_work + sleep-16133 [001] 158126.498426: function: inotify_dentry_parent_queue_event <-- vfs_write + trace-cmd-16130 [003] 158126.498426: function: security_file_permission <-- rw_verify_area + trace-cmd-16129 [002] 158126.498428: function: audit_syscall_exit <-- syscall_trace_leave +[...] 
+ + +------------------------------------------ + +To see everything but the function traces: + +------------------------------------------ + # trace-cmd report -v -F 'function' + trace-cmd-16131 [000] 158126.498411: kmem_cache_alloc: call_site=811223c5 ptr=0xffff88003ecf2b40 bytes_req=272 bytes_alloc=320 gfp_flags=GFP_KERNEL|GFP_ZERO + trace-cmd-16129 [002] 158126.498420: lock_release: 0xffff88003f1fa4f8 &sb->s_type->i_mutex_key + trace-cmd-16130 [003] 158126.498436: lock_acquire: 0xffffffff8166bf78 read all_cpu_access_lock + trace-cmd-16131 [000] 158126.498438: lock_acquire: 0xffff88003df5b520 read &fs->lock + trace-cmd-16129 [002] 158126.498446: kfree: call_site=810a7abb ptr=0x0 + trace-cmd-16130 [003] 158126.498448: lock_acquire: 0xffff880002250a80 &per_cpu(cpu_access_lock, cpu) + trace-cmd-16129 [002] 158126.498450: sys_exit_splice: 0xfffffff5 + trace-cmd-16131 [000] 158126.498454: lock_release: 0xffff88003df5b520 &fs->lock + sleep-16133 [001] 158126.498456: kfree: call_site=810a7abb ptr=0x0 + sleep-16133 [001] 158126.498460: sys_exit_write: 0x1 + trace-cmd-16130 [003] 158126.498462: kmalloc: call_site=810bf95b ptr=0xffff88003dedc040 bytes_req=24 bytes_alloc=32 gfp_flags=GFP_KERNEL|GFP_ZERO + + +------------------------------------------ + +To see only the kmalloc calls that were greater than 1000 bytes: + +------------------------------------------ + #trace-cmd report -F 'kmalloc: bytes_req > 1000' + <idle>-0 [000] 158128.126641: kmalloc: call_site=81330635 ptr=0xffff88003c2fd000 bytes_req=2096 bytes_alloc=4096 gfp_flags=GFP_ATOMIC + + +------------------------------------------ + +To see wakeups and sched switches that left the previous task in the running +state: +------------------------------------------ + # trace-cmd report -F 'sched: prev_state == 0 || (success == 1)' + trace-cmd-16132 [002] 158126.499951: sched_wakeup: comm=trace-cmd pid=16129 prio=120 success=1 target_cpu=002 + trace-cmd-16132 [002] 158126.500401: sched_switch: prev_comm=trace-cmd 
prev_pid=16132 prev_prio=120 prev_state=R ==> next_comm=trace-cmd next_pid=16129 next_prio=120 + <idle>-0 [003] 158126.500585: sched_wakeup: comm=trace-cmd pid=16130 prio=120 success=1 target_cpu=003 + <idle>-0 [003] 158126.501241: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=trace-cmd next_pid=16130 next_prio=120 + trace-cmd-16132 [000] 158126.502475: sched_wakeup: comm=trace-cmd pid=16131 prio=120 success=1 target_cpu=000 + trace-cmd-16131 [002] 158126.506516: sched_wakeup: comm=trace-cmd pid=16129 prio=120 success=1 target_cpu=002 + <idle>-0 [003] 158126.550110: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=trace-cmd next_pid=16130 next_prio=120 + trace-cmd-16131 [003] 158126.570243: sched_wakeup: comm=trace-cmd pid=16129 prio=120 success=1 target_cpu=003 + trace-cmd-16130 [002] 158126.618202: sched_switch: prev_comm=trace-cmd prev_pid=16130 prev_prio=120 prev_state=R ==> next_comm=yum-updatesd next_pid=3088 next_prio=1 20 + trace-cmd-16129 [003] 158126.622379: sched_wakeup: comm=trace-cmd pid=16131 prio=120 success=1 target_cpu=003 + trace-cmd-16129 [000] 158126.649287: sched_wakeup: comm=trace-cmd pid=16131 prio=120 success=1 target_cpu=000 + + +------------------------------------------ + +The above needs a little explanation. The filter specifies the "sched" +subsystem, which includes both sched_switch and sched_wakeup events. Any event +that does not have the format field "prev_state" or "success", will evaluate +those expressions as FALSE, and will not produce a match. Using "||" will have +the "prev_state" test happen for the "sched_switch" event and the "success" +test happen for the "sched_wakeup" event. + + +------------------------------------------ + # trace-cmd report -w -F 'sched_switch, sched_wakeup.*' +[...] 
+ trace-cmd-16130 [003] 158131.580616: sched_wakeup: comm=trace-cmd pid=16131 prio=120 success=1 target_cpu=003 + trace-cmd-16129 [000] 158131.581502: sched_switch: prev_comm=trace-cmd prev_pid=16129 prev_prio=120 prev_state=S ==> next_comm=trace-cmd next_pid=16131 next_prio=120 Latency: 885.901 usecs + trace-cmd-16131 [000] 158131.582414: sched_wakeup: comm=trace-cmd pid=16129 prio=120 success=1 target_cpu=000 + trace-cmd-16132 [001] 158131.583219: sched_switch: prev_comm=trace-cmd prev_pid=16132 prev_prio=120 prev_state=S ==> next_comm=trace-cmd next_pid=16129 next_prio=120 Latency: 804.809 usecs + sleep-16133 [002] 158131.584121: sched_wakeup: comm=trace-cmd pid=16120 prio=120 success=1 target_cpu=002 + trace-cmd-16129 [001] 158131.584128: sched_wakeup: comm=trace-cmd pid=16132 prio=120 success=1 target_cpu=001 + sleep-16133 [002] 158131.584275: sched_switch: prev_comm=sleep prev_pid=16133 prev_prio=120 prev_state=R ==> next_comm=trace-cmd next_pid=16120 next_prio=120 Latency: 153.915 usecs + trace-cmd-16130 [003] 158131.585284: sched_switch: prev_comm=trace-cmd prev_pid=16130 prev_prio=120 prev_state=S ==> next_comm=trace-cmd next_pid=16132 next_prio=120 Latency: 1155.677 usecs + +Average wakeup latency: 26626.656 usecs + + +------------------------------------------ + +The above trace produces the wakeup latencies of the tasks. The "sched_switch" +event reports each individual latency after writing the event information. +At the end of the report, the average wakeup latency is reported. 
+ +------------------------------------------ + # trace-cmd report -w -F 'sched_switch, sched_wakeup.*: prio < 100 || next_prio < 100' + <idle>-0 [003] 158131.516753: sched_wakeup: comm=ksoftirqd/3 pid=13 prio=49 success=1 target_cpu=003 + <idle>-0 [003] 158131.516855: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=ksoftirqd/3 next_pid=13 next_prio=49 Latency: 101.244 usecs + <idle>-0 [003] 158131.533781: sched_wakeup: comm=ksoftirqd/3 pid=13 prio=49 success=1 target_cpu=003 + <idle>-0 [003] 158131.533897: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=ksoftirqd/3 next_pid=13 next_prio=49 Latency: 115.608 usecs + <idle>-0 [003] 158131.569730: sched_wakeup: comm=ksoftirqd/3 pid=13 prio=49 success=1 target_cpu=003 + <idle>-0 [003] 158131.569851: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=ksoftirqd/3 next_pid=13 next_prio=49 Latency: 121.024 usecs + +Average wakeup latency: 110.021 usecs + + +------------------------------------------ + +The above version will only show the wakeups and context switches of Real Time +tasks. The 'prio' used inside the kernel starts at 0 for highest priority. +That is 'prio' 0 is equivalent to user space real time priority 99, and +priority 98 is equivalent to user space real time priority 1. +Prios less than 100 represent Real Time tasks. 
+ +An example of the profile: +------------------------------------------ + # trace-cmd record --profile sleep 1 + # trace-cmd report --profile --comm sleep +task: sleep-21611 + Event: sched_switch:R (1) Total: 99442 Avg: 99442 Max: 99442 Min:99442 + <stack> 1 total:99442 min:99442 max:99442 avg=99442 + => ftrace_raw_event_sched_switch (0xffffffff8105f812) + => __schedule (0xffffffff8150810a) + => preempt_schedule (0xffffffff8150842e) + => ___preempt_schedule (0xffffffff81273354) + => cpu_stop_queue_work (0xffffffff810b03c5) + => stop_one_cpu (0xffffffff810b063b) + => sched_exec (0xffffffff8106136d) + => do_execve_common.isra.27 (0xffffffff81148c89) + => do_execve (0xffffffff811490b0) + => SyS_execve (0xffffffff811492c4) + => return_to_handler (0xffffffff8150e3c8) + => stub_execve (0xffffffff8150c699) + Event: sched_switch:S (1) Total: 1000506680 Avg: 1000506680 Max: 1000506680 Min:1000506680 + <stack> 1 total:1000506680 min:1000506680 max:1000506680 avg=1000506680 + => ftrace_raw_event_sched_switch (0xffffffff8105f812) + => __schedule (0xffffffff8150810a) + => schedule (0xffffffff815084b8) + => do_nanosleep (0xffffffff8150b22c) + => hrtimer_nanosleep (0xffffffff8108d647) + => SyS_nanosleep (0xffffffff8108d72c) + => return_to_handler (0xffffffff8150e3c8) + => tracesys_phase2 (0xffffffff8150c304) + Event: sched_wakeup:21611 (1) Total: 30326 Avg: 30326 Max: 30326 Min:30326 + <stack> 1 total:30326 min:30326 max:30326 avg=30326 + => ftrace_raw_event_sched_wakeup_template (0xffffffff8105f653) + => ttwu_do_wakeup (0xffffffff810606eb) + => ttwu_do_activate.constprop.124 (0xffffffff810607c8) + => try_to_wake_up (0xffffffff8106340a) +------------------------------------------ + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-start(1), trace-cmd-stop(1), +trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), +trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-profile(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES 
+--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). + diff --git a/Documentation/trace-cmd/trace-cmd-reset.1.txt b/Documentation/trace-cmd/trace-cmd-reset.1.txt new file mode 100644 index 00000000..eee86751 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-reset.1.txt @@ -0,0 +1,116 @@ +TRACE-CMD-RESET(1) +================== + +NAME +---- +trace-cmd-reset - turn off all Ftrace tracing to bring back full performance + +SYNOPSIS +-------- +*trace-cmd reset* ['OPTIONS'] + +DESCRIPTION +----------- +The trace-cmd(1) reset command turns off all tracing of Ftrace. This will +bring back the performance of the system before tracing was enabled. This is +necessary since 'trace-cmd-record(1)', 'trace-cmd-stop(1)' and +'trace-cmd-extract(1)' do not disable the tracer, even after the data has +been pulled from the buffers. The rationale is that the user may want to +manually enable the tracer with the Ftrace pseudo file system, or examine other +parts of Ftrace to see what trace-cmd did. After the reset command happens, +the data in the ring buffer, and the options that were used are all lost. + +OPTIONS +------- +Please note that the order that options are specified on the command line is +significant. See EXAMPLES. + +*-b* 'buffer_size':: + When the kernel boots, the Ftrace ring buffer is of a minimal size (3 + pages per CPU). The first time the tracer is used, the ring buffer size + expands to what it was set for (default 1.4 Megs per CPU). + + If no more tracing is to be done, this option allows you to shrink the + ring buffer down to free up available memory. + + trace-cmd reset -b 1 + + The buffer instance affected is the one (or ones) specified by the most + recently preceding *-B*, *-t*, or *-a* option: + + When used after *-B*, resizes the buffer instance that precedes it on + the command line.
+ + When used after *-a*, resizes all buffer instances except the top one. + + When used after *-t* or before any *-B* or *-a*, resizes the top + instance. + +*-B* 'buffer-name':: + If the kernel supports multiple buffers, this will reset the trace for + only the given buffer. It does not affect any other buffer. This may be + used multiple times to specify different buffers. The top level buffer + will not be reset if this option is given (unless the *-t* option is + also supplied). + +*-a*:: + Reset the trace for all existing buffer instances. When this option + is used, the top level instance will not be reset unless *-t* is given. + +*-d*:: + This option deletes the instance buffer(s) specified by the most recently + preceding *-B* or *-a* option. Because the top-level instance buffer + cannot be deleted, it is invalid to use this immediately following *-t* or + prior to any *-B* or *-a* option on the command line. + +*-t*:: + Resets the top level instance buffer. Without the *-B* or *-a* option + this is the same as the default. But if *-B* or *-a* is used, this is + required if the top level instance buffer should also be reset. + +EXAMPLES +-------- + +Reset tracing for instance-one and set its per-cpu buffer size to 4096kb. +Also deletes instance-two. The top level instance and any other instances +remain unaffected: + + trace-cmd reset -B instance-one -b 4096 -B instance-two -d + +Delete all instance buffers. Top level instance remains unaffected: + + trace-cmd reset -a -d + +Delete all instance buffers and also reset the top instance: + + trace-cmd reset -t -a -d + +Invalid. This command implies an attempt to delete the top instance: + + trace-cmd reset -a -t -d + +Reset the top instance and set its per-cpu buffer size to 1024kb.
If any +instance buffers exist, they will be unaffected: + + trace-cmd reset -b 1024 + + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-split(1), +trace-cmd-list(1), trace-cmd-listen(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). + diff --git a/Documentation/trace-cmd/trace-cmd-restore.1.txt b/Documentation/trace-cmd/trace-cmd-restore.1.txt new file mode 100644 index 00000000..ebcbb1b6 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-restore.1.txt @@ -0,0 +1,105 @@ +TRACE-CMD-RESTORE(1) +==================== + +NAME +---- +trace-cmd-restore - restore a failed trace record + +SYNOPSIS +-------- +*trace-cmd restore* ['OPTIONS'] ['command'] cpu-file [cpu-file ...] + +DESCRIPTION +----------- +The trace-cmd(1) restore command will restore a crashed trace-cmd-record(1) +file. If for some reason a trace-cmd record fails, it will leave the +per-cpu data files and not create the final trace.dat file. The trace-cmd +restore will append the files to create a working trace.dat file that can +be read with trace-cmd-report(1). + +When trace-cmd record runs, it spawns off a process per CPU and writes +to a per cpu file usually called 'trace.dat.cpuX', where X represents the +CPU number that it is tracing. If the -o option was used in the trace-cmd +record, then the CPU data files will have that name instead of the +'trace.dat' name. If an unexpected crash occurs before the tracing +is finished, then the per CPU files will still exist but there will +not be any trace.dat file to read from. trace-cmd restore will allow you +to create a trace.dat file with the existing data files. 
+ +OPTIONS +------- +*-c*:: + Create a partial trace.dat file from the machine, to be used with + a full trace-cmd restore at another time. This option is useful for + embedded devices. If a server contains the cpu files of a crashed + trace-cmd record (or trace-cmd listen), trace-cmd restore can be + executed on the embedded device with the -c option to get all the + stored information of that embedded device. Then the file created + could be copied to the server to run the trace-cmd restore there + with the cpu files. + + If *-o* is not specified, then the file created will be called + 'trace-partial.dat'. This is because the file is not a full version + of something that trace-cmd-report(1) could use. + +*-t* tracing_dir:: + Used with *-c*, it overrides the location to read the events from. + By default, tracing information is read from the debugfs/tracing + directory. *-t* will use that location instead. This can be useful + if the trace.dat file to create is from another machine. + Just tar -cvf events.tar debugfs/tracing and copy and untar that + file locally, and use that directory instead. + +*-k* kallsyms:: + Used with *-c*, it overrides where to read the kallsyms file from. + By default, /proc/kallsyms is used. *-k* will override the file to + read the kallsyms from. This can be useful if the trace.dat file + to create is from another machine. Just copy the /proc/kallsyms + file locally, and use *-k* to point to that file. + +*-o* output':: + By default, trace-cmd restore will create a 'trace.dat' file + (or 'trace-partial.dat' if *-c* is specified). You can + specify a different file to write to with the *-o* option. + +*-i* input:: + By default, trace-cmd restore will read the information of the + current system to create the initial data stored in the 'trace.dat' + file. If the crash was on another machine, then that machine should + have the trace-cmd restore run with the *-c* option to create the + trace.dat partial file. 
Then that file can be copied to the current + machine where trace-cmd restore will use *-i* to load that file + instead of reading from the current system. + +EXAMPLES +-------- + +If a crash happened on another box, you could run: + + $ trace-cmd restore -c -o box-partial.dat + +Then on the server that has the cpu files: + + $ trace-cmd restore -i box-partial.dat trace.dat.cpu0 trace.dat.cpu1 + +This would create a trace.dat file for the embedded box. + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), +trace-cmd-list(1), trace-cmd-listen(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). + diff --git a/Documentation/trace-cmd/trace-cmd-set.1.txt b/Documentation/trace-cmd/trace-cmd-set.1.txt new file mode 100644 index 00000000..a182d191 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-set.1.txt @@ -0,0 +1,273 @@ +TRACE-CMD-SET(1) +================ + +NAME +---- +trace-cmd-set - set a configuration parameter of the Ftrace Linux internal tracer + +SYNOPSIS +-------- +*trace-cmd set* ['OPTIONS'] ['command'] + +DESCRIPTION +----------- +The trace-cmd(1) set command will set a configuration parameter of the Ftrace +Linux kernel tracer. The specified *command* will be run after the ftrace state +is set. The configured ftrace state can be restored to default +using the trace-cmd-reset(1) command. + +OPTIONS +------- +*-p* 'tracer':: + Specify a tracer. Tracers usually do more than just trace an event. + Common tracers are: *function*, *function_graph*, *preemptirqsoff*, + *irqsoff*, *preemptoff* and *wakeup*. A tracer must be supported by the + running kernel. 
To see a list of available tracers, see trace-cmd-list(1). + +*-e* 'event':: + Specify an event to trace. Various static trace points have been added to + the Linux kernel. They are grouped by subsystem where you can enable all + events of a given subsystem or specify specific events to be enabled. The + 'event' is of the format "subsystem:event-name". You can also just specify + the subsystem without the ':event-name' or the event-name without the + "subsystem:". Using "-e sched_switch" will enable the "sched_switch" event + where as, "-e sched" will enable all events under the "sched" subsystem. + + The 'event' can also contain glob expressions. That is, "*stat*" will + select all events (or subsystems) that have the characters "stat" in their + names. + + The keyword 'all' can be used to enable all events. + +*-T*:: + Enable a stacktrace on each event. For example: + + <idle>-0 [003] 58549.289091: sched_switch: kworker/0:1:0 [120] R ==> trace-cmd:2603 [120] + <idle>-0 [003] 58549.289092: kernel_stack: <stack trace> +=> schedule (ffffffff814b260e) +=> cpu_idle (ffffffff8100a38c) +=> start_secondary (ffffffff814ab828) + +*--func-stack*:: + Enable a stack trace on all functions. Note this is only applicable + for the "function" plugin tracer, and will only take effect if the + -l option is used and succeeds in limiting functions. If the function + tracer is not filtered, and the stack trace is enabled, you can live + lock the machine. + +*-f* 'filter':: + Specify a filter for the previous event. This must come after a *-e*. This + will filter what events get recorded based on the content of the event. + Filtering is passed to the kernel directly so what filtering is allowed + may depend on what version of the kernel you have. Basically, it will + let you use C notation to check if an event should be processed or not. 
+ +---------------------------------------- + ==, >=, <=, >, <, &, |, && and || +---------------------------------------- + + The above are usually safe to use to compare fields. + +*-R* 'trigger':: + Specify a trigger for the previous event. This must come after a *-e*. + This will add a given trigger to the given event. To only enable the trigger + and not the event itself, then place the event after the *-v* option. + + See Documentation/trace/events.txt in the Linux kernel source for more + information on triggers. + +*-v*:: + This will negate options specified after it on the command line. It affects: +[verse] +-- + *-e*: Causes all specified events to not be traced. This is useful for + selecting a subsystem to be traced but to leave out various events. + For example: "-e sched -v -e "\*stat\*"" will enable all events in + the sched subsystem except those that have "stat" in their names. + *-B*: Deletes the specified ftrace instance. There must be no + configuration options related to this instance in the command line. + For example: "-v -B bar -B foo" will delete instance bar and create + a new instance foo. + Note: the *-v* option was taken from the way grep(1) inverts the following + matches. +-- +*-P* 'pid':: + This will filter only the specified process IDs. Using *-P* will let you + trace only events that are caused by the process. + +*-c*:: + Used *-P* to trace the process' children too (if kernel supports it). + +*--user*:: + Execute the specified *command* as given user. + +*-C* 'clock':: + Set the trace clock to "clock". + + Use trace-cmd(1) list -C to see what clocks are available. + +*-l* 'function-name':: + This will limit the 'function' and 'function_graph' tracers to only trace + the given function name. More than one *-l* may be specified on the + command line to trace more than one function. The limited use of glob + expressions are also allowed. These are 'match\*' to only filter functions + that start with 'match'. 
'\*match' to only filter functions that end with + 'match'. '\*match\*' to only filter on functions that contain 'match'. + +*-g* 'function-name':: + This option is for the function_graph plugin. It will graph the given + function. That is, it will only trace the function and all functions that + it calls. You can have more than one *-g* on the command line. + +*-n* 'function-name':: + This has the opposite effect of *-l*. The function given with the *-n* + option will not be traced. This takes precedence, that is, if you include + the same function for both *-n* and *-l*, it will not be traced. + +*-d*:: + Some tracer plugins enable the function tracer by default. Like the + latency tracers. This option prevents the function tracer from being + enabled at start up. + +*-D*:: + The option *-d* will try to use the function-trace option to disable the + function tracer (if available), otherwise it defaults to the proc file: + /proc/sys/kernel/ftrace_enabled, but will not touch it if the function-trace + option is available. The *-D* option will disable both the ftrace_enabled + proc file as well as the function-trace option if it exists. + + Note, this disable function tracing for all users, which includes users + outside of ftrace tracers (stack_tracer, perf, etc). + +*-O* 'option':: + Ftrace has various options that can be enabled or disabled. This allows + you to set them. Appending the text 'no' to an option disables it. + For example: "-O nograph-time" will disable the "graph-time" Ftrace + option. + +*-b* 'size':: + This sets the ring buffer size to 'size' kilobytes. Because the Ftrace + ring buffer is per CPU, this size is the size of each per CPU ring buffer + inside the kernel. Using "-b 10000" on a machine with 4 CPUs will make + Ftrace have a total buffer size of 40 Megs. + +*-B* 'buffer-name':: + If the kernel supports multiple buffers, this will add a buffer with + the given name. If the buffer name already exists, that buffer is just + reset. 
+ + After a buffer name is stated, all events added after that will be + associated with that buffer. If no buffer is specified, or an event + is specified before a buffer name, it will be associated with the + main (toplevel) buffer. + + trace-cmd set -e sched -B block -e block -B time -e timer sleep 1 + + The above will enable all sched events in the main buffer. It will + then create a 'block' buffer instance and enable all block events within + that buffer. A 'time' buffer instance is created and all timer events + will be enabled for that buffer. + +*-m* 'size':: + The max size in kilobytes that a per cpu buffer should be. Note, due + to rounding to page size, the number may not be totally correct. + Also, this is performed by switching between two buffers that are half + the given size thus the output may not be of the given size even if + much more was written. + + Use this to prevent running out of diskspace for long runs. + +*-M* 'cpumask':: + Set the cpumask to trace. It only affects the last buffer instance + given. If supplied before any buffer instance, then it affects the + main buffer. The value supplied must be a hex number. + + trace-cmd set -p function -M c -B events13 -e all -M 5 + + If the -M is left out, then the mask stays the same. To enable all + CPUs, pass in a value of '-1'. + +*-i*:: + By default, if an event is listed that trace-cmd does not find, it + will exit with an error. This option will just ignore events that are + listed on the command line but are not found on the system. + +*-q* | *--quiet*:: + Suppresses normal output, except for errors. + +*--max-graph-depth* 'depth':: + Set the maximum depth the function_graph tracer will trace into a function. + A value of one will only show where userspace enters the kernel but not any + functions called in the kernel. The default is zero, which means no limit. + +*--cmdlines-size* 'size':: + Set the number of entries the kernel tracing file "saved_cmdlines" can + contain. 
This file is a circular buffer which stores the mapping between + cmdlines and PIDs. If full, it leads to unresolved cmdlines ("<...>") within + the trace. The kernel default value is 128. + +*--module* 'module':: + Filter a module's name in function tracing. It is equivalent to adding + ':mod:module' after all other functions being filtered. If no other function + filter is listed, then all modules functions will be filtered in the filter. + + '--module snd' is equivalent to '-l :mod:snd' + + '--module snd -l "*jack*"' is equivalent to '-l "*jack*:mod:snd"' + + '--module snd -n "*"' is equivalent to '-n :mod:snd' + +*--stderr*:: + Have output go to stderr instead of stdout, but the output of the command + executed will not be changed. This is useful if you want to monitor the + output of the command being executed, but not see the output from trace-cmd. + +*--fork*:: + If a command is listed, then trace-cmd will wait for that command to finish, + unless the *--fork* option is specified. Then it will fork the command and + return immediately. + +*--verbose*[='level']:: + Set the log level. Supported log levels are "none", "critical", "error", "warning", + "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log + level to specific value enables all logs from that and all previous levels. + The level will default to "info" if one is not specified. 
+ + Example: enable all critical, error and warning logs + + trace-cmd set --verbose=warning + +EXAMPLES +-------- + +Enable all events for tracing: + +------------------------------ + # trace-cmd set -e all +------------------------------ + +Set the function tracer: + +------------------------------ + # trace-cmd set -p function +------------------------------ + + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), +trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), +trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-profile(1) + +AUTHOR +------ +Written by Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). diff --git a/Documentation/trace-cmd/trace-cmd-show.1.txt b/Documentation/trace-cmd/trace-cmd-show.1.txt new file mode 100644 index 00000000..ea2fda28 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-show.1.txt @@ -0,0 +1,100 @@ +TRACE-CMD-SHOW(1) +================= + +NAME +---- +trace-cmd-show - show the contents of the Ftrace Linux kernel tracing buffer. + +SYNOPSIS +-------- +*trace-cmd show* ['OPTIONS'] + +DESCRIPTION +----------- +The trace-cmd(1) show displays the contents of one of the Ftrace Linux +kernel tracing files: trace, snapshot, or trace_pipe. It is basically +the equivalent of doing: + + cat /sys/kernel/debug/tracing/trace + +OPTIONS +------- +*-p*:: + Instead of displaying the contents of the "trace" file, use the + "trace_pipe" file. The difference between the two is that the "trace" + file is static. That is, if tracing is stopped, the "trace" file + will show the same contents each time. 
+ + The "trace_pipe" file is a consuming read, where a read of the file + will consume the output of what was read and it will not read the + same thing a second time even if tracing is stopped. This file + also will block. If no data is available, trace-cmd show will stop + and wait for data to appear. + +*-s*:: + Instead of reading the "trace" file, read the snapshot file. The snapshot + is made by an application writing into it and the kernel will perform + a swap between the currently active buffer and the current snapshot + buffer. If no more swaps are made, the snapshot will remain static. + This is not a consuming read. + +*-c* 'cpu':: + Read only the trace file for a specified CPU. + +*-f*:: + Display the full path name of the file that is being displayed. + +*-B* 'buf':: + If a buffer instance was created, then the *-B* option will access the + files associated with the given buffer. + +*--tracing_on*:: + Show if tracing is on for the given instance. + +*--current_tracer*:: + Show what the current tracer is. + +*--buffer_size*:: + Show the current buffer size (per-cpu) + +*--buffer_total_size*:: + Show the total size of all buffers. + +*--ftrace_filter*:: + Show what function filters are set. + +*--ftrace_notrace*:: + Show what function disabled filters are set. + +*--ftrace_pid*:: + Show the PIDs the function tracer is limited to (if any). + +*--graph_function*:: + Show the functions that will be graphed. + +*--graph_notrace*:: + Show the functions that will not be graphed. + +*--cpumask*:: + Show the mask of CPUs that tracing will trace. + + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), +trace-cmd-list(1), trace-cmd-listen(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. 
Free use of this software is granted under +the terms of the GNU Public License (GPL). + diff --git a/Documentation/trace-cmd/trace-cmd-snapshot.1.txt b/Documentation/trace-cmd/trace-cmd-snapshot.1.txt new file mode 100644 index 00000000..0a34bcd9 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-snapshot.1.txt @@ -0,0 +1,65 @@ +TRACE-CMD-SNAPSHOT(1) +===================== + +NAME +---- +trace-cmd-snapshot - take, reset, free, or show a Ftrace kernel snapshot + +SYNOPSIS +-------- +*trace-cmd snapshot* ['OPTIONS'] + +DESCRIPTION +----------- +The trace-cmd(1) snapshot controls or displays the Ftrace Linux kernel +snapshot feature (if the kernel supports it). This is useful to "freeze" +an instance of a live trace but without stopping the trace. + + trace-cmd start -p function + trace-cmd snapshot -s + trace-cmd snapshot + [ dumps the content of buffer at 'trace-cmd snapshot -s' ] + trace-cmd snapshot -s + trace-cmd snapshot + [ dumps the new content of the buffer at the last -s operation ] + +OPTIONS +------- +*-s*:: + Take a snapshot of the currently running buffer. + +*-r*:: + Clear out the buffer. + +*-f*:: + Free the snapshot buffer. The buffer takes up memory inside the + kernel. It is best to free it when not in use. The first -s + operation will allocate it if it is not already allocated. + +*-c* 'cpu':: + Operate on a per cpu snapshot (may not be fully supported by all kernels) + +*-B* 'buf':: + If a buffer instance was created, then the *-B* option will operate on + the snapshot within the buffer. + + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), +trace-cmd-list(1), trace-cmd-listen(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. 
Free use of this software is granted under +the terms of the GNU Public License (GPL). + diff --git a/Documentation/trace-cmd/trace-cmd-split.1.txt b/Documentation/trace-cmd/trace-cmd-split.1.txt new file mode 100644 index 00000000..25385796 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-split.1.txt @@ -0,0 +1,107 @@ +TRACE-CMD-SPLIT(1) +================== + +NAME +---- +trace-cmd-split - split a trace.dat file into smaller files + +SYNOPSIS +-------- +*trace-cmd split* ['OPTIONS'] ['start-time' ['end-time']] + +DESCRIPTION +----------- +The trace-cmd(1) split is used to break up a trace.dat into small files. +The 'start-time' specifies where the new file will start at. Using +'trace-cmd-report(1)' and copying the time stamp given at a particular event, +can be used as input for either 'start-time' or 'end-time'. The split will +stop creating files when it reaches an event after 'end-time'. If only the +end-time is needed, use 0.0 as the start-time. + +If start-time is left out, then the split will start at the beginning of the +file. If end-time is left out, then split will continue to the end unless it +meets one of the requirements specified by the options. + +OPTIONS +------- +*-i* 'file':: + If this option is not specified, then the split command will look for the + file named 'trace.dat'. This options will allow the reading of another + file other than 'trace.dat'. + +*-o* 'file':: + By default, the split command will use the input file name as a basis of + where to write the split files. The output file will be the input file + with an attached \'.#\' to the end: trace.dat.1, trace.dat.2, etc. + + This option will change the name of the base file used. + + -o file will create file.1, file.2, etc. + +*-s* 'seconds':: + This specifies how many seconds should be recorded before the new file + should stop. + +*-m* 'milliseconds':: + This specifies how many milliseconds should be recorded before the new + file should stop. 
+ +*-u* 'microseconds':: + This specifies how many microseconds should be recorded before the new + file should stop. + +*-e* 'events':: + This specifies how many events should be recorded before the new file + should stop. + +*-p* 'pages':: + This specifies the number of pages that should be recorded before the new + file should stop. + + Note: only one of *-p*, *-e*, *-u*, *-m*, *-s* may be specified at a time. + + If *-p* is specified, then *-c* is automatically set. + +*-r*:: + This option causes the break up to repeat until end-time is reached (or + end of the input if end-time is not specified). + + trace-cmd split -r -e 10000 + + This will break up trace.dat into several smaller files, each with at most + 10,000 events in it. + +*-c*:: + This option causes the above break up to be per CPU. + + trace-cmd split -c -p 10 + + This will create a file that has 10 pages per each CPU from the input. + +*-C* 'cpu':: + This option will split for a single CPU. Only the cpu named will be extracted + from the file. + + trace-cmd split -C 1 + + This will split out all the events for cpu 1 in the file. + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), +trace-cmd-list(1), trace-cmd-listen(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). + diff --git a/Documentation/trace-cmd/trace-cmd-stack.1.txt b/Documentation/trace-cmd/trace-cmd-stack.1.txt new file mode 100644 index 00000000..20752407 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-stack.1.txt @@ -0,0 +1,50 @@ +TRACE-CMD-STACK(1) +================== + +NAME +---- +trace-cmd-stack - read, enable or disable Ftrace Linux kernel stack tracing. 
+ +SYNOPSIS +-------- +*trace-cmd stack* + +DESCRIPTION +----------- +The trace-cmd(1) stack enables the Ftrace stack tracer within the kernel. +The stack tracer enables the function tracer and at each function call +within the kernel, the stack is checked. When a new maximum usage stack +is discovered, it is recorded. + +When no option is used, the current stack is displayed. + +To enable the stack tracer, use the option *--start*, and to disable +the stack tracer, use the option *--stop*. The output will be the maximum +stack found since the start was enabled. + +Use *--reset* to reset the stack counter to zero. + +Use *--verbose*[='level'] to set the log level. Supported log levels are "none", "critical", "error", +"warning", "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting +the log level to a specific value enables all logs from that and all previous levels. The level will +default to "info" if one is not specified. + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), +trace-cmd-list(1), trace-cmd-listen(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). 
+ diff --git a/Documentation/trace-cmd/trace-cmd-start.1.txt b/Documentation/trace-cmd/trace-cmd-start.1.txt new file mode 100644 index 00000000..03c5d127 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-start.1.txt @@ -0,0 +1,51 @@ +TRACE-CMD-START(1) +================== + +NAME +---- +trace-cmd-start - start the Ftrace Linux kernel tracer without recording + +SYNOPSIS +-------- +*trace-cmd start* ['OPTIONS'] + +DESCRIPTION +----------- +The trace-cmd(1) start enables all the Ftrace tracing the same way +trace-cmd-record(1) does. The difference is that it does not run threads to +create a trace.dat file. This is useful just to enable Ftrace and you are only +interested in the trace after some event has occurred and the trace is +stopped. Then the trace can be read straight from the Ftrace pseudo file +system or can be extracted with trace-cmd-extract(1). + +OPTIONS +------- +The options are the same as 'trace-cmd-record(1)', except that it does not +take options specific to recording (*-s*, *-o*, *-N*, and *-t*). + +*--fork* :: + This option is only available for trace-cmd start. It tells trace-cmd + to not wait for the process to finish before returning. + With this option, trace-cmd start will return right after it forks + the process on the command line. This option only has an effect if + trace-cmd start also executes a command. + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-stop(1), +trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), +trace-cmd-list(1), trace-cmd-listen(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). 
+ diff --git a/Documentation/trace-cmd/trace-cmd-stat.1.txt b/Documentation/trace-cmd/trace-cmd-stat.1.txt new file mode 100644 index 00000000..fb800f91 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-stat.1.txt @@ -0,0 +1,80 @@ +TRACE-CMD-STAT(1) +================= + +NAME +---- +trace-cmd-stat - show the status of the tracing (ftrace) system + +SYNOPSIS +-------- +*trace-cmd stat* ['OPTIONS'] + +DESCRIPTION +----------- +The trace-cmd(1) stat displays the various status of the tracing (ftrace) +system. The status that it shows is: + +*Instances:* List all configured ftrace instances. + +*Tracer:* if one of the tracers (like function_graph) is active. Otherwise + nothing is displayed. + +*Events:* Lists the events that are enabled. + +*Event filters:* Shows any filters that are set for any events + +*Function filters:* Shows any filters for the function tracers + +*Graph functions:* Shows any functions that the function graph tracer should graph + +*Buffers:* Shows the trace buffer size if they have been expanded. + By default, tracing buffers are in a compressed format until they are used. + If they are compressed, the buffer display will not be shown. + +*Trace clock:* If the tracing clock is anything other than the default "local" + it will be displayed. + +*Trace CPU mask:* If not all available CPUs are in the tracing CPU mask, then + the tracing CPU mask will be displayed. + +*Trace max latency:* Shows the value of the trace max latency if it is other than zero. + +*Kprobes:* Shows any kprobes that are defined for tracing. + +*Uprobes:* Shows any uprobes that are defined for tracing. + +*Error log:* Dump the content of ftrace error_log file. + +OPTIONS +------- +*-B* 'buffer-name':: + Display the status of a given buffer instance. May be specified more than once + to display the status of multiple instances. + +*-t*:: + If *-B* is also specified, show the status of the top level tracing directory + as well as the instance(s). 
+ +*-o*:: + Display the all the options along with their values. If they start with "no", then + the option is disabled. + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), +trace-cmd-split(1), trace-cmd-listen(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2014 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). + diff --git a/Documentation/trace-cmd/trace-cmd-stop.1.txt b/Documentation/trace-cmd/trace-cmd-stop.1.txt new file mode 100644 index 00000000..313192c3 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-stop.1.txt @@ -0,0 +1,63 @@ +TRACE-CMD-STOP(1) +================= + +NAME +---- +trace-cmd-stop - stop the Ftrace Linux kernel tracer from writing to the ring +buffer. + +SYNOPSIS +-------- +*trace-cmd stop* ['OPTIONS'] + +DESCRIPTION +----------- +The trace-cmd(1) stop is a complement to 'trace-cmd-start(1)'. This will +disable Ftrace from writing to the ring buffer. This does not stop the +overhead that the tracing may incur. Only the updating of the ring buffer is +disabled, the Ftrace tracing may still be inducing overhead. + +After stopping the trace, the 'trace-cmd-extract(1)' may strip out the data +from the ring buffer and create a trace.dat file. The Ftrace pseudo file +system may also be examined. + +To disable the tracing completely to remove the overhead it causes, use +'trace-cmd-reset(1)'. But after a reset is performed, the data that has been +recorded is lost. + +OPTIONS +------- +*-B* 'buffer-name':: + If the kernel supports multiple buffers, this will stop the trace for + only the given buffer. It does not affect any other buffer. This may be + used multiple times to specify different buffers. 
When this option is + used, the top level instance will not be stopped unless *-t* is given. + +*-a*:: + Stop the trace for all existing buffer instances. When this option + is used, the top level instance will not be stopped unless *-t* is given. + +*-t*:: + Stops the top level instance buffer. Without the *-B* or *-a* option this + is the same as the default. But if *-B* or *-a* is used, this is + required if the top level instance buffer should also be stopped. + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), +trace-cmd-list(1), trace-cmd-listen(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). + diff --git a/Documentation/trace-cmd/trace-cmd-stream.1.txt b/Documentation/trace-cmd/trace-cmd-stream.1.txt new file mode 100644 index 00000000..f83652b8 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd-stream.1.txt @@ -0,0 +1,50 @@ +TRACE-CMD-STREAM(1) +=================== + +NAME +---- +trace-cmd-stream - stream a trace to stdout as it is happening + +SYNOPSIS +-------- +*trace-cmd stream ['OPTIONS']* ['command'] + +DESCRIPTION +----------- +The trace-cmd(1) stream will start tracing just like trace-cmd-record(1), except +it will not record to a file and instead it will read the binary buffer +as it is happening, convert it to a human readable format and write it to +stdout. + +This is basically the same as trace-cmd-start(1) and then doing a trace-cmd-show(1) +with the *-p* option. trace-cmd-stream is not as efficient as reading from the +pipe file as most of the stream work is done in userspace. 
This is useful if +it is needed to do the work mostly in userspace instead of the kernel, and stream +also helps to debug trace-cmd-profile(1) which uses the stream code to perform +the live data analysis for the profile. + + +OPTIONS +------- + These are the same as trace-cmd-record(1), except that it does not take + the *-o* option. + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-stop(1), trace-cmd-reset(1), trace-cmd-split(1), +trace-cmd-list(1), trace-cmd-listen(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2014 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). + diff --git a/Documentation/trace-cmd/trace-cmd.1.txt b/Documentation/trace-cmd/trace-cmd.1.txt new file mode 100644 index 00000000..7e161273 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd.1.txt @@ -0,0 +1,109 @@ +TRACE-CMD(1) +============ + +NAME +---- +trace-cmd - interacts with Ftrace Linux kernel internal tracer + +SYNOPSIS +-------- +*trace-cmd* 'COMMAND' ['OPTIONS'] + +DESCRIPTION +----------- +The trace-cmd(1) command interacts with the Ftrace tracer that is built inside +the Linux kernel. It interfaces with the Ftrace specific files found in the +debugfs file system under the tracing directory. A 'COMMAND' must be +specified to tell trace-cmd what to do. + + +COMMANDS +-------- + + record - record a live trace and write a trace.dat file to the + local disk or to the network. + + set - set a ftrace configuration parameter. + + report - reads a trace.dat file and converts the binary data to a + ASCII text readable format. + + stream - Start tracing and read the output directly + + profile - Start profiling and read the output directly + + hist - show a histogram of the events. 
+ + stat - show tracing (ftrace) status of the running system + + options - list the plugin options that are available to *report* + + start - start the tracing without recording to a trace.dat file. + + stop - stop tracing (only disables recording, overhead of tracer + is still in effect) + + restart - restart tracing from a previous stop (only affects recording) + + extract - extract the data from the kernel buffer and create a trace.dat + file. + + show - display the contents of one of the Ftrace Linux kernel tracing files + + reset - disables all tracing and gives back the system performance. + (clears all data from the kernel buffers) + + clear - clear the content of the Ftrace ring buffers. + + split - splits a trace.dat file into smaller files. + + list - list the available plugins or events that can be recorded. + + listen - open up a port to listen for remote tracing connections. + + agent - listen on a vsocket for trace clients + + setup-guest - create FIFOs for tracing guest VMs + + restore - restore the data files of a crashed run of trace-cmd record + + snapshot - take snapshot of running trace + + stack - run and display the stack tracer + + check-events - parse format strings for all trace events and return + whether all formats are parseable + + convert - convert trace files + + dump - read out the meta data from a trace file + +OPTIONS +------- + +*-h*, --help:: + Display the help text. + +For other options, see the man page for the corresponding command. 
+ +SEE ALSO +-------- +trace-cmd-record(1), trace-cmd-report(1), trace-cmd-hist(1), trace-cmd-start(1), +trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), +trace-cmd-restore(1), trace-cmd-stack(1), trace-cmd-convert(1), +trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1), +trace-cmd.dat(5), trace-cmd-check-events(1), trace-cmd-stat(1) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). + diff --git a/Documentation/trace-cmd/trace-cmd.dat.v6.5.txt b/Documentation/trace-cmd/trace-cmd.dat.v6.5.txt new file mode 100644 index 00000000..8437b363 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd.dat.v6.5.txt @@ -0,0 +1,266 @@ +TRACE-CMD.DAT.v6(5) +=================== + +NAME +---- +trace-cmd.dat.v6 - trace-cmd version 6 file format + +SYNOPSIS +-------- +*trace-cmd.dat* ignore + +DESCRIPTION +----------- +The trace-cmd(1) utility produces a "trace.dat" file. The file may also +be named anything depending if the user specifies a different output name, +but it must have a certain binary format. The file is used +by trace-cmd to save kernel traces into it and be able to extract +the trace from it at a later point (see *trace-cmd-report(1)*). + + +INITIAL FORMAT +-------------- + + The first three bytes contain the magic value: + + 0x17 0x08 0x44 + + The next 7 bytes contain the characters: + + "tracing" + + The next set of characters contain a null '\0' terminated string + that contains the version of the file: + + "6\0" + + The next 1 byte contains the flags for the file endianness: + + 0 = little endian + 1 = big endian + + The next byte contains the number of bytes per "long" value: + + 4 - 32-bit long values + 8 - 64-bit long values + + Note: This is the long size of the target's userspace. 
Not the + kernel space size. + + [ Now all numbers are written in file defined endianness. ] + + The next 4 bytes are a 32-bit word that defines what the traced + host machine page size was. + +HEADER INFO FORMAT +------------------ + + Directly after the initial format comes information about the + trace headers recorded from the target box. + + The next 12 bytes contain the string: + + "header_page\0" + + The next 8 bytes are a 64-bit word containing the size of the + page header information stored next. + + The next set of data is of the size read from the previous 8 bytes, + and contains the data retrieved from debugfs/tracing/events/header_page. + + Note: The size of the second field \fBcommit\fR contains the target + kernel long size. For example: + + field: local_t commit; offset:8; \fBsize:8;\fR signed:1; + + shows the kernel has a 64-bit long. + + The next 13 bytes contain the string: + + "header_event\0" + + The next 8 bytes are a 64-bit word containing the size of the + event header information stored next. + + The next set of data is of the size read from the previous 8 bytes + and contains the data retrieved from debugfs/tracing/events/header_event. + + This data allows the trace-cmd tool to know if the ring buffer format + of the kernel made any changes. + +FTRACE EVENT FORMATS +-------------------- + + Directly after the header information comes the information about + the Ftrace specific events. These are the events used by the Ftrace plugins + and are not enabled by the event tracing. + + The next 4 bytes contain a 32-bit word of the number of Ftrace event + format files that are stored in the file. + + For the number of times defined by the previous 4 bytes is the + following: + + 8 bytes for the size of the Ftrace event format file. 
+ + The Ftrace event format file copied from the target machine: + debugfs/tracing/events/ftrace/<event>/format + +EVENT FORMATS +------------- + + Directly after the Ftrace formats comes the information about + the event layout. + + The next 4 bytes are a 32-bit word containing the number of + event systems that are stored in the file. These are the + directories in debugfs/tracing/events excluding the \fBftrace\fR + directory. + + For the number of times defined by the previous 4 bytes is the + following: + + A null-terminated string containing the system name. + + 4 bytes containing a 32-bit word containing the number + of events within the system. + + For the number of times defined in the previous 4 bytes is the + following: + + 8 bytes for the size of the event format file. + + The event format file copied from the target machine: + debugfs/tracing/events/<system>/<event>/format + +KALLSYMS INFORMATION +-------------------- + + Directly after the event formats comes the information of the mapping + of function addresses to the function names. + + The next 4 bytes are a 32-bit word containing the size of the + data holding the function mappings. + + The next set of data is of the size defined by the previous 4 bytes + and contains the information from the target machine's file: + /proc/kallsyms + + +TRACE_PRINTK INFORMATION +------------------------ + + If a developer used trace_printk() within the kernel, it may + store the format string outside the ring buffer. + This information can be found in: + debugfs/tracing/printk_formats + + The next 4 bytes are a 32-bit word containing the size of the + data holding the printk formats. + + The next set of data is of the size defined by the previous 4 bytes + and contains the information from debugfs/tracing/printk_formats. + + +PROCESS INFORMATION +------------------- + + Directly after the trace_printk formats comes the information mapping + a PID to a process name. 
+ + The next 8 bytes contain a 64-bit word that holds the size of the + data mapping the PID to a process name. + + The next set of data is of the size defined by the previous 8 bytes + and contains the information from debugfs/tracing/saved_cmdlines. + + +REST OF TRACE-CMD HEADER +------------------------ + + Directly after the process information comes the last bit of the + trace.dat file header. + + The next 4 bytes are a 32-bit word defining the number of CPUs that + were discovered on the target machine (and has matching trace data + for it). + + The next 10 bytes are one of the following: + + "options \0" + + "latency \0" + + "flyrecord\0" + + If it is "options \0" then: + + The next 2 bytes are a 16-bit word defining the current option. + If the value is zero then there are no more options. + + Otherwise, the next 4 bytes contain a 32-bit word containing the + option size. If the reader does not know how to handle the option + it can simply skip it. Currently there are no options defined, + but this is here to extend the data. + + The next option will be directly after the previous option, and + the options ends with a zero in the option type field. + + The next 10 bytes after the options are one of the following: + + "latency \0" + + "flyrecord\0" + + which would follow the same as if options were not present. + + If the value is "latency \0", then the rest of the file is + simply ASCII text that was taken from the target's: + debugfs/tracing/trace + + If the value is "flyrecord\0", the following is present: + + For the number of CPUs that were read earlier, the + following is present: + + 8 bytes that are a 64-bit word containing the offset into the file + that holds the data for the CPU. + + 8 bytes that are a 64-bit word containing the size of the CPU + data at that offset. + +CPU DATA +-------- + + The CPU data is located in the part of the file that is specified + in the end of the header. 
Padding is placed between the header and + the CPU data, placing the CPU data at a page aligned (target page) position + in the file. + + This data is copied directly from the Ftrace ring buffer and is of the + same format as the ring buffer specified by the event header files + loaded in the header format file. + + The trace-cmd tool will try to \fBmmap(2)\fR the data page by page with the + target's page size if possible. If it fails to mmap, it will just read the + data instead. + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), +trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1), +trace-cmd.dat(5) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). diff --git a/Documentation/trace-cmd/trace-cmd.dat.v7.5.txt b/Documentation/trace-cmd/trace-cmd.dat.v7.5.txt new file mode 100644 index 00000000..e5bcac76 --- /dev/null +++ b/Documentation/trace-cmd/trace-cmd.dat.v7.5.txt @@ -0,0 +1,451 @@ +TRACE-CMD.DAT.v7(5) +=================== + +NAME +---- +trace-cmd.dat.v7 - trace-cmd version 7 file format + +SYNOPSIS +-------- +*trace-cmd.dat* ignore + +DESCRIPTION +----------- +The trace-cmd(1) utility produces a "trace.dat" file. The file may also +be named anything depending if the user specifies a different output name, +but it must have a certain binary format. The file is used +by trace-cmd to save kernel traces into it and be able to extract +the trace from it at a later point (see *trace-cmd-report(1)*). 
+ + +INITIAL FORMAT +-------------- + + The first three bytes contain the magic value: + + 0x17 0x08 0x44 + + The next 7 bytes contain the characters: + + "tracing" + + The next set of characters contain a null '\0' terminated string + that contains the version of the file: + + "7\0" + + The next 1 byte contains the flags for the file endianness: + + 0 = little endian + 1 = big endian + + The next byte contains the number of bytes per "long" value: + + 4 - 32-bit long values + 8 - 64-bit long values + + Note: This is the long size of the target's user space. Not the + kernel space size. + + [ Now all numbers are written in file defined endianness. ] + + The next 4 bytes are a 32-bit word that defines what the traced + host machine page size was. + + The compression algorithm header is written next: + "name\0version\0" + where "name" and "version" are strings, name and version of the + compression algorithm used to compress the trace file. If the name + is "none", the data in the file is not compressed. + + The next 8 bytes are 64-bit integer, the offset within the file where + the first OPTIONS section is located. + + The rest of the file consists of different sections. The only mandatory + is the first OPTIONS section, all others are optional. The location and + the order of the sections is not strict. Each section starts with a header: + +FORMAT OF THE SECTION HEADER +---------------------------- + <2 bytes> unsigned short integer, ID of the section. + <2 bytes> unsigned short integer, section flags: + 1 = the section is compressed. + <4 bytes> ID of a string, description of the section. + <4 bytes> unsigned integer, size of the section in the file. + + If the section is compressed, the above is the compressed size. + The section must be uncompressed on reading. The described format of + the sections refers to the uncompressed data. 
+ +COMPRESSION FORMAT OF THE FILE SECTIONS +--------------------------------------- + + Some of the sections in the file may be compressed with the compression algorithm, + specified in the compression algorithm header. Compressed sections have a compression + header, written after the section header and right before the compressed data: + <4 bytes> unsigned int, size of compressed data in this section. + <4 bytes> unsigned int, size of uncompressed data. + <data> binary compressed data, with the specified size. + +COMPRESSION FORMAT OF THE TRACE DATA +------------------------------------ + + There are two special sections, BUFFER FLYRECORD and BUFFER LATENCY, containing + trace data. These sections may be compressed with the compression algorithm, specified + in the compression header. Usually the size of these sections is huge, that's why its + compression format is different from the other sections. The trace data is compressed + in chunks. The size of one chunk is specified at file creation time. The format + of compressed trace data is: + <4 bytes> unsigned int, count of chunks. + Follows the compressed chunks of given count. For each chunk: + <4 bytes> unsigned int, size of compressed data in this chunk. + <4 bytes> unsigned int, size of uncompressed data, aligned with the trace page size. + <data> binary compressed data, with the specified size. + These chunks must be uncompressed on reading. The described format of + trace data refers to the uncompressed data. + +OPTIONS SECTION +--------------- + + Section ID: 0 + + This is the only mandatory section in the file. There can be multiple + options sections, the first one is located at the offset specified right + after the compression algorithm header. The section consists of multiple + trace options, each option has the following format: + <2 bytes> unsigned short integer, ID of the option. + <4 bytes> unsigned integer, size of the option's data. 
+ <binary data> bytes of the size specified above, data of the option. + + + Options, supported by the trace file version 7: + + DONE: id 0, size 8 + This option indicates the end of the options section, it is written + always as last option. The DONE option data is: + <8 bytes> long long unsigned integer, offset in the trace file where + the next options section is located. If this offset is 0, then there + are no more options sections. + + DATE: id 1, size vary + The DATE option data is a null terminated ASCII string, which represents + the time difference between trace events timestamps and the Generic Time + of Day of the system. + + CPUSTAT: id 2, size vary + The CPUSTAT option data is a null terminated ASCII string, the content of the + "per_cpu/cpu<id>/stats" file from the trace directory. There is a CPUSTAT option + for each CPU. + + BUFFER: id 3, size vary + The BUFFER option describes the flyrecord trace data saved in the file, collected + from one trace instance. There is BUFFER option for each trace instance. The format + of the BUFFER data is: + <8 bytes> long long unsigned integer, offset in the trace file where the + BUFFER FLYRECORD section is located, containing flyrecord trace data. + <string> a null terminated ASCII string, name of the trace instance. Empty string "" + is saved as name of the top instance. + <string> a null terminated ASCII string, trace clock used for events timestamps in + this trace instance. + <4 bytes> unsigned integer, size of the trace buffer page. + <4 bytes> unsigned integer, count of the CPUs with trace data. + For each CPU of the above count: + <4 bytes> unsigned integer, ID of the CPU. + <8 bytes> long long unsigned integer, offset in the trace file where the trace data + for this CPU is located. + <8 bytes> long long unsigned integer, size of the trace data for this CPU. 
+ + TRACECLOCK: id 4, size vary + The TRACECLOCK option data is a null terminated ASCII string, the content of the + "trace_clock" file from the trace directory. + + UNAME: id 5, size vary + The UNAME option data is a null terminated ASCII string, identifying the system where + the trace data is collected. The string is retrieved by the uname() system call. + + HOOK: id 6, size vary + The HOOK option data is a null terminated ASCII string, describing event hooks: custom + event matching to connect any two events together. + + OFFSET: id 7, size vary + The OFFSET option data is a null terminated ASCII string, representing a fixed time that + is added to each event timestamp on reading. + + CPUCOUNT: id 8, size 4 + The CPUCOUNT option data is: + <4 bytes> unsigned integer, number of CPUs in the system. + + VERSION: id 9, size vary + The VERSION option data is a null terminated ASCII string, representing the version of + the trace-cmd application, used to collect these trace logs. + + PROCMAPS: id 10, size vary + The PROCMAPS option data is a null terminated ASCII string, representing the memory map + of each traced filtered process. The format of the string is, for each filtered process: + <process ID> <libraries count> <process command> \n + <memory start address> <memory end address> <full path of the mapped library file> \n + ... + separate line for each library, used by this process + ... + ... + + TRACEID: id 11, size 8 + The TRACEID option data is a unique identifier of this tracing session: + <8 bytes> long long unsigned integer, trace session identifier. + + TIME_SHIFT: id 12, size vary + The TIME_SHIFT option stores time synchronization information, collected during host and guest + tracing session. Usually it is saved in the guest trace file. This information is used to + synchronize guest with host events timestamps, when displaying all files from this tracing + session. 
The format of the TIME_SHIFT option data is: + <8 bytes> long long unsigned integer, trace identifier of the peer (usually the host). + <4 bytes> unsigned integer, flags specific to the time synchronization protocol, used in this + trace session. + <4 bytes> unsigned integer, number of traced CPUs. For each CPU, timestamps corrections + are recorded: + <4 bytes> unsigned integer, count of the recorded timestamps corrections for this CPU. + <array of unsigned long long integers of the above count>, times when the corrections are calculated + <array of unsigned long long integers of the above count>, corrections offsets + <array of unsigned long long integers of the above count>, corrections scaling ratio + + GUEST: id 13, size vary + The GUEST option stores information about traced guests in this tracing session. Usually it is + saved in the host trace file. There is a separate GUEST option for each traced guest. + The information is used when displaying all files from this tracing session. The format of + the GUEST option data is: + <string> a null terminated ASCII string, name of the guest. + <8 bytes> long long unsigned integer, trace identifier of the guest for this session. + <4 bytes> unsigned integer, number of guest's CPUs. For each CPU: + <4 bytes> unsigned integer, ID of the CPU. + <4 bytes> unsigned integer, PID of the host task, emulating this guest CPU. + + TSC2NSEC: id 14, size 16 + The TSC2NSEC option stores information, used to convert TSC events timestamps to nanoseconds. + The format of the TSC2NSEC option data is: + <4 bytes> unsigned integer, time multiplier. + <4 bytes> unsigned integer, time shift. + <8 bytes> unsigned long long integer, time offset. 
+ + HEADER_INFO: id 16, size 8 + The HEADER_INFO option data is: + <8 bytes> long long unsigned integer, offset into the trace file where the HEADER INFO + section is located. + + FTRACE_EVENTS: id 17, size 8 + The FTRACE_EVENTS option data is: + <8 bytes> long long unsigned integer, offset into the trace file where the + FTRACE EVENT FORMATS section is located. + + EVENT_FORMATS: id 18, size 8 + The EVENT_FORMATS option data is: + <8 bytes> long long unsigned integer, offset into the trace file where the EVENT FORMATS + section is located. + + KALLSYMS: id 19, size 8 + The KALLSYMS option data is: + <8 bytes> long long unsigned integer, offset into the trace file where the KALLSYMS + section is located. + + PRINTK: id 20, size 8 + The PRINTK option data is: + <8 bytes> long long unsigned integer, offset into the trace file where the TRACE_PRINTK + section is located. + + CMDLINES: id 21, size 8 + The CMDLINES option data is: + <8 bytes> long long unsigned integer, offset into the trace file where the + SAVED COMMAND LINES section is located. + + BUFFER_TEXT: id 22, size vary + The BUFFER_TEXT option describes the latency trace data saved in the file. The format + of the BUFFER_TEXT data is: + <8 bytes> long long unsigned integer, offset in the trace file where the + BUFFER LATENCY section is located, containing latency trace data. + <string> a null terminated ASCII string, name of the trace instance. Empty string "" + is saved as name of the top instance. + <string> a null terminated ASCII string, trace clock used for events timestamps in + this trace instance. + + +HEADER INFO SECTION +------------------- + + Section ID: 16 + + The first 12 bytes of the section, after the section header, contain the string: + + "header_page\0" + + The next 8 bytes are a 64-bit word containing the size of the + page header information stored next. 
+ + The next set of data is of the size read from the previous 8 bytes, + and contains the data retrieved from debugfs/tracing/events/header_page. + + Note: The size of the second field \fBcommit\fR contains the target + kernel long size. For example: + + field: local_t commit; offset:8; \fBsize:8;\fR signed:1; + + shows the kernel has a 64-bit long. + + The next 13 bytes contain the string: + + "header_event\0" + + The next 8 bytes are a 64-bit word containing the size of the + event header information stored next. + + The next set of data is of the size read from the previous 8 bytes + and contains the data retrieved from debugfs/tracing/events/header_event. + + This data allows the trace-cmd tool to know if the ring buffer format + of the kernel made any changes. + +FTRACE EVENT FORMATS SECTION +---------------------------- + + Section ID: 17 + + Directly after the section header comes the information about + the Ftrace specific events. These are the events used by the Ftrace plugins + and are not enabled by the event tracing. + + The next 4 bytes contain a 32-bit word of the number of Ftrace event + format files that are stored in the file. + + For the number of times defined by the previous 4 bytes is the + following: + + 8 bytes for the size of the Ftrace event format file. + + The Ftrace event format file copied from the target machine: + debugfs/tracing/events/ftrace/<event>/format + +EVENT FORMATS SECTION +--------------------- + + Section ID: 18 + + Directly after the section header comes the information about + the event layout. + + The next 4 bytes are a 32-bit word containing the number of + event systems that are stored in the file. These are the + directories in debugfs/tracing/events excluding the \fBftrace\fR + directory. + + For the number of times defined by the previous 4 bytes is the + following: + + A null-terminated string containing the system name. + + 4 bytes containing a 32-bit word containing the number + of events within the system. 
+ + For the number of times defined in the previous 4 bytes is the + following: + + 8 bytes for the size of the event format file. + + The event format file copied from the target machine: + debugfs/tracing/events/<system>/<event>/format + +KALLSYMS SECTION +---------------- + + Section ID: 19 + + Directly after the section header comes the information of the mapping + of function addresses to the function names. + + The next 4 bytes are a 32-bit word containing the size of the + data holding the function mappings. + + The next set of data is of the size defined by the previous 4 bytes + and contains the information from the target machine's file: + /proc/kallsyms + + +TRACE_PRINTK SECTION +-------------------- + + Section ID: 20 + + If a developer used trace_printk() within the kernel, it may + store the format string outside the ring buffer. + This information can be found in: + debugfs/tracing/printk_formats + + The next 4 bytes are a 32-bit word containing the size of the + data holding the printk formats. + + The next set of data is of the size defined by the previous 4 bytes + and contains the information from debugfs/tracing/printk_formats. + + +SAVED COMMAND LINES SECTION +--------------------------- + + Section ID: 21 + + Directly after the section header comes the information mapping + a PID to a process name. + + The next 8 bytes contain a 64-bit word that holds the size of the + data mapping the PID to a process name. + + The next set of data is of the size defined by the previous 8 bytes + and contains the information from debugfs/tracing/saved_cmdlines. + + +BUFFER FLYRECORD SECTION +------------------------ + + This section contains flyrecord tracing data, collected in one trace instance. + The data is saved per CPU. Each BUFFER FLYRECORD section has a corresponding BUFFER + option, containing information about saved CPU's trace data. 
Padding is placed between + the section header and the CPU data, placing the CPU data at a page aligned (target page) + position in the file. + + This data is copied directly from the Ftrace ring buffer and is of the + same format as the ring buffer specified by the event header files + loaded in the header format file. + + The trace-cmd tool will try to \fBmmap(2)\fR the data page by page with the + target's page size if possible. If it fails to mmap, it will just read the + data instead. + +BUFFER TEXT SECTION +------------------------ + + This section contains latency tracing data, ASCII text taken from the + target's debugfs/tracing/trace file. + +STRINGS SECTION +------------------------ + + All strings from trace file metadata are stored in string section in the file. The section + contains a list of NULL terminated ASCII strings. An ID of the string is used in the file + meta data, which is the offset of the actual string into the string section. Strings can be stored + into multiple string sections in the file. + +SEE ALSO +-------- +trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), +trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), +trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1), +trace-cmd.dat(5) + +AUTHOR +------ +Written by Steven Rostedt, <rostedt@goodmis.org> + +RESOURCES +--------- +https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ + +COPYING +------- +Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). 
diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..915311d1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,865 @@ +Valid-License-Identifier: GPL-2.0 +Valid-License-Identifier: GPL-2.0-only +Valid-License-Identifier: GPL-2.0+ +Valid-License-Identifier: GPL-2.0-or-later +SPDX-URL: https://spdx.org/licenses/GPL-2.0.html +Usage-Guide: + To use this license in source code, put one of the following SPDX + tag/value pairs into a comment according to the placement + guidelines in the licensing rules documentation. + For 'GNU General Public License (GPL) version 2 only' use: + SPDX-License-Identifier: GPL-2.0 + or + SPDX-License-Identifier: GPL-2.0-only + For 'GNU General Public License (GPL) version 2 or any later version' use: + SPDX-License-Identifier: GPL-2.0+ + or + SPDX-License-Identifier: GPL-2.0-or-later +License-Text: + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. + +------------------------------------------------------------------------- + +Valid-License-Identifier: LGPL-2.1 +Valid-License-Identifier: LGPL-2.1+ +SPDX-URL: https://spdx.org/licenses/LGPL-2.1.html +Usage-Guide: + To use this license in source code, put one of the following SPDX + tag/value pairs into a comment according to the placement + guidelines in the licensing rules documentation. + For 'GNU Lesser General Public License (LGPL) version 2.1 only' use: + SPDX-License-Identifier: LGPL-2.1 + For 'GNU Lesser General Public License (LGPL) version 2.1 or any later + version' use: + SPDX-License-Identifier: LGPL-2.1+ +License-Text: + +GNU LESSER GENERAL PUBLIC LICENSE +Version 2.1, February 1999 + +Copyright (C) 1991, 1999 Free Software Foundation, Inc. +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts as +the successor of the GNU Library Public License, version 2, hence the +version number 2.1.] + +Preamble + +The licenses for most software are designed to take away your freedom to +share and change it. By contrast, the GNU General Public Licenses are +intended to guarantee your freedom to share and change free software--to +make sure the software is free for all its users. 
+ +This license, the Lesser General Public License, applies to some specially +designated software packages--typically libraries--of the Free Software +Foundation and other authors who decide to use it. You can use it too, but +we suggest you first think carefully about whether this license or the +ordinary General Public License is the better strategy to use in any +particular case, based on the explanations below. + +When we speak of free software, we are referring to freedom of use, not +price. Our General Public Licenses are designed to make sure that you have +the freedom to distribute copies of free software (and charge for this +service if you wish); that you receive source code or can get it if you +want it; that you can change the software and use pieces of it in new free +programs; and that you are informed that you can do these things. + +To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for you if +you distribute copies of the library or if you modify it. + +For example, if you distribute copies of the library, whether gratis or for +a fee, you must give the recipients all the rights that we gave you. You +must make sure that they, too, receive or can get the source code. If you +link other code with the library, you must provide complete object files to +the recipients, so that they can relink them with the library after making +changes to the library and recompiling it. And you must show them these +terms so they know their rights. + +We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + +To protect each distributor, we want to make it very clear that there is no +warranty for the free library. 
Also, if the library is modified by someone +else and passed on, the recipients should know that what they have is not +the original version, so that the original author's reputation will not be +affected by problems that might be introduced by others. + +Finally, software patents pose a constant threat to the existence of any +free program. We wish to make sure that a company cannot effectively +restrict the users of a free program by obtaining a restrictive license +from a patent holder. Therefore, we insist that any patent license obtained +for a version of the library must be consistent with the full freedom of +use specified in this license. + +Most GNU software, including some libraries, is covered by the ordinary GNU +General Public License. This license, the GNU Lesser General Public +License, applies to certain designated libraries, and is quite different +from the ordinary General Public License. We use this license for certain +libraries in order to permit linking those libraries into non-free +programs. + +When a program is linked with a library, whether statically or using a +shared library, the combination of the two is legally speaking a combined +work, a derivative of the original library. The ordinary General Public +License therefore permits such linking only if the entire combination fits +its criteria of freedom. The Lesser General Public License permits more lax +criteria for linking other code with the library. + +We call this license the "Lesser" General Public License because it does +Less to protect the user's freedom than the ordinary General Public +License. It also provides other free software developers Less of an +advantage over competing non-free programs. These disadvantages are the +reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. 
+ +For example, on rare occasions, there may be a special need to encourage +the widest possible use of a certain library, so that it becomes a de-facto +standard. To achieve this, non-free programs must be allowed to use the +library. A more frequent case is that a free library does the same job as +widely used non-free libraries. In this case, there is little to gain by +limiting the free library to free software only, so we use the Lesser +General Public License. + +In other cases, permission to use a particular library in non-free programs +enables a greater number of people to use a large body of free +software. For example, permission to use the GNU C Library in non-free +programs enables many more people to use the whole GNU operating system, as +well as its variant, the GNU/Linux operating system. + +Although the Lesser General Public License is Less protective of the users' +freedom, it does ensure that the user of a program that is linked with the +Library has the freedom and the wherewithal to run that program using a +modified version of the Library. + +The precise terms and conditions for copying, distribution and modification +follow. Pay close attention to the difference between a "work based on the +library" and a "work that uses the library". The former contains code +derived from the library, whereas the latter must be combined with the +library in order to run. + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +0. This License Agreement applies to any software library or other program + which contains a notice placed by the copyright holder or other + authorized party saying it may be distributed under the terms of this + Lesser General Public License (also called "this License"). Each + licensee is addressed as "you". + + A "library" means a collection of software functions and/or data + prepared so as to be conveniently linked with application programs + (which use some of those functions and data) to form executables. 
+ + The "Library", below, refers to any such software library or work which + has been distributed under these terms. A "work based on the Library" + means either the Library or any derivative work under copyright law: + that is to say, a work containing the Library or a portion of it, either + verbatim or with modifications and/or translated straightforwardly into + another language. (Hereinafter, translation is included without + limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for making + modifications to it. For a library, complete source code means all the + source code for all modules it contains, plus any associated interface + definition files, plus the scripts used to control compilation and + installation of the library. + + Activities other than copying, distribution and modification are not + covered by this License; they are outside its scope. The act of running + a program using the Library is not restricted, and output from such a + program is covered only if its contents constitute a work based on the + Library (independent of the use of the Library in a tool for writing + it). Whether that is true depends on what the Library does and what the + program that uses the Library does. + +1. You may copy and distribute verbatim copies of the Library's complete + source code as you receive it, in any medium, provided that you + conspicuously and appropriately publish on each copy an appropriate + copyright notice and disclaimer of warranty; keep intact all the notices + that refer to this License and to the absence of any warranty; and + distribute a copy of this License along with the Library. + + You may charge a fee for the physical act of transferring a copy, and + you may at your option offer warranty protection in exchange for a fee. + +2. 
You may modify your copy or copies of the Library or any portion of it, + thus forming a work based on the Library, and copy and distribute such + modifications or work under the terms of Section 1 above, provided that + you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices stating + that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no charge to + all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a table + of data to be supplied by an application program that uses the + facility, other than as an argument passed when the facility is + invoked, then you must make a good faith effort to ensure that, in + the event an application does not supply such function or table, the + facility still operates, and performs whatever part of its purpose + remains meaningful. + + (For example, a function in a library to compute square roots has a + purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must be + optional: if the application does not supply it, the square root + function must still compute square roots.) + + These requirements apply to the modified work as a whole. If + identifiable sections of that work are not derived from the Library, and + can be reasonably considered independent and separate works in + themselves, then this License, and its terms, do not apply to those + sections when you distribute them as separate works. 
But when you + distribute the same sections as part of a whole which is a work based on + the Library, the distribution of the whole must be on the terms of this + License, whose permissions for other licensees extend to the entire + whole, and thus to each and every part regardless of who wrote it. + + Thus, it is not the intent of this section to claim rights or contest + your rights to work written entirely by you; rather, the intent is to + exercise the right to control the distribution of derivative or + collective works based on the Library. + + In addition, mere aggregation of another work not based on the Library + with the Library (or with a work based on the Library) on a volume of a + storage or distribution medium does not bring the other work under the + scope of this License. + +3. You may opt to apply the terms of the ordinary GNU General Public + License instead of this License to a given copy of the Library. To do + this, you must alter all the notices that refer to this License, so that + they refer to the ordinary GNU General Public License, version 2, + instead of to this License. (If a newer version than version 2 of the + ordinary GNU General Public License has appeared, then you can specify + that version instead if you wish.) Do not make any other change in these + notices. + + Once this change is made in a given copy, it is irreversible for that + copy, so the ordinary GNU General Public License applies to all + subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of the + Library into a program that is not a library. + +4. 
You may copy and distribute the Library (or a portion or derivative of + it, under Section 2) in object code or executable form under the terms + of Sections 1 and 2 above provided that you accompany it with the + complete corresponding machine-readable source code, which must be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange. + + If distribution of object code is made by offering access to copy from a + designated place, then offering equivalent access to copy the source + code from the same place satisfies the requirement to distribute the + source code, even though third parties are not compelled to copy the + source along with the object code. + +5. A program that contains no derivative of any portion of the Library, but + is designed to work with the Library by being compiled or linked with + it, is called a "work that uses the Library". Such a work, in isolation, + is not a derivative work of the Library, and therefore falls outside the + scope of this License. + + However, linking a "work that uses the Library" with the Library creates + an executable that is a derivative of the Library (because it contains + portions of the Library), rather than a "work that uses the + library". The executable is therefore covered by this License. Section 6 + states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file + that is part of the Library, the object code for the work may be a + derivative work of the Library even though the source code is + not. Whether this is true is especially significant if the work can be + linked without the Library, or if the work is itself a library. The + threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data structure + layouts and accessors, and small macros and small inline functions (ten + lines or less in length), then the use of the object file is + unrestricted, regardless of whether it is legally a derivative + work. (Executables containing this object code plus portions of the + Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may + distribute the object code for the work under the terms of Section + 6. Any executables containing that work also fall under Section 6, + whether or not they are linked directly with the Library itself. + +6. As an exception to the Sections above, you may also combine or link a + "work that uses the Library" with the Library to produce a work + containing portions of the Library, and distribute that work under terms + of your choice, provided that the terms permit modification of the work + for the customer's own use and reverse engineering for debugging such + modifications. + + You must give prominent notice with each copy of the work that the + Library is used in it and that the Library and its use are covered by + this License. You must supply a copy of this License. If the work during + execution displays copyright notices, you must include the copyright + notice for the Library among them, as well as a reference directing the + user to the copy of this License. Also, you must do one of these things: + + a) Accompany the work with the complete corresponding machine-readable + source code for the Library including whatever changes were used in + the work (which must be distributed under Sections 1 and 2 above); + and, if the work is an executable linked with the Library, with the + complete machine-readable "work that uses the Library", as object + code and/or source code, so that the user can modify the Library and + then relink to produce a modified executable containing the modified + Library. 
(It is understood that the user who changes the contents of + definitions files in the Library will not necessarily be able to + recompile the application to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a copy + of the library already present on the user's computer system, rather + than copying library functions into the executable, and (2) will + operate properly with a modified version of the library, if the user + installs one, as long as the modified version is interface-compatible + with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at least three + years, to give the same user the materials specified in Subsection + 6a, above, for a charge no more than the cost of performing this + distribution. + + d) If distribution of the work is made by offering access to copy from a + designated place, offer equivalent access to copy the above specified + materials from the same place. + + e) Verify that the user has already received a copy of these materials + or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the Library" + must include any data and utility programs needed for reproducing the + executable from it. However, as a special exception, the materials to be + distributed need not include anything that is normally distributed (in + either source or binary form) with the major components (compiler, + kernel, and so on) of the operating system on which the executable runs, + unless that component itself accompanies the executable. + + It may happen that this requirement contradicts the license restrictions + of other proprietary libraries that do not normally accompany the + operating system. Such a contradiction means you cannot use both them + and the Library together in an executable that you distribute. + +7. 
You may place library facilities that are a work based on the Library + side-by-side in a single library together with other library facilities + not covered by this License, and distribute such a combined library, + provided that the separate distribution of the work based on the Library + and of the other library facilities is otherwise permitted, and provided + that you do these two things: + + a) Accompany the combined library with a copy of the same work based on + the Library, uncombined with any other library facilities. This must + be distributed under the terms of the Sections above. + + b) Give prominent notice with the combined library of the fact that part + of it is a work based on the Library, and explaining where to find + the accompanying uncombined form of the same work. + +8. You may not copy, modify, sublicense, link with, or distribute the + Library except as expressly provided under this License. Any attempt + otherwise to copy, modify, sublicense, link with, or distribute the + Library is void, and will automatically terminate your rights under this + License. However, parties who have received copies, or rights, from you + under this License will not have their licenses terminated so long as + such parties remain in full compliance. + +9. You are not required to accept this License, since you have not signed + it. However, nothing else grants you permission to modify or distribute + the Library or its derivative works. These actions are prohibited by law + if you do not accept this License. Therefore, by modifying or + distributing the Library (or any work based on the Library), you + indicate your acceptance of this License to do so, and all its terms and + conditions for copying, distributing or modifying the Library or works + based on it. + +10. 
Each time you redistribute the Library (or any work based on the + Library), the recipient automatically receives a license from the + original licensor to copy, distribute, link with or modify the Library + subject to these terms and conditions. You may not impose any further + restrictions on the recipients' exercise of the rights granted + herein. You are not responsible for enforcing compliance by third + parties with this License. + +11. If, as a consequence of a court judgment or allegation of patent + infringement or for any other reason (not limited to patent issues), + conditions are imposed on you (whether by court order, agreement or + otherwise) that contradict the conditions of this License, they do not + excuse you from the conditions of this License. If you cannot + distribute so as to satisfy simultaneously your obligations under this + License and any other pertinent obligations, then as a consequence you + may not distribute the Library at all. For example, if a patent license + would not permit royalty-free redistribution of the Library by all + those who receive copies directly or indirectly through you, then the + only way you could satisfy both it and this License would be to refrain + entirely from distribution of the Library. + + If any portion of this section is held invalid or unenforceable under + any particular circumstance, the balance of the section is intended to + apply, and the section as a whole is intended to apply in other + circumstances. + + It is not the purpose of this section to induce you to infringe any + patents or other property right claims or to contest validity of any + such claims; this section has the sole purpose of protecting the + integrity of the free software distribution system which is implemented + by public license practices. 
Many people have made generous + contributions to the wide range of software distributed through that + system in reliance on consistent application of that system; it is up + to the author/donor to decide if he or she is willing to distribute + software through any other system and a licensee cannot impose that + choice. + + This section is intended to make thoroughly clear what is believed to + be a consequence of the rest of this License. + +12. If the distribution and/or use of the Library is restricted in certain + countries either by patents or by copyrighted interfaces, the original + copyright holder who places the Library under this License may add an + explicit geographical distribution limitation excluding those + countries, so that distribution is permitted only in or among countries + not thus excluded. In such case, this License incorporates the + limitation as if written in the body of this License. + +13. The Free Software Foundation may publish revised and/or new versions of + the Lesser General Public License from time to time. Such new versions + will be similar in spirit to the present version, but may differ in + detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the Library + specifies a version number of this License which applies to it and "any + later version", you have the option of following the terms and + conditions either of that version or of any later version published by + the Free Software Foundation. If the Library does not specify a license + version number, you may choose any version ever published by the Free + Software Foundation. + +14. If you wish to incorporate parts of the Library into other free + programs whose distribution conditions are incompatible with these, + write to the author to ask for permission. For software which is + copyrighted by the Free Software Foundation, write to the Free Software + Foundation; we sometimes make exceptions for this. 
Our decision will be + guided by the two goals of preserving the free status of all + derivatives of our free software and of promoting the sharing and reuse + of software generally. + +NO WARRANTY + +15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY + FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN + OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES + PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER + EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE + ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH + YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL + NECESSARY SERVICING, REPAIR OR CORRECTION. + +16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING + WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR + REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR + DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL + DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY + (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED + INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF + THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR + OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Libraries + +If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + +To apply these terms, attach the following notices to the library. 
It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + +one line to give the library's name and an idea of what it does. +Copyright (C) year name of author + +This library is free software; you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at +your option) any later version. + +This library is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this library; if not, write to the Free Software Foundation, +Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add +information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + +Yoyodyne, Inc., hereby disclaims all copyright interest in +the library `Frob' (a library for tweaking knobs) written +by James Random Hacker. + +signature of Ty Coon, 1 April 1990 +Ty Coon, President of Vice +That's all there is to it! 
diff --git a/LICENSES/GPL-2.0 b/LICENSES/GPL-2.0 new file mode 100644 index 00000000..ff0812fd --- /dev/null +++ b/LICENSES/GPL-2.0 @@ -0,0 +1,359 @@ +Valid-License-Identifier: GPL-2.0 +Valid-License-Identifier: GPL-2.0-only +Valid-License-Identifier: GPL-2.0+ +Valid-License-Identifier: GPL-2.0-or-later +SPDX-URL: https://spdx.org/licenses/GPL-2.0.html +Usage-Guide: + To use this license in source code, put one of the following SPDX + tag/value pairs into a comment according to the placement + guidelines in the licensing rules documentation. + For 'GNU General Public License (GPL) version 2 only' use: + SPDX-License-Identifier: GPL-2.0 + or + SPDX-License-Identifier: GPL-2.0-only + For 'GNU General Public License (GPL) version 2 or any later version' use: + SPDX-License-Identifier: GPL-2.0+ + or + SPDX-License-Identifier: GPL-2.0-or-later +License-Text: + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/LICENSES/LGPL-2.1 b/LICENSES/LGPL-2.1 new file mode 100644 index 00000000..27bb4342 --- /dev/null +++ b/LICENSES/LGPL-2.1 @@ -0,0 +1,503 @@ +Valid-License-Identifier: LGPL-2.1 +Valid-License-Identifier: LGPL-2.1+ +SPDX-URL: https://spdx.org/licenses/LGPL-2.1.html +Usage-Guide: + To use this license in source code, put one of the following SPDX + tag/value pairs into a comment according to the placement + guidelines in the licensing rules documentation. + For 'GNU Lesser General Public License (LGPL) version 2.1 only' use: + SPDX-License-Identifier: LGPL-2.1 + For 'GNU Lesser General Public License (LGPL) version 2.1 or any later + version' use: + SPDX-License-Identifier: LGPL-2.1+ +License-Text: + +GNU LESSER GENERAL PUBLIC LICENSE +Version 2.1, February 1999 + +Copyright (C) 1991, 1999 Free Software Foundation, Inc. +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts as +the successor of the GNU Library Public License, version 2, hence the +version number 2.1.] + +Preamble + +The licenses for most software are designed to take away your freedom to +share and change it. By contrast, the GNU General Public Licenses are +intended to guarantee your freedom to share and change free software--to +make sure the software is free for all its users. 
+ +This license, the Lesser General Public License, applies to some specially +designated software packages--typically libraries--of the Free Software +Foundation and other authors who decide to use it. You can use it too, but +we suggest you first think carefully about whether this license or the +ordinary General Public License is the better strategy to use in any +particular case, based on the explanations below. + +When we speak of free software, we are referring to freedom of use, not +price. Our General Public Licenses are designed to make sure that you have +the freedom to distribute copies of free software (and charge for this +service if you wish); that you receive source code or can get it if you +want it; that you can change the software and use pieces of it in new free +programs; and that you are informed that you can do these things. + +To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for you if +you distribute copies of the library or if you modify it. + +For example, if you distribute copies of the library, whether gratis or for +a fee, you must give the recipients all the rights that we gave you. You +must make sure that they, too, receive or can get the source code. If you +link other code with the library, you must provide complete object files to +the recipients, so that they can relink them with the library after making +changes to the library and recompiling it. And you must show them these +terms so they know their rights. + +We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + +To protect each distributor, we want to make it very clear that there is no +warranty for the free library. 
Also, if the library is modified by someone +else and passed on, the recipients should know that what they have is not +the original version, so that the original author's reputation will not be +affected by problems that might be introduced by others. + +Finally, software patents pose a constant threat to the existence of any +free program. We wish to make sure that a company cannot effectively +restrict the users of a free program by obtaining a restrictive license +from a patent holder. Therefore, we insist that any patent license obtained +for a version of the library must be consistent with the full freedom of +use specified in this license. + +Most GNU software, including some libraries, is covered by the ordinary GNU +General Public License. This license, the GNU Lesser General Public +License, applies to certain designated libraries, and is quite different +from the ordinary General Public License. We use this license for certain +libraries in order to permit linking those libraries into non-free +programs. + +When a program is linked with a library, whether statically or using a +shared library, the combination of the two is legally speaking a combined +work, a derivative of the original library. The ordinary General Public +License therefore permits such linking only if the entire combination fits +its criteria of freedom. The Lesser General Public License permits more lax +criteria for linking other code with the library. + +We call this license the "Lesser" General Public License because it does +Less to protect the user's freedom than the ordinary General Public +License. It also provides other free software developers Less of an +advantage over competing non-free programs. These disadvantages are the +reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. 
+ +For example, on rare occasions, there may be a special need to encourage +the widest possible use of a certain library, so that it becomes a de-facto +standard. To achieve this, non-free programs must be allowed to use the +library. A more frequent case is that a free library does the same job as +widely used non-free libraries. In this case, there is little to gain by +limiting the free library to free software only, so we use the Lesser +General Public License. + +In other cases, permission to use a particular library in non-free programs +enables a greater number of people to use a large body of free +software. For example, permission to use the GNU C Library in non-free +programs enables many more people to use the whole GNU operating system, as +well as its variant, the GNU/Linux operating system. + +Although the Lesser General Public License is Less protective of the users' +freedom, it does ensure that the user of a program that is linked with the +Library has the freedom and the wherewithal to run that program using a +modified version of the Library. + +The precise terms and conditions for copying, distribution and modification +follow. Pay close attention to the difference between a "work based on the +library" and a "work that uses the library". The former contains code +derived from the library, whereas the latter must be combined with the +library in order to run. + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +0. This License Agreement applies to any software library or other program + which contains a notice placed by the copyright holder or other + authorized party saying it may be distributed under the terms of this + Lesser General Public License (also called "this License"). Each + licensee is addressed as "you". + + A "library" means a collection of software functions and/or data + prepared so as to be conveniently linked with application programs + (which use some of those functions and data) to form executables. 
+ + The "Library", below, refers to any such software library or work which + has been distributed under these terms. A "work based on the Library" + means either the Library or any derivative work under copyright law: + that is to say, a work containing the Library or a portion of it, either + verbatim or with modifications and/or translated straightforwardly into + another language. (Hereinafter, translation is included without + limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for making + modifications to it. For a library, complete source code means all the + source code for all modules it contains, plus any associated interface + definition files, plus the scripts used to control compilation and + installation of the library. + + Activities other than copying, distribution and modification are not + covered by this License; they are outside its scope. The act of running + a program using the Library is not restricted, and output from such a + program is covered only if its contents constitute a work based on the + Library (independent of the use of the Library in a tool for writing + it). Whether that is true depends on what the Library does and what the + program that uses the Library does. + +1. You may copy and distribute verbatim copies of the Library's complete + source code as you receive it, in any medium, provided that you + conspicuously and appropriately publish on each copy an appropriate + copyright notice and disclaimer of warranty; keep intact all the notices + that refer to this License and to the absence of any warranty; and + distribute a copy of this License along with the Library. + + You may charge a fee for the physical act of transferring a copy, and + you may at your option offer warranty protection in exchange for a fee. + +2. 
You may modify your copy or copies of the Library or any portion of it, + thus forming a work based on the Library, and copy and distribute such + modifications or work under the terms of Section 1 above, provided that + you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices stating + that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no charge to + all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a table + of data to be supplied by an application program that uses the + facility, other than as an argument passed when the facility is + invoked, then you must make a good faith effort to ensure that, in + the event an application does not supply such function or table, the + facility still operates, and performs whatever part of its purpose + remains meaningful. + + (For example, a function in a library to compute square roots has a + purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must be + optional: if the application does not supply it, the square root + function must still compute square roots.) + + These requirements apply to the modified work as a whole. If + identifiable sections of that work are not derived from the Library, and + can be reasonably considered independent and separate works in + themselves, then this License, and its terms, do not apply to those + sections when you distribute them as separate works. 
But when you + distribute the same sections as part of a whole which is a work based on + the Library, the distribution of the whole must be on the terms of this + License, whose permissions for other licensees extend to the entire + whole, and thus to each and every part regardless of who wrote it. + + Thus, it is not the intent of this section to claim rights or contest + your rights to work written entirely by you; rather, the intent is to + exercise the right to control the distribution of derivative or + collective works based on the Library. + + In addition, mere aggregation of another work not based on the Library + with the Library (or with a work based on the Library) on a volume of a + storage or distribution medium does not bring the other work under the + scope of this License. + +3. You may opt to apply the terms of the ordinary GNU General Public + License instead of this License to a given copy of the Library. To do + this, you must alter all the notices that refer to this License, so that + they refer to the ordinary GNU General Public License, version 2, + instead of to this License. (If a newer version than version 2 of the + ordinary GNU General Public License has appeared, then you can specify + that version instead if you wish.) Do not make any other change in these + notices. + + Once this change is made in a given copy, it is irreversible for that + copy, so the ordinary GNU General Public License applies to all + subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of the + Library into a program that is not a library. + +4. 
You may copy and distribute the Library (or a portion or derivative of + it, under Section 2) in object code or executable form under the terms + of Sections 1 and 2 above provided that you accompany it with the + complete corresponding machine-readable source code, which must be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange. + + If distribution of object code is made by offering access to copy from a + designated place, then offering equivalent access to copy the source + code from the same place satisfies the requirement to distribute the + source code, even though third parties are not compelled to copy the + source along with the object code. + +5. A program that contains no derivative of any portion of the Library, but + is designed to work with the Library by being compiled or linked with + it, is called a "work that uses the Library". Such a work, in isolation, + is not a derivative work of the Library, and therefore falls outside the + scope of this License. + + However, linking a "work that uses the Library" with the Library creates + an executable that is a derivative of the Library (because it contains + portions of the Library), rather than a "work that uses the + library". The executable is therefore covered by this License. Section 6 + states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file + that is part of the Library, the object code for the work may be a + derivative work of the Library even though the source code is + not. Whether this is true is especially significant if the work can be + linked without the Library, or if the work is itself a library. The + threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data structure + layouts and accessors, and small macros and small inline functions (ten + lines or less in length), then the use of the object file is + unrestricted, regardless of whether it is legally a derivative + work. (Executables containing this object code plus portions of the + Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may + distribute the object code for the work under the terms of Section + 6. Any executables containing that work also fall under Section 6, + whether or not they are linked directly with the Library itself. + +6. As an exception to the Sections above, you may also combine or link a + "work that uses the Library" with the Library to produce a work + containing portions of the Library, and distribute that work under terms + of your choice, provided that the terms permit modification of the work + for the customer's own use and reverse engineering for debugging such + modifications. + + You must give prominent notice with each copy of the work that the + Library is used in it and that the Library and its use are covered by + this License. You must supply a copy of this License. If the work during + execution displays copyright notices, you must include the copyright + notice for the Library among them, as well as a reference directing the + user to the copy of this License. Also, you must do one of these things: + + a) Accompany the work with the complete corresponding machine-readable + source code for the Library including whatever changes were used in + the work (which must be distributed under Sections 1 and 2 above); + and, if the work is an executable linked with the Library, with the + complete machine-readable "work that uses the Library", as object + code and/or source code, so that the user can modify the Library and + then relink to produce a modified executable containing the modified + Library. 
(It is understood that the user who changes the contents of + definitions files in the Library will not necessarily be able to + recompile the application to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a copy + of the library already present on the user's computer system, rather + than copying library functions into the executable, and (2) will + operate properly with a modified version of the library, if the user + installs one, as long as the modified version is interface-compatible + with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at least three + years, to give the same user the materials specified in Subsection + 6a, above, for a charge no more than the cost of performing this + distribution. + + d) If distribution of the work is made by offering access to copy from a + designated place, offer equivalent access to copy the above specified + materials from the same place. + + e) Verify that the user has already received a copy of these materials + or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the Library" + must include any data and utility programs needed for reproducing the + executable from it. However, as a special exception, the materials to be + distributed need not include anything that is normally distributed (in + either source or binary form) with the major components (compiler, + kernel, and so on) of the operating system on which the executable runs, + unless that component itself accompanies the executable. + + It may happen that this requirement contradicts the license restrictions + of other proprietary libraries that do not normally accompany the + operating system. Such a contradiction means you cannot use both them + and the Library together in an executable that you distribute. + +7. 
You may place library facilities that are a work based on the Library + side-by-side in a single library together with other library facilities + not covered by this License, and distribute such a combined library, + provided that the separate distribution of the work based on the Library + and of the other library facilities is otherwise permitted, and provided + that you do these two things: + + a) Accompany the combined library with a copy of the same work based on + the Library, uncombined with any other library facilities. This must + be distributed under the terms of the Sections above. + + b) Give prominent notice with the combined library of the fact that part + of it is a work based on the Library, and explaining where to find + the accompanying uncombined form of the same work. + +8. You may not copy, modify, sublicense, link with, or distribute the + Library except as expressly provided under this License. Any attempt + otherwise to copy, modify, sublicense, link with, or distribute the + Library is void, and will automatically terminate your rights under this + License. However, parties who have received copies, or rights, from you + under this License will not have their licenses terminated so long as + such parties remain in full compliance. + +9. You are not required to accept this License, since you have not signed + it. However, nothing else grants you permission to modify or distribute + the Library or its derivative works. These actions are prohibited by law + if you do not accept this License. Therefore, by modifying or + distributing the Library (or any work based on the Library), you + indicate your acceptance of this License to do so, and all its terms and + conditions for copying, distributing or modifying the Library or works + based on it. + +10. 
Each time you redistribute the Library (or any work based on the + Library), the recipient automatically receives a license from the + original licensor to copy, distribute, link with or modify the Library + subject to these terms and conditions. You may not impose any further + restrictions on the recipients' exercise of the rights granted + herein. You are not responsible for enforcing compliance by third + parties with this License. + +11. If, as a consequence of a court judgment or allegation of patent + infringement or for any other reason (not limited to patent issues), + conditions are imposed on you (whether by court order, agreement or + otherwise) that contradict the conditions of this License, they do not + excuse you from the conditions of this License. If you cannot + distribute so as to satisfy simultaneously your obligations under this + License and any other pertinent obligations, then as a consequence you + may not distribute the Library at all. For example, if a patent license + would not permit royalty-free redistribution of the Library by all + those who receive copies directly or indirectly through you, then the + only way you could satisfy both it and this License would be to refrain + entirely from distribution of the Library. + + If any portion of this section is held invalid or unenforceable under + any particular circumstance, the balance of the section is intended to + apply, and the section as a whole is intended to apply in other + circumstances. + + It is not the purpose of this section to induce you to infringe any + patents or other property right claims or to contest validity of any + such claims; this section has the sole purpose of protecting the + integrity of the free software distribution system which is implemented + by public license practices. 
Many people have made generous + contributions to the wide range of software distributed through that + system in reliance on consistent application of that system; it is up + to the author/donor to decide if he or she is willing to distribute + software through any other system and a licensee cannot impose that + choice. + + This section is intended to make thoroughly clear what is believed to + be a consequence of the rest of this License. + +12. If the distribution and/or use of the Library is restricted in certain + countries either by patents or by copyrighted interfaces, the original + copyright holder who places the Library under this License may add an + explicit geographical distribution limitation excluding those + countries, so that distribution is permitted only in or among countries + not thus excluded. In such case, this License incorporates the + limitation as if written in the body of this License. + +13. The Free Software Foundation may publish revised and/or new versions of + the Lesser General Public License from time to time. Such new versions + will be similar in spirit to the present version, but may differ in + detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the Library + specifies a version number of this License which applies to it and "any + later version", you have the option of following the terms and + conditions either of that version or of any later version published by + the Free Software Foundation. If the Library does not specify a license + version number, you may choose any version ever published by the Free + Software Foundation. + +14. If you wish to incorporate parts of the Library into other free + programs whose distribution conditions are incompatible with these, + write to the author to ask for permission. For software which is + copyrighted by the Free Software Foundation, write to the Free Software + Foundation; we sometimes make exceptions for this. 
Our decision will be + guided by the two goals of preserving the free status of all + derivatives of our free software and of promoting the sharing and reuse + of software generally. + +NO WARRANTY + +15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY + FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN + OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES + PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER + EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE + ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH + YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL + NECESSARY SERVICING, REPAIR OR CORRECTION. + +16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING + WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR + REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR + DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL + DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY + (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED + INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF + THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR + OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Libraries + +If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + +To apply these terms, attach the following notices to the library. 
It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + +one line to give the library's name and an idea of what it does. +Copyright (C) year name of author + +This library is free software; you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at +your option) any later version. + +This library is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this library; if not, write to the Free Software Foundation, +Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add +information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + +Yoyodyne, Inc., hereby disclaims all copyright interest in +the library `Frob' (a library for tweaking knobs) written +by James Random Hacker. + +signature of Ty Coon, 1 April 1990 +Ty Coon, President of Vice +That's all there is to it! diff --git a/METADATA b/METADATA new file mode 100644 index 00000000..eca2378b --- /dev/null +++ b/METADATA @@ -0,0 +1,13 @@ +name: "trace-cmd" +description: + "trace-cmd is a front-end to the ftrace Linux kernel tracer." 
+ +third_party { + url { + type: GIT + value: "https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git" + } + version: "trace-cmd-v3.0.2" + last_upgrade_date { year: 2022 month: 5 day: 02 } + license_type: RESTRICTED +} diff --git a/MODULE_LICENSE_GPL b/MODULE_LICENSE_GPL new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/MODULE_LICENSE_GPL diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..982514ba --- /dev/null +++ b/Makefile @@ -0,0 +1,591 @@ +# SPDX-License-Identifier: GPL-2.0 +# trace-cmd version +TC_VERSION = 3 +TC_PATCHLEVEL = 0 +TC_EXTRAVERSION = 3 +TRACECMD_VERSION = $(TC_VERSION).$(TC_PATCHLEVEL).$(TC_EXTRAVERSION) + +export TC_VERSION +export TC_PATCHLEVEL +export TC_EXTRAVERSION +export TRACECMD_VERSION + +LIBTC_VERSION = 1 +LIBTC_PATCHLEVEL = 1 +LIBTC_EXTRAVERSION = 3 +LIBTRACECMD_VERSION = $(LIBTC_VERSION).$(LIBTC_PATCHLEVEL).$(LIBTC_EXTRAVERSION) + +export LIBTC_VERSION +export LIBTC_PATCHLEVEL +export LIBTC_EXTRAVERSION +export LIBTRACECMD_VERSION + +VERSION_FILE = ltc_version.h + +LIBTRACEEVENT_MIN_VERSION = 1.5 +LIBTRACEFS_MIN_VERSION = 1.3 + +MAKEFLAGS += --no-print-directory + +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. 
+$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,PKG_CONFIG,pkg-config) +$(call allow-override,LD_SO_CONF_PATH,/etc/ld.so.conf.d/) +$(call allow-override,LDCONFIG,ldconfig) + +export LD_SO_CONF_PATH LDCONFIG + +EXT = -std=gnu99 +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) +ifeq ($(LP64), 1) + libdir_relative_temp = lib64 +else + libdir_relative_temp = lib +endif + +libdir_relative ?= $(libdir_relative_temp) +prefix ?= /usr/local +bindir_relative = bin +bindir = $(prefix)/$(bindir_relative) +man_dir = $(prefix)/share/man +man_dir_SQ = '$(subst ','\'',$(man_dir))' +html_install_SQ = '$(subst ','\'',$(html_install))' +img_install_SQ = '$(subst ','\'',$(img_install))' +libdir = $(prefix)/$(libdir_relative) +libdir_SQ = '$(subst ','\'',$(libdir))' +includedir = $(prefix)/include +includedir_SQ = '$(subst ','\'',$(includedir))' +pkgconfig_dir ?= $(word 1,$(shell $(PKG_CONFIG) \ + --variable pc_path pkg-config | tr ":" " ")) + +etcdir ?= /etc +etcdir_SQ = '$(subst ','\'',$(etcdir))' + +export man_dir man_dir_SQ html_install html_install_SQ INSTALL +export img_install img_install_SQ libdir libdir_SQ includedir_SQ +export DESTDIR DESTDIR_SQ + +ifeq ($(prefix),$(HOME)) +plugin_tracecmd_dir = $(libdir)/trace-cmd/plugins +python_dir ?= $(libdir)/trace-cmd/python +var_dir = $(HOME)/.trace-cmd/ +else +python_dir ?= $(libdir)/trace-cmd/python +PLUGIN_DIR_TRACECMD = -DPLUGIN_TRACECMD_DIR="$(plugin_tracecmd_dir)" +PYTHON_DIR = -DPYTHON_DIR="$(python_dir)" +PLUGIN_DIR_TRACECMD_SQ = '$(subst ','\'',$(PLUGIN_DIR_TRACECMD))' +PYTHON_DIR_SQ = '$(subst ','\'',$(PYTHON_DIR))' +var_dir = /var 
+endif + +# Shell quotes +bindir_SQ = $(subst ','\'',$(bindir)) +bindir_relative_SQ = $(subst ','\'',$(bindir_relative)) +plugin_tracecmd_dir_SQ = $(subst ','\'',$(plugin_tracecmd_dir)) +python_dir_SQ = $(subst ','\'',$(python_dir)) + +pound := \# + +VAR_DIR = -DVAR_DIR="$(var_dir)" +VAR_DIR_SQ = '$(subst ','\'',$(VAR_DIR))' +var_dir_SQ = '$(subst ','\'',$(var_dir))' + +HELP_DIR = -DHELP_DIR=$(html_install) +HELP_DIR_SQ = '$(subst ','\'',$(HELP_DIR))' +#' emacs highlighting gets confused by the above escaped quote. + +BASH_COMPLETE_DIR ?= $(etcdir)/bash_completion.d + +export PLUGIN_DIR_TRACECMD +export PYTHON_DIR +export PYTHON_DIR_SQ +export plugin_tracecmd_dir_SQ +export python_dir_SQ +export var_dir + +# copy a bit from Linux kbuild + +ifeq ("$(origin V)", "command line") + VERBOSE = $(V) +endif +ifndef VERBOSE + VERBOSE = 0 +endif + +SILENT := $(if $(findstring s,$(filter-out --%,$(MAKEFLAGS))),1) + +SWIG_DEFINED := $(shell if command -v swig; then echo 1; else echo 0; fi) +ifeq ($(SWIG_DEFINED), 0) +BUILD_PYTHON := report_noswig +NO_PYTHON = 1 +endif + +ifndef NO_PYTHON +PYTHON := ctracecmd.so + +PYTHON_VERS ?= python +PYTHON_PKGCONFIG_VERS ?= $(PYTHON_VERS) + +# Can build python? +ifeq ($(shell sh -c "$(PKG_CONFIG) --cflags $(PYTHON_PKGCONFIG_VERS) > /dev/null 2>&1 && echo y"), y) + BUILD_PYTHON := $(PYTHON) + BUILD_PYTHON_WORKS := 1 +else + BUILD_PYTHON := report_nopythondev + NO_PYTHON = 1 +endif +endif # NO_PYTHON + +export BUILD_PYTHON_WORKS +export NO_PYTHON + +# $(call test-build, snippet, ret) -> ret if snippet compiles +# -> empty otherwise +test-build = $(if $(shell sh -c 'echo "$(1)" | \ + $(CC) -o /dev/null -c -x c - > /dev/null 2>&1 && echo y'), $2) + +UDIS86_AVAILABLE := $(call test-build,\#include <udis86.h>, y) +ifneq ($(strip $(UDIS86_AVAILABLE)), y) +NO_UDIS86 := 1 +endif + +ifndef NO_UDIS86 +# have udis86 disassembler library? 
+udis86-flags := -DHAVE_UDIS86 -ludis86 +udis86-ldflags := -ludis86 +endif # NO_UDIS86 + +define BLK_TC_FLUSH_SOURCE +#include <linux/blktrace_api.h> +int main(void) { return BLK_TC_FLUSH; } +endef + +# have flush/fua block layer instead of barriers? +blk-flags := $(call test-build,$(BLK_TC_FLUSH_SOURCE),-DHAVE_BLK_TC_FLUSH) + +ifeq ("$(origin O)", "command line") + + saved-output := $(O) + BUILD_OUTPUT := $(shell cd $(O) && /bin/pwd) + $(if $(BUILD_OUTPUT),, \ + $(error output directory "$(saved-output)" does not exist)) + +else + BUILD_OUTPUT = $(CURDIR) +endif + +srctree := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR)) +objtree := $(BUILD_OUTPUT) +src := $(srctree) +obj := $(objtree) + +PKG_CONFIG_SOURCE_FILE = libtracecmd.pc +PKG_CONFIG_FILE := $(addprefix $(BUILD_OUTPUT)/,$(PKG_CONFIG_SOURCE_FILE)) + +export pkgconfig_dir PKG_CONFIG_FILE + +export prefix bindir src obj + +LIBS = -ldl + +LIBTRACECMD_DIR = $(obj)/lib/trace-cmd +LIBTRACECMD_STATIC = $(LIBTRACECMD_DIR)/libtracecmd.a +LIBTRACECMD_SHARED = $(LIBTRACECMD_DIR)/libtracecmd.so.$(LIBTRACECMD_VERSION) +LIBTRACECMD_SHARED_VERSION = $(shell echo $(LIBTRACECMD_SHARED) | sed -e 's/\(\.so\.[0-9]*\).*/\1/') +LIBTRACECMD_SHARED_SO = $(shell echo $(LIBTRACECMD_SHARED) | sed -e 's/\(\.so\).*/\1/') + +export LIBTRACECMD_STATIC LIBTRACECMD_SHARED +export LIBTRACECMD_SHARED_VERSION LIBTRACECMD_SHARED_SO + +LIBTRACEEVENT=libtraceevent +LIBTRACEFS=libtracefs + +TEST_LIBTRACEEVENT = $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEEVENT_MIN_VERSION) $(LIBTRACEEVENT) > /dev/null 2>&1 && echo y") +TEST_LIBTRACEFS = $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEFS_MIN_VERSION) $(LIBTRACEFS) > /dev/null 2>&1 && echo y") + +ifeq ("$(TEST_LIBTRACEEVENT)", "y") +LIBTRACEEVENT_CFLAGS = $(shell sh -c "$(PKG_CONFIG) --cflags $(LIBTRACEEVENT)") +LIBTRACEEVENT_LDLAGS = $(shell sh -c "$(PKG_CONFIG) --libs $(LIBTRACEEVENT)") +else +.PHONY: warning +warning: + @echo "********************************************" + @echo 
"** NOTICE: libtraceevent version $(LIBTRACEEVENT_MIN_VERSION) or higher not found on system" + @echo "**" + @echo "** Consider installing the latest libtraceevent from your" + @echo "** distribution, or from source:" + @echo "**" + @echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git/ " + @echo "**" + @echo "********************************************" +endif + +export LIBTRACEEVENT_CFLAGS LIBTRACEEVENT_LDLAGS + +ifeq ("$(TEST_LIBTRACEFS)", "y") +LIBTRACEFS_CFLAGS = $(shell sh -c "$(PKG_CONFIG) --cflags $(LIBTRACEFS)") +LIBTRACEFS_LDLAGS = $(shell sh -c "$(PKG_CONFIG) --libs $(LIBTRACEFS)") +else +.PHONY: warning +warning: + @echo "********************************************" + @echo "** NOTICE: libtracefs version $(LIBTRACEFS_MIN_VERSION) or higher not found on system" + @echo "**" + @echo "** Consider installing the latest libtracefs from your" + @echo "** distribution, or from source:" + @echo "**" + @echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git/ " + @echo "**" + @echo "********************************************" +endif + +export LIBTRACEFS_CFLAGS LIBTRACEFS_LDLAGS + +TRACE_LIBS = -L$(LIBTRACECMD_DIR) -ltracecmd \ + $(LIBTRACEEVENT_LDLAGS) $(LIBTRACEFS_LDLAGS) + +export LIBS TRACE_LIBS +export LIBTRACECMD_DIR +export Q SILENT VERBOSE EXT + +# Include the utils +include scripts/utils.mk + +INCLUDES = -I$(src)/include -I$(src)/../../include +INCLUDES += -I$(src)/include/trace-cmd +INCLUDES += -I$(src)/lib/trace-cmd/include +INCLUDES += -I$(src)/lib/trace-cmd/include/private +INCLUDES += -I$(src)/tracecmd/include +INCLUDES += $(LIBTRACEEVENT_CFLAGS) +INCLUDES += $(LIBTRACEFS_CFLAGS) + +include $(src)/features.mk + +# Set compile option CFLAGS if not set elsewhere +CFLAGS ?= -g -Wall +CPPFLAGS ?= +LDFLAGS ?= + +ifndef NO_VSOCK +VSOCK_DEFINED := $(shell if (echo "$(pound)include <linux/vm_sockets.h>" | $(CC) -E - >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi) +else +VSOCK_DEFINED := 0 +endif + +export VSOCK_DEFINED 
+ifeq ($(VSOCK_DEFINED), 1) +CFLAGS += -DVSOCK +endif + +PERF_DEFINED := $(shell if (echo "$(pound)include <linux/perf_event.h>" | $(CC) -E - >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi) +export PERF_DEFINED +ifeq ($(PERF_DEFINED), 1) +CFLAGS += -DPERF +endif + +ZLIB_INSTALLED := $(shell if (printf "$(pound)include <zlib.h>\n void main(){deflateInit(NULL, Z_BEST_COMPRESSION);}" | $(CC) -o /dev/null -x c - -lz >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi) +ifeq ($(ZLIB_INSTALLED), 1) +export ZLIB_INSTALLED +CFLAGS += -DHAVE_ZLIB +$(info Have zlib compression support) +endif + +TEST_LIBZSTD = $(shell sh -c "$(PKG_CONFIG) --atleast-version 1.4.0 libzstd > /dev/null 2>&1 && echo y") + +ifeq ("$(TEST_LIBZSTD)", "y") +LIBZSTD_CFLAGS = $(shell sh -c "$(PKG_CONFIG) --cflags libzstd") +LIBZSTD_LDLAGS = $(shell sh -c "$(PKG_CONFIG) --libs libzstd") +CFLAGS += -DHAVE_ZSTD +ZSTD_INSTALLED=1 +$(info Have ZSTD compression support) +else +$(info *************************************************************) +$(info ZSTD package not found, best compression algorithm not in use) +$(info *************************************************************) +endif + +export LIBZSTD_CFLAGS LIBZSTD_LDLAGS ZSTD_INSTALLED + +CUNIT_INSTALLED := $(shell if (printf "$(pound)include <CUnit/Basic.h>\n void main(){CU_initialize_registry();}" | $(CC) -o /dev/null -x c - -lcunit >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi) +export CUNIT_INSTALLED + +export CFLAGS +export INCLUDES + +# Required CFLAGS +override CFLAGS += -D_GNU_SOURCE + +ifndef NO_PTRACE +ifneq ($(call try-cc,$(SOURCE_PTRACE),),y) + NO_PTRACE = 1 + override CFLAGS += -DWARN_NO_PTRACE +endif +endif + +ifdef NO_PTRACE +override CFLAGS += -DNO_PTRACE +endif + +ifndef NO_AUDIT +ifneq ($(call try-cc,$(SOURCE_AUDIT),-laudit),y) + NO_AUDIT = 1 + override CFLAGS += -DWARN_NO_AUDIT +endif +endif + +ifdef NO_AUDIT +override CFLAGS += -DNO_AUDIT +else +LIBS += -laudit +endif + +# Append required CFLAGS +override CFLAGS += $(INCLUDES) 
$(VAR_DIR) +override CFLAGS += $(PLUGIN_DIR_TRACECMD_SQ) +override CFLAGS += $(udis86-flags) $(blk-flags) +override LDFLAGS += $(udis86-ldflags) + +CMD_TARGETS = trace-cmd $(BUILD_PYTHON) + +### +# Default we just build trace-cmd +# +# If you want all libraries, then do: make libs +### + +all: all_cmd plugins show_other_make + +all_cmd: $(CMD_TARGETS) + +BUILD_PREFIX := $(BUILD_OUTPUT)/build_prefix + +$(BUILD_PREFIX): force + $(Q)$(call build_prefix,$(prefix)) + +$(PKG_CONFIG_FILE) : ${PKG_CONFIG_SOURCE_FILE}.template $(BUILD_PREFIX) $(VERSION_FILE) + $(Q) $(call do_make_pkgconfig_file,$(prefix)) + +trace-cmd: force $(LIBTRACECMD_STATIC) \ + force $(obj)/lib/trace-cmd/plugins/tracecmd_plugin_dir + $(Q)$(MAKE) -C $(src)/tracecmd $(obj)/tracecmd/$@ + +$(LIBTRACECMD_STATIC): force + $(Q)$(MAKE) -C $(src)/lib/trace-cmd $@ + +$(LIBTRACECMD_SHARED): force + $(Q)$(MAKE) -C $(src)/lib/trace-cmd libtracecmd.so + +libtracecmd.a: $(LIBTRACECMD_STATIC) +libtracecmd.so: $(LIBTRACECMD_SHARED) + +libs: $(LIBTRACECMD_SHARED) $(PKG_CONFIG_FILE) + +VERSION = $(LIBTC_VERSION) +PATCHLEVEL = $(LIBTC_PATCHLEVEL) +EXTRAVERSION = $(LIBTC_EXTRAVERSION) + +define make_version.h + (echo '/* This file is automatically generated. Do not modify. */'; \ + echo \#define VERSION_CODE $(shell \ + expr $(VERSION) \* 256 + $(PATCHLEVEL)); \ + echo '#define EXTRAVERSION ' $(EXTRAVERSION); \ + echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"'; \ + ) > $1 +endef + +define update_version.h + ($(call make_version.h, $@.tmp); \ + if [ -r $@ ] && cmp -s $@ $@.tmp; then \ + rm -f $@.tmp; \ + else \ + echo ' UPDATE $@'; \ + mv -f $@.tmp $@; \ + fi); +endef + +$(VERSION_FILE): force + $(Q)$(call update_version.h) + +gui: force + @echo "***************************" + @echo " KernelShark has moved!" 
+ @echo " Please use its new home at https://git.kernel.org/pub/scm/utils/trace-cmd/kernel-shark.git/" + @echo "***************************" + +test: force $(LIBTRACECMD_STATIC) +ifneq ($(CUNIT_INSTALLED),1) + $(error CUnit framework not installed, cannot build unit tests)) +endif + $(Q)$(MAKE) -C $(src)/utest $@ + +plugins_tracecmd: force $(obj)/lib/trace-cmd/plugins/tracecmd_plugin_dir + $(Q)$(MAKE) -C $(src)/lib/trace-cmd/plugins + +plugins: plugins_tracecmd + +$(obj)/lib/trace-cmd/plugins/tracecmd_plugin_dir: force + $(Q)$(MAKE) -C $(src)/lib/trace-cmd/plugins $@ + +show_other_make: + @echo "Note: to build man pages, type \"make doc\"" + @echo " to build unit tests, type \"make test\"" + +PHONY += show_other_make + +define find_tag_files + find . -name '\.pc' -prune -o -name '*\.[ch]' -print -o -name '*\.[ch]pp' \ + ! -name '\.#' -print +endef + +tags: force + $(RM) tags + $(call find_tag_files) | xargs ctags --extra=+f --c-kinds=+px + +TAGS: force + $(RM) TAGS + $(call find_tag_files) | xargs etags + +cscope: force + $(RM) cscope* + $(call find_tag_files) | cscope -b -q + +install_plugins_tracecmd: force + $(Q)$(MAKE) -C $(src)/lib/trace-cmd/plugins install_plugins + +install_plugins: install_plugins_tracecmd + +install_python: force + $(Q)$(MAKE) -C $(src)/python $@ + +install_bash_completion: force + $(Q)$(call do_install_data,$(src)/tracecmd/trace-cmd.bash,$(BASH_COMPLETE_DIR)) + +install_cmd: all_cmd install_plugins install_python install_bash_completion + $(Q)$(call do_install,$(obj)/tracecmd/trace-cmd,$(bindir_SQ)) + +install: install_cmd + @echo "Note: to install man pages, type \"make install_doc\"" + +install_gui: force + @echo "Nothing to do here." 
+ @echo " Have you tried https://git.kernel.org/pub/scm/utils/trace-cmd/kernel-shark.git/" + +install_libs: libs + $(Q)$(MAKE) -C $(src)/lib/trace-cmd/ $@ + +doc: + $(MAKE) -C $(src)/Documentation all + +doc_clean: + $(MAKE) -C $(src)/Documentation clean + +install_doc: + $(MAKE) -C $(src)/Documentation install + +clean: + $(RM) *.o *~ *.a *.so .*.d + $(RM) tags TAGS cscope* $(PKG_CONFIG_SOURCE_FILE) $(VERSION_FILE) + $(MAKE) -C $(src)/lib/trace-cmd clean + $(MAKE) -C $(src)/lib/trace-cmd/plugins clean + $(MAKE) -C $(src)/utest clean + $(MAKE) -C $(src)/python clean + $(MAKE) -C $(src)/tracecmd clean + +define build_uninstall_script + $(Q)mkdir $(BUILD_OUTPUT)/tmp_build + $(Q)$(MAKE) -C $(src) DESTDIR=$(BUILD_OUTPUT)/tmp_build O=$(BUILD_OUTPUT) $1 > /dev/null + $(Q)find $(BUILD_OUTPUT)/tmp_build ! -type d -printf "%P\n" > $(BUILD_OUTPUT)/build_$2 + $(Q)$(RM) -rf $(BUILD_OUTPUT)/tmp_build +endef + +build_uninstall: $(BUILD_PREFIX) + $(call build_uninstall_script,install,uninstall) + +$(BUILD_OUTPUT)/build_uninstall: build_uninstall + +build_libs_uninstall: $(BUILD_PREFIX) + $(call build_uninstall_script,install_libs,libs_uninstall) + +$(BUILD_OUTPUT)/build_libs_uninstall: build_libs_uninstall + +define uninstall_file + if [ -f $(DESTDIR)/$1 -o -h $(DESTDIR)/$1 ]; then \ + $(call print_uninstall,$(DESTDIR)/$1)$(RM) $(DESTDIR)/$1; \ + fi; +endef + +uninstall: $(BUILD_OUTPUT)/build_uninstall + @$(foreach file,$(shell cat $(BUILD_OUTPUT)/build_uninstall),$(call uninstall_file,$(file))) + +uninstall_libs: $(BUILD_OUTPUT)/build_libs_uninstall + @$(foreach file,$(shell cat $(BUILD_OUTPUT)/build_libs_uninstall),$(call uninstall_file,$(file))) + +##### PYTHON STUFF ##### + +report_noswig: force + $(Q)echo + $(Q)echo " NO_PYTHON forced: swig not installed, not compiling python plugins" + $(Q)echo + +report_nopythondev: force + $(Q)echo + $(Q)echo " python-dev is not installed, not compiling python plugins" + $(Q)echo + +ifndef NO_PYTHON +PYTHON_INCLUDES = `$(PKG_CONFIG) 
--cflags $(PYTHON_PKGCONFIG_VERS)` +PYTHON_LDFLAGS = `$(PKG_CONFIG) --libs $(PYTHON_PKGCONFIG_VERS)` \ + $(shell $(PYTHON_VERS)-config --ldflags) +PYGTK_CFLAGS = `$(PKG_CONFIG) --cflags pygtk-2.0` +else +PYTHON_INCLUDES = +PYTHON_LDFLAGS = +PYGTK_CFLAGS = +endif + +export PYTHON_INCLUDES +export PYTHON_LDFLAGS +export PYGTK_CFLAGS + +ctracecmd.so: force $(LIBTRACECMD_STATIC) + $(Q)$(MAKE) -C $(src)/python $@ + +PHONY += python +python: $(PYTHON) + + +dist: + git archive --format=tar --prefix=trace-cmd-$(TRACECMD_VERSION)/ HEAD \ + > ../trace-cmd-$(TRACECMD_VERSION).tar + cat ../trace-cmd-$(TRACECMD_VERSION).tar | \ + bzip2 -c9 > ../trace-cmd-$(TRACECMD_VERSION).tar.bz2 + cat ../trace-cmd-$(TRACECMD_VERSION).tar | \ + xz -e -c8 > ../trace-cmd-$(TRACECMD_VERSION).tar.xz + +PHONY += force +force: + +# Declare the contents of the .PHONY variable as phony. We keep that +# information in a variable so we can use it in if_changed and friends. +.PHONY: $(PHONY) @@ -0,0 +1,2 @@ +kaleshsingh@google.com +namhyung@google.com diff --git a/PACKAGING b/PACKAGING new file mode 100644 index 00000000..7e7d2065 --- /dev/null +++ b/PACKAGING @@ -0,0 +1,30 @@ +The libtracefs and libtraceevent packages are required for trace-cmd +and libtracecmd.so + +In order to create a package directory with libtraceevent, libtracefs +and libtracecmd and trace-cmd, you can follow these steps: + + git clone git://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git + git clone git://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git + git clone git://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git + + cd libtraceevent + INSTALL_PATH=/tmp/install ../trace-cmd/make-trace-cmd.sh install + + cd ../libtracefs + INSTALL_PATH=/tmp/install ../trace-cmd/make-trace-cmd.sh install + + cd ../trace-cmd + INSTALL_PATH=/tmp/install ./make-trace-cmd.sh install install_libs + + cd /tmp/install + tar cvjf /tmp/trace-cmd-files.tar.bz2 . 
+ +And then the tarball of /tmp/trace-cmd-files.tar.bz2 can be extracted +on another machine at the root directory, and trace-cmd will be installed there. + +Note, to define a prefix, add a PREFIX variable before calling make-trace-cmd.sh + + For example: + + PREFIX=/usr/local INSTALL_PATH=/tmp/install ./make-trace-cmd.sh install @@ -0,0 +1,68 @@ + + + For more information on contributing please see: https://www.trace-cmd.org + +Note: The official repository for trace-cmd and KernelShark is here: + + git://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git + +For bug reports and issues, please file them here: + + https://bugzilla.kernel.org/buglist.cgi?component=Trace-cmd%2FKernelshark&product=Tools&resolution=--- + +These files make up the code that creates the trace-cmd programs. +This includes the GUI interface application kernelshark as well +as trace-graph and trace-view. + +These files also make up the code to create the libtracecmd library. + +The applications are licensed under the GNU General Public License 2.0 +(see COPYING) and the libraries are licensed under the GNU +Lesser General Public License 2.1 (See COPYING.LIB). 
+ +BUILDING: + +In order to install build dependencies on Debian / Ubuntu do the following: + sudo apt-get install build-essential git pkg-config -y + sudo apt-get install libtracefs-dev libtraceevent-dev -y + +In order to install build dependencies on Fedora, as root do the following: + dnf install gcc make git pkg-config -y + dnf install libtracefs-devel libtraceevent-devel -y + +In case your distribution does not have the required libtracefs and +libtraceevent libraries, build and install them manually: + + git clone https://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git/ + cd libtraceevent + make + sudo make install + + git clone https://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git/ + cd libtracefs + make + sudo make install + +To make trace-cmd + make + +To make the gui + make gui + +INSTALL: + +To install trace-cmd + make install + +To install the gui + make install_gui + +Note: The default install is relative to /usr/local + The default install directory is /usr/local/bin + The default plugin directory is /usr/local/lib/trace-cmd/plugins + +To change the default, you can set 'prefix', eg +mkdir $HOME/test-trace +make prefix=$HOME/test-trace +make prefix=$HOME/test-trace install + diff --git a/features.mk b/features.mk new file mode 100644 index 00000000..53f35fd4 --- /dev/null +++ b/features.mk @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: GPL-2.0 + +# taken from perf which was based on Linux Kbuild +# try-cc +# Usage: option = $(call try-cc, source-to-build, cc-options) +try-cc = $(shell sh -c \ + 'TMP="$(BUILD_OUTPUT)$(TMPOUT).$$$$"; \ + echo "$(1)" | \ + $(CC) -x c - $(2) -o "$$TMP" > /dev/null 2>&1 && echo y; \ + rm -f "$$TMP"') + +define SOURCE_PTRACE +#include <stdio.h> +#include <sys/ptrace.h> + +int main (void) +{ + int ret; + ret = ptrace(PTRACE_ATTACH, 0, NULL, 0); + ptrace(PTRACE_TRACEME, 0, NULL, 0); + ptrace(PTRACE_GETSIGINFO, 0, NULL, NULL); + ptrace(PTRACE_GETEVENTMSG, 0, NULL, NULL); + ptrace(PTRACE_SETOPTIONS, NULL, NULL, + 
PTRACE_O_TRACEFORK | + PTRACE_O_TRACEVFORK | + PTRACE_O_TRACECLONE | + PTRACE_O_TRACEEXIT); + ptrace(PTRACE_CONT, NULL, NULL, 0); + ptrace(PTRACE_DETACH, 0, NULL, NULL); + ptrace(PTRACE_SETOPTIONS, 0, NULL, + PTRACE_O_TRACEFORK | + PTRACE_O_TRACEVFORK | + PTRACE_O_TRACECLONE | + PTRACE_O_TRACEEXIT); + return ret; +} +endef + +define SOURCE_AUDIT +#include <stdio.h> +#include <libaudit.h> + +int main (void) +{ + char *name; + int ret; + ret = audit_detect_machine(); + if (ret < 0) + return ret; + name = audit_syscall_to_name(1, ret); + if (!name) + return -1; + return ret; +} +endef diff --git a/include/linux/time64.h b/include/linux/time64.h new file mode 100644 index 00000000..3961589e --- /dev/null +++ b/include/linux/time64.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _TOOLS_LINUX_TIME64_H +#define _TOOLS_LINUX_TIME64_H + +#define MSEC_PER_SEC 1000L +#define USEC_PER_MSEC 1000L +#define NSEC_PER_USEC 1000L +#define NSEC_PER_MSEC 1000000L +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000L +#define FSEC_PER_SEC 1000000000000000LL + +#endif /* _LINUX_TIME64_H */ diff --git a/include/trace-cmd/trace-cmd.h b/include/trace-cmd/trace-cmd.h new file mode 100644 index 00000000..5d71e8ba --- /dev/null +++ b/include/trace-cmd/trace-cmd.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ +/* + * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#ifndef _TRACE_CMD_H +#define _TRACE_CMD_H + +#include "event-parse.h" +#include "tracefs.h" + +struct tracecmd_input; + +enum tracecmd_open_flags { + TRACECMD_FL_LOAD_NO_PLUGINS = 1 << 0, /* Do not load plugins */ + TRACECMD_FL_LOAD_NO_SYSTEM_PLUGINS = 1 << 1, /* Do not load system plugins */ +}; + +enum tracecmd_section_flags { + TRACECMD_SEC_FL_COMPRESS = 1 << 0, /* the section is compressed */ +}; + +struct tracecmd_input *tracecmd_open_head(const char *file, int flags); +struct tracecmd_input *tracecmd_open(const char *file, int flags); 
+struct tracecmd_input *tracecmd_open_fd(int fd, int flags); + +void tracecmd_close(struct tracecmd_input *handle); + +int tracecmd_init_data(struct tracecmd_input *handle); +struct tep_record * +tracecmd_read_cpu_first(struct tracecmd_input *handle, int cpu); +struct tep_record * +tracecmd_read_data(struct tracecmd_input *handle, int cpu); +struct tep_record * +tracecmd_read_at(struct tracecmd_input *handle, unsigned long long offset, + int *cpu); +void tracecmd_free_record(struct tep_record *record); + +struct tep_handle *tracecmd_get_tep(struct tracecmd_input *handle); +unsigned long long tracecmd_get_traceid(struct tracecmd_input *handle); +int tracecmd_get_guest_cpumap(struct tracecmd_input *handle, + unsigned long long trace_id, + const char **name, + int *vcpu_count, const int **cpu_pid); +unsigned long long tracecmd_get_first_ts(struct tracecmd_input *handle); +void tracecmd_add_ts_offset(struct tracecmd_input *handle, long long offset); +int tracecmd_buffer_instances(struct tracecmd_input *handle); +const char *tracecmd_buffer_instance_name(struct tracecmd_input *handle, int indx); +struct tracecmd_input *tracecmd_buffer_instance_handle(struct tracecmd_input *handle, int indx); + +void tracecmd_set_loglevel(enum tep_loglevel level); + +#endif /* _TRACE_CMD_H */ diff --git a/include/version.h b/include/version.h new file mode 100644 index 00000000..fcf7ba02 --- /dev/null +++ b/include/version.h @@ -0,0 +1,12 @@ +#ifndef _VERSION_H +#define _VERSION_H + +#define VERSION(a, b) (((a) << 8) + (b)) + +#ifdef BUILDGUI +#include "ks_version.h" +#else +#include "tc_version.h" +#endif + +#endif /* _VERSION_H */ diff --git a/lib/trace-cmd/Makefile b/lib/trace-cmd/Makefile new file mode 100644 index 00000000..9374b163 --- /dev/null +++ b/lib/trace-cmd/Makefile @@ -0,0 +1,129 @@ +# SPDX-License-Identifier: GPL-2.0 + +include $(src)/scripts/utils.mk + +bdir:=$(obj)/lib/trace-cmd +ldir:=$(src)/lib/trace-cmd + +DEFAULT_TARGET = $(LIBTRACECMD_STATIC) + +OBJS = +OBJS += 
trace-hash.o +OBJS += trace-hooks.o +OBJS += trace-input.o +OBJS += trace-output.o +OBJS += trace-recorder.o +OBJS += trace-util.o +OBJS += trace-filter-hash.o +OBJS += trace-msg.o +OBJS += trace-plugin.o +ifeq ($(PERF_DEFINED), 1) +OBJS += trace-perf.o +endif +OBJS += trace-timesync.o +OBJS += trace-timesync-ptp.o +ifeq ($(VSOCK_DEFINED), 1) +OBJS += trace-timesync-kvm.o +endif +OBJS += trace-compress.o +ifeq ($(ZLIB_INSTALLED), 1) +OBJS += trace-compress-zlib.o +endif +ifeq ($(ZSTD_INSTALLED), 1) +OBJS += trace-compress-zstd.o +endif + +# Additional util objects +OBJS += trace-blk-hack.o +OBJS += trace-ftrace.o + +OBJS := $(OBJS:%.o=$(bdir)/%.o) +DEPS := $(OBJS:$(bdir)/%.o=$(bdir)/.%.d) + +all: $(DEFAULT_TARGET) + +$(bdir): + @mkdir -p $(bdir) + +$(OBJS): | $(bdir) +$(DEPS): | $(bdir) + +$(LIBTRACECMD_STATIC): $(OBJS) + $(Q)$(call do_build_static_lib) + +LIBS = $(LIBTRACEEVENT_LDLAGS) $(LIBTRACEFS_LDLAGS) $(LIBZSTD_LDLAGS) -lpthread + +ifeq ($(ZLIB_INSTALLED), 1) +LIBS += -lz +endif + +$(LIBTRACECMD_SHARED_VERSION): $(LIBTRACECMD_SHARED) + @ln -sf $(<F) $@ + +$(LIBTRACECMD_SHARED_SO): $(LIBTRACECMD_SHARED_VERSION) + @ln -sf $(<F) $@ + +libtracecmd.so: force $(LIBTRACECMD_SHARED_SO) + +$(LIBTRACECMD_SHARED): $(OBJS) + $(Q)$(call do_compile_shared_library,$(notdir $(LIBTRACECMD_SHARED_VERSION))) + +$(bdir)/%.o: %.c + $(Q)$(call do_fpic_compile) + +$(DEPS): $(bdir)/.%.d: %.c + $(Q)$(CC) -M -MT $(bdir)/$*.o $(CPPFLAGS) $(CFLAGS) $< > $@ + +$(OBJS): $(bdir)/%.o : $(bdir)/.%.d + +ifeq ("$(DESTDIR)", "") +# If DESTDIR is not defined, then test if after installing the library +# and running ldconfig, if the library is visible by ld.so. +# If not, add the path to /etc/ld.so.conf.d/trace.conf and run ldconfig again. +define install_ld_config + if $(LDCONFIG); then \ + if ! grep -q "^$(libdir)$$" $(LD_SO_CONF_PATH)/* ; then \ + echo here;\ + $(CC) -o $(bdir)/test $(ldir)/test.c -I $(includedir_SQ) \ + -L $(libdir_SQ) -ltracecmd &> /dev/null; \ + if ! 
$(bdir)/test &> /dev/null; then \ + $(call print_install,trace.conf,$(LD_SO_CONF_PATH)) \ + echo $(libdir_SQ) >> $(LD_SO_CONF_PATH)/trace.conf; \ + $(LDCONFIG); \ + fi; \ + $(RM) $(bdir)/test; \ + fi; \ + fi +endef +else +# If installing to a location for another machine or package, do not bother +# with running ldconfig. +define install_ld_config +endef +endif # DESTDIR = "" + +install_pkgconfig: $(PKG_CONFIG_FILE) + $(Q)$(call do_install_pkgconfig_file,$(prefix)) + +install_libs: install_pkgconfig + $(Q)$(call do_install,$(LIBTRACECMD_SHARED),$(libdir_SQ)) + $(Q)$(call print_install,$(LIBTRACECMD_SHARED_VERSION),$(DESTDIR)$(libdir_SQ)) + $(Q)cp -fpR $(LIBTRACECMD_SHARED_VERSION) $(DESTDIR)$(libdir_SQ) + $(Q)$(call print_install,$(LIBTRACECMD_SHARED_SO),$(DESTDIR)$(libdir_SQ)) + $(Q)cp -fpR $(LIBTRACECMD_SHARED_SO) $(DESTDIR)$(libdir_SQ) + $(Q)$(call do_install,$(src)/include/trace-cmd/trace-cmd.h,$(includedir_SQ)/trace-cmd,644) + $(Q)$(call install_ld_config) + +dep_includes := $(wildcard $(DEPS)) + +ifneq ($(dep_includes),) + include $(dep_includes) +endif + +clean: + $(RM) $(bdir)/*.a $(bdir)/*.so $(bdir)/*.so.* $(bdir)/*.o $(bdir)/.*.d + +.PHONY: clean + +PHONY += force +force: diff --git a/lib/trace-cmd/include/private/trace-cmd-private.h b/lib/trace-cmd/include/private/trace-cmd-private.h new file mode 100644 index 00000000..3cc3e9dd --- /dev/null +++ b/lib/trace-cmd/include/private/trace-cmd-private.h @@ -0,0 +1,635 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ +/* + * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#ifndef _TRACE_CMD_PRIVATE_H +#define _TRACE_CMD_PRIVATE_H + +#include <fcntl.h> /* for iovec */ +#include <sys/types.h> +#include "event-parse.h" +#include "trace-cmd/trace-cmd.h" + +#define __packed __attribute__((packed)) +#define __hidden __attribute__((visibility ("hidden"))) + +#define TRACECMD_MAGIC { 23, 8, 68 } + +#define ARRAY_SIZE(_a) (sizeof(_a) / sizeof((_a)[0])) +#define __weak 
__attribute__((weak)) +#define __noreturn __attribute__((noreturn)) + +#define TRACECMD_ERR_MSK ((unsigned long)(-1) & ~((1UL << 14) - 1)) +#define TRACECMD_ISERR(ptr) ((unsigned long)(ptr) > TRACECMD_ERR_MSK) +#define TRACECMD_ERROR(ret) ((void *)((unsigned long)(ret) | TRACECMD_ERR_MSK)) +#define TRACECMD_PTR2ERR(ptr) ((unisgned long)(ptr) & ~TRACECMD_ERR_MSK) + +#define TSCNSEC_CLOCK "tsc2nsec" + +struct tep_plugin_list *trace_load_plugins(struct tep_handle *tep, int flags); + +int *tracecmd_add_id(int *list, int id, int len); + +#define FILE_VERSION_MIN 6 +#define FILE_VERSION_MAX 7 + +#define FILE_VERSION_SECTIONS 7 +#define FILE_VERSION_COMPRESSION 7 + +enum { + RINGBUF_TYPE_PADDING = 29, + RINGBUF_TYPE_TIME_EXTEND = 30, + RINGBUF_TYPE_TIME_STAMP = 31, +}; + +/* Can be overridden */ +void tracecmd_debug(const char *fmt, ...); + +void tracecmd_record_ref(struct tep_record *record); + +void tracecmd_set_debug(bool set_debug); +bool tracecmd_get_debug(void); + +bool tracecmd_is_version_supported(unsigned int version); +int tracecmd_default_file_version(void); + +struct tracecmd_output; +struct tracecmd_recorder; +struct hook_list; + +/* --- tracecmd plugins --- */ + +enum tracecmd_context { + TRACECMD_INPUT, + TRACECMD_OUTPUT, +}; + +enum tracecmd_plugin_flag { + TRACECMD_DISABLE_SYS_PLUGINS = 1, + TRACECMD_DISABLE_PLUGINS = 1 << 1, +}; + +struct trace_plugin_context; + +struct trace_plugin_context * +tracecmd_plugin_context_create(enum tracecmd_context context, void *data); + +void tracecmd_plugin_set_flag(struct trace_plugin_context *context, + enum tracecmd_plugin_flag flag); + +#define TRACECMD_PLUGIN_LOADER tracecmd_plugin_loader +#define TRACECMD_PLUGIN_UNLOADER tracecmd_plugin_unloader +#define TRACECMD_PLUGIN_ALIAS tracecmd_plugin_alias +#define _MAKE_STR(x) #x +#define MAKE_STR(x) _MAKE_STR(x) +#define TRACECMD_PLUGIN_LOADER_NAME MAKE_STR(TRACECMD_PLUGIN_LOADER) +#define TRACECMD_PLUGIN_UNLOADER_NAME MAKE_STR(TRACECMD_PLUGIN_UNLOADER) +#define 
TRACECMD_PLUGIN_ALIAS_NAME MAKE_STR(TRACECMD_PLUGIN_ALIAS) + +typedef int (*tracecmd_plugin_load_func)(struct trace_plugin_context *trace); +typedef int (*tracecmd_plugin_unload_func)(struct trace_plugin_context *trace); + +struct tracecmd_input * +tracecmd_plugin_context_input(struct trace_plugin_context *trace_context); +struct tracecmd_output * +tracecmd_plugin_context_output(struct trace_plugin_context *trace_context); + +void tracecmd_set_quiet(struct tracecmd_output *handle, bool set_quiet); +bool tracecmd_get_quiet(struct tracecmd_output *handle); +void tracecmd_set_out_clock(struct tracecmd_output *handle, const char *clock); +const char *tracecmd_get_trace_clock(struct tracecmd_input *handle); + +const char *tracecmd_get_cpustats(struct tracecmd_input *handle); +const char *tracecmd_get_uname(struct tracecmd_input *handle); +const char *tracecmd_get_version(struct tracecmd_input *handle); +off64_t tracecmd_get_cpu_file_size(struct tracecmd_input *handle, int cpu); + +static inline int tracecmd_host_bigendian(void) +{ + unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 }; + unsigned int *ptr; + + ptr = (unsigned int *)str; + return *ptr == 0x01020304; +} + +/* --- Opening and Reading the trace.dat file --- */ + +enum tracecmd_file_states { + TRACECMD_FILE_ALLOCATED = 0, + TRACECMD_FILE_INIT, + TRACECMD_FILE_HEADERS, + TRACECMD_FILE_FTRACE_EVENTS, + TRACECMD_FILE_ALL_EVENTS, + TRACECMD_FILE_KALLSYMS, + TRACECMD_FILE_PRINTK, + TRACECMD_FILE_CMD_LINES, + TRACECMD_FILE_CPU_COUNT, + TRACECMD_FILE_OPTIONS, + TRACECMD_FILE_CPU_LATENCY, + TRACECMD_FILE_CPU_FLYRECORD, +}; + +enum { + TRACECMD_OPTION_DONE, + TRACECMD_OPTION_DATE, + TRACECMD_OPTION_CPUSTAT, + TRACECMD_OPTION_BUFFER, + TRACECMD_OPTION_TRACECLOCK, + TRACECMD_OPTION_UNAME, + TRACECMD_OPTION_HOOK, + TRACECMD_OPTION_OFFSET, + TRACECMD_OPTION_CPUCOUNT, + TRACECMD_OPTION_VERSION, + TRACECMD_OPTION_PROCMAPS, + TRACECMD_OPTION_TRACEID, + TRACECMD_OPTION_TIME_SHIFT, + TRACECMD_OPTION_GUEST, + TRACECMD_OPTION_TSC2NSEC, 
+ TRACECMD_OPTION_STRINGS, + TRACECMD_OPTION_HEADER_INFO, + TRACECMD_OPTION_FTRACE_EVENTS, + TRACECMD_OPTION_EVENT_FORMATS, + TRACECMD_OPTION_KALLSYMS, + TRACECMD_OPTION_PRINTK, + TRACECMD_OPTION_CMDLINES, + TRACECMD_OPTION_BUFFER_TEXT, + TRACECMD_OPTION_MAX, +}; + +enum { + TRACECMD_FL_IGNORE_DATE = (1 << 0), + TRACECMD_FL_BUFFER_INSTANCE = (1 << 1), + TRACECMD_FL_IN_USECS = (1 << 2), + TRACECMD_FL_RAW_TS = (1 << 3), + TRACECMD_FL_SECTIONED = (1 << 4), + TRACECMD_FL_COMPRESSION = (1 << 5), +}; + +struct tracecmd_ftrace { + struct tracecmd_input *handle; + struct tep_event *fgraph_ret_event; + int fgraph_ret_id; + int long_size; +}; + +struct tracecmd_proc_addr_map { + unsigned long long start; + unsigned long long end; + char *lib_name; +}; + +typedef void (*tracecmd_show_data_func)(struct tracecmd_input *handle, + struct tep_record *record); +typedef void (*tracecmd_handle_init_func)(struct tracecmd_input *handle, + struct hook_list *hook, int global); + +struct tracecmd_input *tracecmd_alloc(const char *file, int flags); +struct tracecmd_input *tracecmd_alloc_fd(int fd, int flags); +void tracecmd_ref(struct tracecmd_input *handle); +int tracecmd_read_headers(struct tracecmd_input *handle, + enum tracecmd_file_states state); +int tracecmd_get_parsing_failures(struct tracecmd_input *handle); +int tracecmd_long_size(struct tracecmd_input *handle); +int tracecmd_page_size(struct tracecmd_input *handle); +int tracecmd_cpus(struct tracecmd_input *handle); +int tracecmd_copy_headers(struct tracecmd_input *in_handle, + struct tracecmd_output *out_handle, + enum tracecmd_file_states start_state, + enum tracecmd_file_states end_state); +int tracecmd_copy_buffer_descr(struct tracecmd_input *in_handle, + struct tracecmd_output *out_handle); +int tracecmd_copy_options(struct tracecmd_input *in_handle, + struct tracecmd_output *out_handle); +int tracecmd_copy_trace_data(struct tracecmd_input *in_handle, + struct tracecmd_output *out_handle); +void tracecmd_set_flag(struct 
tracecmd_input *handle, int flag); +void tracecmd_clear_flag(struct tracecmd_input *handle, int flag); +unsigned long tracecmd_get_flags(struct tracecmd_input *handle); +enum tracecmd_file_states tracecmd_get_file_state(struct tracecmd_input *handle); +int tracecmd_enable_tsync(struct tracecmd_input *handle, bool enable); + +void tracecmd_parse_trace_clock(struct tracecmd_input *handle, char *file, int size); + +int tracecmd_make_pipe(struct tracecmd_input *handle, int cpu, int fd, int cpus); + +int tracecmd_is_buffer_instance(struct tracecmd_input *handle); + +void tracecmd_set_ts_offset(struct tracecmd_input *handle, long long offset); +void tracecmd_set_ts2secs(struct tracecmd_input *handle, unsigned long long hz); + +void tracecmd_print_events(struct tracecmd_input *handle, const char *regex); + +struct hook_list *tracecmd_hooks(struct tracecmd_input *handle); + +void tracecmd_print_stats(struct tracecmd_input *handle); +void tracecmd_print_uname(struct tracecmd_input *handle); +void tracecmd_print_version(struct tracecmd_input *handle); + +struct tep_record * +tracecmd_peek_data(struct tracecmd_input *handle, int cpu); + +static inline struct tep_record * +tracecmd_peek_data_ref(struct tracecmd_input *handle, int cpu) +{ + struct tep_record *rec = tracecmd_peek_data(handle, cpu); + if (rec) + rec->ref_count++; + return rec; +} + +int tracecmd_latency_data_read(struct tracecmd_input *handle, char **buf, size_t *size); + +struct tep_record * +tracecmd_read_prev(struct tracecmd_input *handle, struct tep_record *record); + +struct tep_record * +tracecmd_read_next_data(struct tracecmd_input *handle, int *rec_cpu); + +struct tep_record * +tracecmd_peek_next_data(struct tracecmd_input *handle, int *rec_cpu); + +struct tep_record * +tracecmd_translate_data(struct tracecmd_input *handle, + void *ptr, int size); +struct tep_record * +tracecmd_read_cpu_last(struct tracecmd_input *handle, int cpu); +int tracecmd_refresh_record(struct tracecmd_input *handle, + struct 
tep_record *record); + +int tracecmd_set_cpu_to_timestamp(struct tracecmd_input *handle, + int cpu, unsigned long long ts); +void +tracecmd_set_all_cpus_to_timestamp(struct tracecmd_input *handle, + unsigned long long time); + +int tracecmd_set_cursor(struct tracecmd_input *handle, + int cpu, unsigned long long offset); +unsigned long long +tracecmd_get_cursor(struct tracecmd_input *handle, int cpu); + +unsigned long tracecmd_get_in_file_version(struct tracecmd_input *handle); +size_t tracecmd_get_options_offset(struct tracecmd_input *handle); +int tracecmd_get_file_compress_proto(struct tracecmd_input *handle, + const char **name, const char **version); + +int tracecmd_ftrace_overrides(struct tracecmd_input *handle, struct tracecmd_ftrace *finfo); +bool tracecmd_get_use_trace_clock(struct tracecmd_input *handle); +tracecmd_show_data_func +tracecmd_get_show_data_func(struct tracecmd_input *handle); +void tracecmd_set_show_data_func(struct tracecmd_input *handle, + tracecmd_show_data_func func); + +int tracecmd_record_at_buffer_start(struct tracecmd_input *handle, struct tep_record *record); +unsigned long long tracecmd_page_ts(struct tracecmd_input *handle, + struct tep_record *record); +unsigned int tracecmd_record_ts_delta(struct tracecmd_input *handle, + struct tep_record *record); + +struct tracecmd_proc_addr_map * +tracecmd_search_task_map(struct tracecmd_input *handle, + int pid, unsigned long long addr); +#ifndef SWIG +/* hack for function graph work around */ +extern __thread struct tracecmd_input *tracecmd_curr_thread_handle; +#endif + + +/* --- Creating and Writing the trace.dat file --- */ + +struct tracecmd_event_list { + struct tracecmd_event_list *next; + const char *glob; +}; + +struct tracecmd_option; +struct tracecmd_msg_handle; + +int tracecmd_output_set_msg(struct tracecmd_output *handle, + struct tracecmd_msg_handle *msg_handle); +int tracecmd_output_set_trace_dir(struct tracecmd_output *handle, const char *tracing_dir); +int 
tracecmd_output_set_kallsyms(struct tracecmd_output *handle, const char *kallsyms); +int tracecmd_output_set_from_input(struct tracecmd_output *handle, struct tracecmd_input *ihandle); +int tracecmd_output_set_version(struct tracecmd_output *handle, int file_version); +int tracecmd_output_set_compression(struct tracecmd_output *handle, const char *compression); +int tracecmd_output_write_headers(struct tracecmd_output *handle, + struct tracecmd_event_list *list); + +struct tracecmd_output *tracecmd_output_create(const char *output_file); +struct tracecmd_output *tracecmd_output_create_fd(int fd); +struct tracecmd_output *tracecmd_create_file_latency(const char *output_file, int cpus, + int file_version, const char *compression); + +struct tracecmd_option *tracecmd_add_option(struct tracecmd_output *handle, + unsigned short id, int size, + const void *data); +struct tracecmd_option * +tracecmd_add_option_v(struct tracecmd_output *handle, + unsigned short id, const struct iovec *vector, int count); + +int tracecmd_add_buffer_info(struct tracecmd_output *handle, const char *name, int cpus); +int tracecmd_write_buffer_info(struct tracecmd_output *handle); + +int tracecmd_write_cpus(struct tracecmd_output *handle, int cpus); +int tracecmd_write_cmdlines(struct tracecmd_output *handle); +int tracecmd_write_options(struct tracecmd_output *handle); +int tracecmd_write_meta_strings(struct tracecmd_output *handle); +int tracecmd_append_options(struct tracecmd_output *handle); +void tracecmd_output_close(struct tracecmd_output *handle); +void tracecmd_output_free(struct tracecmd_output *handle); +struct tracecmd_output *tracecmd_copy(struct tracecmd_input *ihandle, const char *file, + enum tracecmd_file_states state, int file_version, + const char *compression); + +int tracecmd_write_cpu_data(struct tracecmd_output *handle, + int cpus, char * const *cpu_data_files, const char *buff_name); +int tracecmd_append_cpu_data(struct tracecmd_output *handle, + int cpus, char * const 
*cpu_data_files); +int tracecmd_append_buffer_cpu_data(struct tracecmd_output *handle, + const char *name, int cpus, char * const *cpu_data_files); +struct tracecmd_output *tracecmd_get_output_handle_fd(int fd); +unsigned long tracecmd_get_out_file_version(struct tracecmd_output *handle); +unsigned long long tracecmd_get_out_file_offset(struct tracecmd_output *handle); + +/* --- Reading the Fly Recorder Trace --- */ + +enum { + TRACECMD_RECORD_NOSPLICE = (1 << 0), /* Use read instead of splice */ + TRACECMD_RECORD_SNAPSHOT = (1 << 1), /* Extract from snapshot */ + TRACECMD_RECORD_BLOCK_SPLICE = (1 << 2), /* Block on splice write */ + TRACECMD_RECORD_NOBRASS = (1 << 3), /* Splice directly without a brass pipe */ + TRACECMD_RECORD_POLL = (1 << 4), /* Use O_NONBLOCK, poll trace buffers */ +}; + +void tracecmd_free_recorder(struct tracecmd_recorder *recorder); +struct tracecmd_recorder *tracecmd_create_recorder(const char *file, int cpu, unsigned flags); +struct tracecmd_recorder *tracecmd_create_recorder_fd(int fd, int cpu, unsigned flags); +struct tracecmd_recorder *tracecmd_create_recorder_virt(const char *file, int cpu, unsigned flags, int trace_fd); +struct tracecmd_recorder *tracecmd_create_recorder_maxkb(const char *file, int cpu, unsigned flags, int maxkb); +struct tracecmd_recorder *tracecmd_create_buffer_recorder_fd(int fd, int cpu, unsigned flags, const char *buffer); +struct tracecmd_recorder *tracecmd_create_buffer_recorder(const char *file, int cpu, unsigned flags, const char *buffer); +struct tracecmd_recorder *tracecmd_create_buffer_recorder_maxkb(const char *file, int cpu, unsigned flags, const char *buffer, int maxkb); + +int tracecmd_start_recording(struct tracecmd_recorder *recorder, unsigned long sleep); +void tracecmd_stop_recording(struct tracecmd_recorder *recorder); +long tracecmd_flush_recording(struct tracecmd_recorder *recorder); + +enum tracecmd_msg_flags { + TRACECMD_MSG_FL_USE_TCP = 1 << 0, + TRACECMD_MSG_FL_USE_VSOCK = 1 << 1, +}; + +/* 
for both client and server */ +#ifdef __ANDROID__ +#define MSG_CACHE_FILE "/data/local/tmp/trace_msg_cacheXXXXXX" +#else /* !__ANDROID__ */ +#define MSG_CACHE_FILE "/tmp/trace_msg_cacheXXXXXX" +#endif /* __ANDROID__ */ + +struct tracecmd_msg_handle { + int fd; + short cpu_count; + short version; /* Current protocol version */ + unsigned long flags; + bool done; + bool cache; + int cfd; + char cfile[sizeof(MSG_CACHE_FILE)]; +}; + +struct tracecmd_tsync_protos { + char **names; +}; + +struct tracecmd_msg_handle * +tracecmd_msg_handle_alloc(int fd, unsigned long flags); +int tracecmd_msg_handle_cache(struct tracecmd_msg_handle *msg_handle); + +/* Closes the socket and frees the handle */ +void tracecmd_msg_handle_close(struct tracecmd_msg_handle *msg_handle); + +/* for clients */ +int tracecmd_msg_send_init_data(struct tracecmd_msg_handle *msg_handle, + unsigned int **client_ports); +int tracecmd_msg_data_send(struct tracecmd_msg_handle *msg_handle, + const char *buf, int size); +int tracecmd_msg_finish_sending_data(struct tracecmd_msg_handle *msg_handle); +int tracecmd_msg_send_close_msg(struct tracecmd_msg_handle *msg_handle); +int tracecmd_msg_send_close_resp_msg(struct tracecmd_msg_handle *msg_handle); +int tracecmd_msg_wait_close(struct tracecmd_msg_handle *msg_handle); +int tracecmd_msg_wait_close_resp(struct tracecmd_msg_handle *msg_handle); + +/* for server */ +int tracecmd_msg_initial_setting(struct tracecmd_msg_handle *msg_handle); +int tracecmd_msg_send_port_array(struct tracecmd_msg_handle *msg_handle, + unsigned *ports); +int tracecmd_msg_read_data(struct tracecmd_msg_handle *msg_handle, int ofd); +int tracecmd_msg_collect_data(struct tracecmd_msg_handle *msg_handle, int ofd); +bool tracecmd_msg_done(struct tracecmd_msg_handle *msg_handle); +void tracecmd_msg_set_done(struct tracecmd_msg_handle *msg_handle); + +int tracecmd_msg_send_trace_req(struct tracecmd_msg_handle *msg_handle, + int argc, char **argv, bool use_fifos, + unsigned long long trace_id, + 
struct tracecmd_tsync_protos *protos); +int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle *msg_handle, + int *argc, char ***argv, bool *use_fifos, + unsigned long long *trace_id, + struct tracecmd_tsync_protos **protos); + +int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle, + int nr_cpus, int page_size, + unsigned int *ports, bool use_fifos, + unsigned long long trace_id, + const char *tsync_proto, unsigned int tsync_port); +int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle, + int *nr_cpus, int *page_size, + unsigned int **ports, bool *use_fifos, + unsigned long long *trace_id, + char **tsync_proto, + unsigned int *tsync_port); + +int tracecmd_msg_send_time_sync(struct tracecmd_msg_handle *msg_handle, + char *sync_protocol, unsigned int sync_msg_id, + unsigned int payload_size, char *payload); +int tracecmd_msg_recv_time_sync(struct tracecmd_msg_handle *msg_handle, + char *sync_protocol, + unsigned int *sync_msg_id, + unsigned int *payload_size, char **payload); + +enum tracecmd_clocks { + TRACECMD_CLOCK_UNKNOWN = 0, + TRACECMD_CLOCK_LOCAL = 1, + TRACECMD_CLOCK_GLOBAL = 1 << 1, + TRACECMD_CLOCK_COUNTER = 1 << 2, + TRACECMD_CLOCK_UPTIME = 1 << 3, + TRACECMD_CLOCK_PERF = 1 << 4, + TRACECMD_CLOCK_MONO = 1 << 5, + TRACECMD_CLOCK_MONO_RAW = 1 << 6, + TRACECMD_CLOCK_BOOT = 1 << 7, + TRACECMD_CLOCK_X86_TSC = 1 << 8 +}; + +enum tracecmd_clocks tracecmd_clock_str2id(const char *clock); +const char *tracecmd_clock_id2str(enum tracecmd_clocks clock); + +/* --- Timestamp synchronization --- */ + +struct tracecmd_time_sync; +#define TRACECMD_TSYNC_PNAME_LENGTH 16 +#define TRACECMD_TSYNC_PROTO_NONE "none" + +enum{ + TRACECMD_TIME_SYNC_CMD_PROBE = 1, + TRACECMD_TIME_SYNC_CMD_STOP = 2, +}; + +enum tracecmd_time_sync_role { + TRACECMD_TIME_SYNC_ROLE_HOST = (1 << 0), + TRACECMD_TIME_SYNC_ROLE_GUEST = (1 << 1), + TRACECMD_TIME_SYNC_ROLE_CLIENT = (1 << 2), + TRACECMD_TIME_SYNC_ROLE_SERVER = (1 << 3), +}; + +/* Timestamp 
synchronization flags */ +#define TRACECMD_TSYNC_FLAG_INTERPOLATE 0x1 + +void tracecmd_tsync_init(void); +int tracecmd_tsync_proto_getall(struct tracecmd_tsync_protos **protos, const char *clock, int role); +bool tsync_proto_is_supported(const char *proto_name); +struct tracecmd_time_sync * +tracecmd_tsync_with_host(int fd, + const struct tracecmd_tsync_protos *tsync_protos, + const char *clock, int remote_id, int local_id); +int tracecmd_tsync_with_host_stop(struct tracecmd_time_sync *tsync); +struct tracecmd_time_sync * +tracecmd_tsync_with_guest(unsigned long long trace_id, int loop_interval, + unsigned int fd, int guest_pid, + int guest_cpus, const char *proto_name, const char *clock); +int tracecmd_tsync_with_guest_stop(struct tracecmd_time_sync *tsync); +int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu, + int *count, long long **ts, + long long **offsets, long long **scalings, long long **frac); +int tracecmd_tsync_get_selected_proto(struct tracecmd_time_sync *tsync, + char **selected_proto); +void tracecmd_tsync_free(struct tracecmd_time_sync *tsync); +int tracecmd_write_guest_time_shift(struct tracecmd_output *handle, + struct tracecmd_time_sync *tsync); + +/* --- Compression --- */ +struct tracecmd_compress_chunk { + unsigned int size; + unsigned int zsize; + off64_t zoffset; + off64_t offset; +}; +struct tracecmd_compression; +struct tracecmd_compression_proto { + int weight; + const char *name; + const char *version; + int (*compress)(void *ctx, const void *in, int in_bytes, void *out, int out_bytes); + int (*uncompress)(void *ctx, const void *in, int in_bytes, void *out, int out_bytes); + unsigned int (*compress_size)(void *ctx, unsigned int bytes); + bool (*is_supported)(const char *name, const char *version); + void *(*new_context)(void); + void (*free_context)(void *ctx); +}; + +struct tracecmd_compression *tracecmd_compress_alloc(const char *name, const char *version, + int fd, struct tep_handle *tep, + struct 
tracecmd_msg_handle *msg_handle); +void tracecmd_compress_destroy(struct tracecmd_compression *handle); +int tracecmd_compress_block(struct tracecmd_compression *handle); +int tracecmd_uncompress_block(struct tracecmd_compression *handle); +void tracecmd_compress_reset(struct tracecmd_compression *handle); +int tracecmd_compress_buffer_read(struct tracecmd_compression *handle, char *dst, int len); +int tracecmd_compress_pread(struct tracecmd_compression *handle, char *dst, int len, off_t offset); +int tracecmd_compress_buffer_write(struct tracecmd_compression *handle, + const void *data, unsigned long long size); +off64_t tracecmd_compress_lseek(struct tracecmd_compression *handle, off64_t offset, int whence); +int tracecmd_compress_proto_get_name(struct tracecmd_compression *compress, + const char **name, const char **version); +bool tracecmd_compress_is_supported(const char *name, const char *version); +int tracecmd_compress_protos_get(char ***names, char ***versions); +int tracecmd_compress_proto_register(struct tracecmd_compression_proto *proto); +int tracecmd_compress_copy_from(struct tracecmd_compression *handle, int fd, int chunk_size, + unsigned long long *read_size, unsigned long long *write_size); +int tracecmd_uncompress_copy_to(struct tracecmd_compression *handle, int fd, + unsigned long long *read_size, unsigned long long *write_size); +int tracecmd_uncompress_chunk(struct tracecmd_compression *handle, + struct tracecmd_compress_chunk *chunk, char *data); +int tracecmd_load_chunks_info(struct tracecmd_compression *handle, + struct tracecmd_compress_chunk **chunks_info); +/* --- Plugin handling --- */ +extern struct tep_plugin_option trace_ftrace_options[]; + +char **trace_util_find_plugin_files(const char *suffix); +void trace_util_free_plugin_files(char **files); + +/* Used for trace-cmd list */ +void tracecmd_ftrace_load_options(void); + +/* event hooks */ + +struct hook_list { + struct hook_list *next; + struct buffer_instance *instance; + const 
char *hook; + char *str; + char *start_system; + char *start_event; + char *start_match; + char *end_system; + char *end_event; + char *end_match; + char *pid; + int migrate; + int global; + int stack; +}; + +struct hook_list *tracecmd_create_event_hook(const char *arg); +void tracecmd_free_hooks(struct hook_list *hooks); + +void tracecmd_plog(const char *fmt, ...); +void tracecmd_plog_error(const char *fmt, ...); +int tracecmd_set_logfile(char *logfile); + +/* --- System --- */ +unsigned long long tracecmd_generate_traceid(void); +int tracecmd_count_cpus(void); + +/* --- Hack! --- */ +int tracecmd_blk_hack(struct tracecmd_input *handle); + +/* --- Stack tracer functions --- */ +int tracecmd_stack_tracer_status(int *status); + +/* --- Debugging --- */ +struct kbuffer *tracecmd_record_kbuf(struct tracecmd_input *handle, + struct tep_record *record); +void *tracecmd_record_page(struct tracecmd_input *handle, + struct tep_record *record); +void *tracecmd_record_offset(struct tracecmd_input *handle, + struct tep_record *record); +#ifdef PERF + +#include <linux/perf_event.h> + +/* trace-cmd Perf */ +struct trace_perf { + int fd; + int cpu; + int pid; + int pages; + struct perf_event_attr pe; + struct perf_event_mmap_page *mmap; +}; +int trace_perf_init(struct trace_perf *perf, int pages, int cpu, int pid); +int trace_perf_open(struct trace_perf *perf); +void trace_perf_close(struct trace_perf *perf); +#endif + +#endif /* _TRACE_CMD_PRIVATE_H */ diff --git a/lib/trace-cmd/include/private/trace-filter-hash.h b/lib/trace-cmd/include/private/trace-filter-hash.h new file mode 100644 index 00000000..4111c41e --- /dev/null +++ b/lib/trace-cmd/include/private/trace-filter-hash.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * Copyright (C) 2018 VMware Inc, Steven Rostedt <rostedt@goodmis.org> + * + */ +#ifndef _TRACE_FILTER_HASH_H +#define _TRACE_FILTER_HASH_H + +#include <stdint.h> 
+ +struct tracecmd_filter_id_item { + struct tracecmd_filter_id_item *next; + int id; +}; + +struct tracecmd_filter_id { + struct tracecmd_filter_id_item **hash; + int count; +}; + +/** + * tracecmd_quick_hash - A quick (non secured) hash algorithm + * @val: The value to perform the hash on + * @bits: The size in bits you need to return + * + * This is a quick hashing function adapted from Donald E. Knuth's 32 + * bit multiplicative hash. See The Art of Computer Programming (TAOCP). + * Multiplication by the Prime number, closest to the golden ratio of + * 2^32. + * + * @bits is used to mask the result for use cases that require + * a power of 2 return value that is less than 32 bits. Any value + * of @bits greater than 31 (or zero), will simply return the full hash on @val. NOTE(review): the mask is built from a signed `1 << bits`; with a 32-bit int that shift is not representable for @bits == 31, so `1U << bits` would be safer. + */ +static inline uint32_t tracecmd_quick_hash(uint32_t val, unsigned int bits) +{ + val *= UINT32_C(2654435761); + + if (!bits || bits > 31) + return val; + + return val & ((1 << bits) - 1); +} + +struct tracecmd_filter_id_item * + tracecmd_filter_id_find(struct tracecmd_filter_id *hash, int id); +void tracecmd_filter_id_add(struct tracecmd_filter_id *hash, int id); +void tracecmd_filter_id_remove(struct tracecmd_filter_id *hash, int id); +void tracecmd_filter_id_clear(struct tracecmd_filter_id *hash); +struct tracecmd_filter_id *tracecmd_filter_id_hash_alloc(void); +void tracecmd_filter_id_hash_free(struct tracecmd_filter_id *hash); +struct tracecmd_filter_id * + tracecmd_filter_id_hash_copy(struct tracecmd_filter_id *hash); +int *tracecmd_filter_ids(struct tracecmd_filter_id *hash); +int tracecmd_filter_id_compare(struct tracecmd_filter_id *hash1, + struct tracecmd_filter_id *hash2); + +static inline int tracecmd_filter_task_count(struct tracecmd_filter_id *hash) +{ + return hash->count; +} + +#endif /* _TRACE_FILTER_HASH_H */ diff --git a/lib/trace-cmd/include/private/trace-hash.h b/lib/trace-cmd/include/private/trace-hash.h new file mode 100644 index 00000000..aa92cdfe --- /dev/null +++
b/lib/trace-cmd/include/private/trace-hash.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2014 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#ifndef _TRACE_HASH_H +#define _TRACE_HASH_H + +struct trace_hash_item { + struct trace_hash_item *next; + struct trace_hash_item *prev; + unsigned long long key; +}; + +struct trace_hash { + struct trace_hash_item **buckets; + int nr_buckets; + int power; +}; + +int trace_hash_init(struct trace_hash *hash, int buckets); +void trace_hash_free(struct trace_hash *hash); +int trace_hash_add(struct trace_hash *hash, struct trace_hash_item *item); +int trace_hash_empty(struct trace_hash *hash); + +static inline void trace_hash_del(struct trace_hash_item *item) +{ + struct trace_hash_item *prev = item->prev; + + prev->next = item->next; + if (item->next) + item->next->prev = prev; +} + +#define trace_hash_for_each_bucket(bucket, hash) \ + for (bucket = (hash)->buckets; \ + (bucket) < (hash)->buckets + (hash)->nr_buckets; (bucket)++) + +#define trace_hash_for_each_item(item, bucket) \ + for ((item = *(bucket)); item; item = (item)->next) + +#define trace_hash_for_each_item_safe(item, n, bucket) \ + for ((item = *(bucket)), n = item ? item->next : NULL; item; \ + item = n, n = item ? 
(item)->next : NULL) + +#define trace_hash_while_item(item, bucket) \ + while ((item = *(bucket))) + +typedef int (*trace_hash_func)(struct trace_hash_item *item, void *data); + +struct trace_hash_item * +trace_hash_find(struct trace_hash *hash, unsigned long long key, + trace_hash_func match, void *data); + +#endif /* _TRACE_HASH_H */ diff --git a/lib/trace-cmd/include/private/trace-msg.h b/lib/trace-cmd/include/private/trace-msg.h new file mode 100644 index 00000000..cfcf9615 --- /dev/null +++ b/lib/trace-cmd/include/private/trace-msg.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ +#ifndef _TRACE_MSG_H_ +#define _TRACE_MSG_H_ + +#include <stdbool.h> + +#define UDP_MAX_PACKET (65536 - 20) +#define V3_MAGIC "766679\0" +#define V3_CPU "-1V3" + +#define V1_PROTOCOL 1 +#define V3_PROTOCOL 3 + +extern unsigned int page_size; + +#endif /* _TRACE_MSG_H_ */ diff --git a/lib/trace-cmd/include/trace-cmd-local.h b/lib/trace-cmd/include/trace-cmd-local.h new file mode 100644 index 00000000..6ac34137 --- /dev/null +++ b/lib/trace-cmd/include/trace-cmd-local.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ +/* + * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#ifndef _TRACE_CMD_LOCAL_H +#define _TRACE_CMD_LOCAL_H + +#include <byteswap.h> +#include "trace-cmd-private.h" + +#define FILE_VERSION_DEFAULT 7 + +/* Can be overridden */ +void tracecmd_warning(const char *fmt, ...); +void tracecmd_critical(const char *fmt, ...); +void tracecmd_info(const char *fmt, ...); + +#ifndef htonll +# if __BYTE_ORDER == __LITTLE_ENDIAN +#define htonll(x) __bswap_64(x) +#define ntohll(x) __bswap_64(x) +#else +#define htonll(x) (x) +#define ntohll(x) (x) +#endif +#endif + +#ifdef HAVE_ZLIB +int tracecmd_zlib_init(void); +#endif + +#ifdef HAVE_ZSTD +int tracecmd_zstd_init(void); +#else +static inline int tracecmd_zstd_init(void) +{ + return 0; +} +#endif + +struct data_file_write { + unsigned long long file_size; + unsigned long long 
write_size; + /* offset in the trace file, where write_size is stored */ + unsigned long long file_write_size; + unsigned long long data_offset; + /* offset in the trace file, where data_offset is stored */ + unsigned long long file_data_offset; +}; + +void tracecmd_compress_init(void); +void tracecmd_compress_free(void); + +bool check_file_state(unsigned long file_version, int current_state, int new_state); +bool check_out_state(struct tracecmd_output *handle, int new_state); + +int out_uncompress_block(struct tracecmd_output *handle); +int out_compression_start(struct tracecmd_output *handle, bool compress); +int out_compression_end(struct tracecmd_output *handle, bool compress); +void out_compression_reset(struct tracecmd_output *handle, bool compress); +bool out_check_compression(struct tracecmd_output *handle); + +void out_set_file_state(struct tracecmd_output *handle, int new_state); +int out_save_options_offset(struct tracecmd_output *handle, + unsigned long long start); +unsigned long long out_copy_fd_compress(struct tracecmd_output *handle, + int fd, unsigned long long max, + unsigned long long *write_size, int page); +void in_uncompress_reset(struct tracecmd_input *handle); +int in_uncompress_block(struct tracecmd_input *handle); + +unsigned long long +out_write_section_header(struct tracecmd_output *handle, unsigned short header_id, + char *description, int flags, bool option); +int out_update_section_header(struct tracecmd_output *handle, unsigned long long offset); + +long long do_write_check(struct tracecmd_output *handle, const void *data, long long size); + +struct tracecmd_option * +out_add_buffer_option(struct tracecmd_output *handle, const char *name, + unsigned short id, unsigned long long data_offset, + int cpus, struct data_file_write *cpu_data, int page_size); + +struct cpu_data_source { + int fd; + int size; + off64_t offset; +}; + +int out_write_cpu_data(struct tracecmd_output *handle, int cpus, + struct cpu_data_source *data, const char 
*buff_name); +int out_write_emty_cpu_data(struct tracecmd_output *handle, int cpus); +off64_t msg_lseek(struct tracecmd_msg_handle *msg_handle, off64_t offset, int whence); +unsigned long long get_last_option_offset(struct tracecmd_input *handle); +unsigned int get_meta_strings_size(struct tracecmd_input *handle); + +#endif /* _TRACE_CMD_LOCAL_H */ diff --git a/lib/trace-cmd/include/trace-hash-local.h b/lib/trace-cmd/include/trace-hash-local.h new file mode 100644 index 00000000..70a0e76a --- /dev/null +++ b/lib/trace-cmd/include/trace-hash-local.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2009, Steven Rostedt <srostedt@redhat.com> + * + */ +#ifndef _TRACE_HASH_LOCAL_H +#define _TRACE_HASH_LOCAL_H + +static inline unsigned int trace_hash(unsigned int val) +{ + unsigned int hash, tmp; + + hash = 12546869; /* random prime */ + + /* + * The following hash is based off of Paul Hsieh's super fast hash: + * http://www.azillionmonkeys.com/qed/hash.html + * Note, he released this code unde the GPL 2.0 license, which + * is the same as the license for the programs that use it here. 
+ */ + + hash += (val & 0xffff); + tmp = (val >> 16) ^ hash; + hash = (hash << 16) ^ tmp; + hash += hash >> 11; + + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + + return hash; +} + +static inline unsigned int trace_hash_str(char *str) +{ + int val = 0; + int i; + + for (i = 0; str[i]; i++) + val += ((int)str[i]) << (i & 0xf); + return trace_hash(val); +} +#endif /* _TRACE_HASH_LOCAL_H */ diff --git a/lib/trace-cmd/include/trace-tsync-local.h b/lib/trace-cmd/include/trace-tsync-local.h new file mode 100644 index 00000000..5bbc1db6 --- /dev/null +++ b/lib/trace-cmd/include/trace-tsync-local.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ +/* + * Copyright (C) 2019, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com> + * + */ +#ifndef _TRACE_TSYNC_LOCAL_H +#define _TRACE_TSYNC_LOCAL_H + +#include <stdbool.h> + +struct tracecmd_time_sync { + pthread_t thread; + bool thread_running; + unsigned long long trace_id; + char *proto_name; + int loop_interval; + pthread_mutex_t lock; + pthread_cond_t cond; + pthread_barrier_t first_sync; + char *clock_str; + struct tracecmd_msg_handle *msg_handle; + void *context; + int guest_pid; + int vcpu_count; + int remote_id; + int local_id; +}; + +struct clock_sync_offsets { + /* Arrays with calculated time offsets at given time */ + int sync_size; /* Allocated size of sync_ts, + * sync_offsets, sync_scalings and sync_frac + */ + int sync_count; /* Number of elements in sync_ts, + * sync_offsets, sync_scalings and sync_frac + */ + long long *sync_ts; + long long *sync_offsets; + long long *sync_scalings; + long long *sync_frac; +}; + +struct clock_sync_context { + void *proto_data; /* time sync protocol specific data */ + bool is_server; /* server side time sync role */ + bool is_guest; /* guest or host time sync role */ + struct tracefs_instance *instance; /* ftrace buffer, used for time sync events */ + + int cpu_count; + struct 
clock_sync_offsets *offsets; /* Array of size cpu_count + * calculated offsets per CPU + */ + + /* Identifiers of local and remote time sync peers */ + unsigned int local_id; + unsigned int remote_id; +}; + +int tracecmd_tsync_proto_register(const char *proto_name, int accuracy, int roles, + int supported_clocks, unsigned int flags, + int (*init)(struct tracecmd_time_sync *), + int (*free)(struct tracecmd_time_sync *), + int (*calc)(struct tracecmd_time_sync *, + long long *, long long *, long long*, + long long *, unsigned int)); +int tracecmd_tsync_proto_unregister(char *proto_name); +int ptp_clock_sync_register(void); + +#ifdef VSOCK +int kvm_clock_sync_register(void); +#else +static inline int kvm_clock_sync_register(void) +{ + return 0; +} +#endif + +#endif /* _TRACE_TSYNC_LOCAL_H */ diff --git a/lib/trace-cmd/include/trace-write-local.h b/lib/trace-cmd/include/trace-write-local.h new file mode 100644 index 00000000..046992f1 --- /dev/null +++ b/lib/trace-cmd/include/trace-write-local.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ +/* + * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#ifndef _TRACE_WRITE_LOCAL_H +#define _TRACE_WRITE_LOCAL_H + +/* Local for trace-input.c, trace-output.c and trace-msg.c */ + +static inline ssize_t __do_write(int fd, const void *data, size_t size) +{ + ssize_t tot = 0; + ssize_t w; + + do { + w = write(fd, data + tot, size - tot); + tot += w; + + if (!w) + break; + if (w < 0) + return w; + } while (tot != size); + + return tot; +} + +static inline ssize_t +__do_write_check(int fd, const void *data, size_t size) +{ + ssize_t ret; + + ret = __do_write(fd, data, size); + if (ret < 0) + return ret; + if (ret != size) + return -1; + + return 0; +} + +#endif /* _TRACE_WRITE_LOCAL_H */ diff --git a/lib/trace-cmd/plugins/Makefile b/lib/trace-cmd/plugins/Makefile new file mode 100644 index 00000000..ed5a9cea --- /dev/null +++ b/lib/trace-cmd/plugins/Makefile @@ -0,0 +1,58 @@ +include 
$(src)/scripts/utils.mk + +bdir:=$(obj)/lib/trace-cmd/plugins + +PLUGIN_OBJS = + +PLUGIN_OBJS := $(PLUGIN_OBJS:%.o=$(bdir)/%.o) +PLUGIN_BUILD := $(PLUGIN_OBJS:$(bdir)/%.o=$(bdir)/%.so) + +PLUGINS := $(PLUGIN_BUILD) + +DEPS := $(PLUGIN_OBJS:$(bdir)/%.o=$(bdir)/.%.d) + +all: $(PLUGINS) + +$(bdir): + @mkdir -p $(bdir) + +$(PLUGIN_OBJS): | $(bdir) +$(DEPS): | $(bdir) + +$(PLUGIN_OBJS): $(bdir)/%.o : %.c + $(Q)$(do_compile_plugin_obj) + +$(PLUGIN_BUILD): $(bdir)/%.so: $(bdir)/%.o + $(Q)$(do_plugin_build) + +$(DEPS): $(bdir)/.%.d: %.c + $(Q)$(CC) -M -MT $(bdir)/$*.o $(CPPFLAGS) $(CFLAGS) $< > $@ + +$(PLUGIN_OBJS): $(bdir)/%.o : $(bdir)/.%.d + +PLUGINS_INSTALL = $(subst .so,.install,$(PLUGINS)) + +$(PLUGINS_INSTALL): $(bdir)/%.install : $(bdir)/%.so force + $(Q)$(call do_install_data,$<,$(plugin_tracecmd_dir_SQ)) + +install_plugins: $(PLUGINS_INSTALL) + +# The following targets are necessary to trigger a rebuild when +# $(PLUGIN_DIR_TRACECMD) changes. Without them, a full clean build would +# be necessary in order to get the binaries updated.
+ +$(bdir)/tracecmd_plugin_dir: $(bdir) force + $(Q)$(N)$(call update_dir, 'PLUGIN_DIR_TRACECMD=$(PLUGIN_DIR_TRACECMD)') + +dep_includes := $(wildcard $(DEPS)) + +ifneq ($(dep_includes),) + include $(dep_includes) +endif + +clean: + $(RM) -f $(bdir)/*.a $(bdir)/*.so $(bdir)/*.o $(bdir)/.*.d\ + $(bdir)/tracecmd_plugin_dir + +force: +.PHONY: clean force diff --git a/lib/trace-cmd/test.c b/lib/trace-cmd/test.c new file mode 100644 index 00000000..3db029aa --- /dev/null +++ b/lib/trace-cmd/test.c @@ -0,0 +1,7 @@ +#include <trace-cmd/trace-cmd.h> + +int main() +{ + tracecmd_open_head("trace.dat", 0); + return 0; +} diff --git a/lib/trace-cmd/trace-blk-hack.c b/lib/trace-cmd/trace-blk-hack.c new file mode 100644 index 00000000..2a05cf98 --- /dev/null +++ b/lib/trace-cmd/trace-blk-hack.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#include <stdio.h> +#include "trace-cmd.h" +#include "trace-local.h" + +static const char blk_event_start[] = + "name: blktrace\n" + "ID: %d\n" + "format:\n" + "\tfield:unsigned short common_type;\toffset:0;\tsize:2;\n" + "\tfield:unsigned char common_flags;\toffset:2;\tsize:1;\n" + "\tfield:unsigned char common_preempt_count;\toffset:3;\tsize:1;\n" + "\tfield:int common_pid;\toffset:4;\tsize:4;\n"; + +static const char blk_body[] = "\n" + "\tfield:u64 sector;\toffset:16;\tsize:8;\n" + "\tfield:int bytes;\toffset:24;\tsize:4;\n" + "\tfield:int action;\toffset:28;\tsize:4;\n" + "\tfield:int pid;\toffset:32;\tsize:4;\n" + "\tfield:int device;\toffset:36;\tsize:4;\n" + "\tfield:int cpu;\toffset:40;\tsize:4;\n" + "\tfield:short error;\toffset:44;\tsize:2;\n" + "\tfield:short pdu_len;\toffset:46;\tsize:2;\n" + "\tfield:void data;\toffset:48;\tsize:0;\n" + "\n" + "print fmt: \"%%d\", REC->pid\n"; + +int tracecmd_blk_hack(struct tracecmd_input *handle) +{ + struct tep_handle *pevent; + struct tep_event *event; + struct tep_format_field *field; + char 
buf[4096]; /* way more than enough! */ + int id; + int l; + int r; + + pevent = tracecmd_get_tep(handle); + + /* + * Unfortunately, the TRACE_BLK has changed a bit. + * We need to test if various events exist to try + * to guess what event id TRACE_BLK would be. + */ + + /* It was originally behind the "power" event */ + event = tep_find_event_by_name(pevent, "ftrace", "power"); + if (event) { + id = event->id + 1; + goto found; + } + + /* + * But the power tracer is now in perf. + * Then it was after kmem_free + */ + event = tep_find_event_by_name(pevent, "ftrace", "kmem_free"); + if (event) { + id = event->id + 1; + goto found; + } + + /* + * But that then went away. + * Currently it should be behind the user stack. + */ + event = tep_find_event_by_name(pevent, "ftrace", "user_stack"); + if (event) { + id = event->id + 1; + goto found; + } + /* Give up :( */ + return -1; + + found: + /* + * Blk events are not exported in the events directory. + * This is a hack to attempt to create a block event + * that we can read. + * + * We'll make a format file to look like this: + * + * name: blktrace + * ID: 13 + * format: + * field:unsigned short common_type; offset:0; size:2; + * field:unsigned char common_flags; offset:2; size:1; + * field:unsigned char common_preempt_count; offset:3; size:1; + * field:int common_pid; offset:4; size:4; + * field:int common_lock_depth; offset:8; size:4; + * + * field:u64 sector; offset:16; size:8; + * field:int bytes; offset:24; size:4; + * field:int action; offset:28; size:4; + * field:int pid; offset:32; size:4; + * field:int device; offset:36; size:4; + * field:int cpu; offset:40; size:4; + * field:short error; offset:44; size:2; + * field:short pdu_len; offset:46; size:2; + * field:void data; offset:48; size:0; + * + * print fmt: "%d", REC->pid + * + * Note: the struct blk_io_trace is used directly and + * just the first parts of the struct are not used in order + * to not write over the ftrace data.
+ */ + + /* Make sure the common fields exist */ + field = tep_find_common_field(event, "common_type"); + if (!field || field->offset != 0 || field->size != 2) + goto fail; + field = tep_find_common_field(event, "common_flags"); + if (!field || field->offset != 2 || field->size != 1) + goto fail; + field = tep_find_common_field(event, "common_preempt_count"); + if (!field || field->offset != 3 || field->size != 1) + goto fail; + field = tep_find_common_field(event, "common_pid"); + if (!field || field->offset != 4 || field->size != 4) + goto fail; + r = sprintf(buf, blk_event_start, id); + l = r; + + /* lock depth is optional */ + field = tep_find_common_field(event, "common_lock_depth"); + if (field) { + if (field->offset != 8 || field->size != 4) + return -1; + r = sprintf(buf+l, "\tfield:int common_lock_depth;\toffset:8;\tsize:4;\n"); + l += r; + } + + r = sprintf(buf+l, blk_body); + + /* Parse this event */ + l += r; + tep_parse_event(pevent, buf, l, "ftrace"); /* NOTE(review): the result of tep_parse_event() is not checked; 0 is returned either way */ + + return 0; + + fail: + return -1; +} diff --git a/lib/trace-cmd/trace-compress-zlib.c b/lib/trace-cmd/trace-compress-zlib.c new file mode 100644 index 00000000..413a0764 --- /dev/null +++ b/lib/trace-cmd/trace-compress-zlib.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2021, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com> + * + */ +#include <stdlib.h> +#include <dlfcn.h> +#include <zlib.h> +#include <errno.h> + +#include "trace-cmd-private.h" + +#define __ZLIB_NAME "zlib" +#define __ZLIB_WEIGTH 10 + +static int zlib_compress(void *ctx, const void *in, int in_bytes, void *out, int out_bytes) +{ + unsigned long obytes = out_bytes; + int ret; + + ret = compress2((unsigned char *)out, &obytes, + (unsigned char *)in, (unsigned long)in_bytes, Z_BEST_COMPRESSION); + switch (ret) { + case Z_OK: + return obytes; + case Z_BUF_ERROR: + errno = -ENOBUFS; /* NOTE(review): errno codes are positive by convention; this and the following assignments in the switch store negated values - confirm intent */ + break; + case Z_MEM_ERROR: + errno = -ENOMEM; + break; + case Z_STREAM_ERROR: + errno = -EINVAL; + break; + case Z_ERRNO: +
break; + default: + errno = -EFAULT; + break; + } + + return -1; +} + +static int zlib_decompress(void *ctx, const void *in, int in_bytes, void *out, int out_bytes) +{ + unsigned long obytes = out_bytes; + int ret; + + ret = uncompress((unsigned char *)out, &obytes, + (unsigned char *)in, (unsigned long)in_bytes); + switch (ret) { + case Z_OK: + return obytes; + case Z_BUF_ERROR: + errno = -ENOBUFS; /* NOTE(review): errno codes are positive by convention; the assignments in this switch store negated values - confirm intent */ + break; + case Z_MEM_ERROR: + errno = -ENOMEM; + break; + case Z_DATA_ERROR: + errno = -EINVAL; + break; + case Z_ERRNO: + break; + default: + errno = -EFAULT; + break; + } + + return -1; +} + +static unsigned int zlib_compress_bound(void *ctx, unsigned int in_bytes) +{ + return compressBound(in_bytes); +} + +static bool zlib_is_supported(const char *name, const char *version) +{ + const char *zver; + + if (!name) + return false; + if (strlen(name) != strlen(__ZLIB_NAME) || strcmp(name, __ZLIB_NAME)) + return false; + + if (!version) + return true; + + zver = zlibVersion(); + if (!zver) + return false; + + /* Compare the major version number: atoi() parses only the leading digits of each version string */ + if (atoi(version) <= atoi(zver)) + return true; + + return false; +} + +int tracecmd_zlib_init(void) +{ + struct tracecmd_compression_proto proto; + + memset(&proto, 0, sizeof(proto)); + proto.name = __ZLIB_NAME; + proto.version = zlibVersion(); + proto.weight = __ZLIB_WEIGTH; + proto.compress = zlib_compress; + proto.uncompress = zlib_decompress; + proto.is_supported = zlib_is_supported; + proto.compress_size = zlib_compress_bound; + + return tracecmd_compress_proto_register(&proto); +} diff --git a/lib/trace-cmd/trace-compress-zstd.c b/lib/trace-cmd/trace-compress-zstd.c new file mode 100644 index 00000000..10ae7a4c --- /dev/null +++ b/lib/trace-cmd/trace-compress-zstd.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2022, Sebastian Andrzej Siewior <sebastian@breakpoint.cc> + * + */ +#include <stdlib.h> +#include <zstd.h> +#include <errno.h> + +#include "trace-cmd-private.h" + +#define
__ZSTD_NAME "zstd" +#define __ZSTD_WEIGTH 5 + +struct zstd_context { + ZSTD_CCtx *ctx_c; + ZSTD_DCtx *ctx_d; +}; + +static int zstd_compress(void *ctx, const void *in, int in_bytes, void *out, int out_bytes) +{ + struct zstd_context *context = ctx; + size_t ret; + + if (!ctx) + return -1; + + ret = ZSTD_compress2(context->ctx_c, out, out_bytes, in, in_bytes); + if (ZSTD_isError(ret)) + return -1; + + return ret; +} + +static int zstd_decompress(void *ctx, const void *in, int in_bytes, void *out, int out_bytes) +{ + struct zstd_context *context = ctx; + size_t ret; + + if (!ctx) + return -1; + + ret = ZSTD_decompressDCtx(context->ctx_d, out, out_bytes, in, in_bytes); + if (ZSTD_isError(ret)) { + errno = -EINVAL; + return -1; + } + + return ret; +} + +static unsigned int zstd_compress_bound(void *ctx, unsigned int in_bytes) +{ + return ZSTD_compressBound(in_bytes); +} + +static bool zstd_is_supported(const char *name, const char *version) +{ + if (!name) + return false; + if (strcmp(name, __ZSTD_NAME)) + return false; + + return true; +} + +static void *new_zstd_context(void) +{ + struct zstd_context *context; + size_t r; + + context = calloc(1, sizeof(*context)); + if (!context) + return NULL; + + context->ctx_c = ZSTD_createCCtx(); + context->ctx_d = ZSTD_createDCtx(); + if (!context->ctx_c || !context->ctx_d) + goto err; + + r = ZSTD_CCtx_setParameter(context->ctx_c, ZSTD_c_contentSizeFlag, 0); + if (ZSTD_isError(r)) + goto err; + + return context; +err: + ZSTD_freeCCtx(context->ctx_c); + ZSTD_freeDCtx(context->ctx_d); + free(context); + return NULL; +} +static void free_zstd_context(void *ctx) +{ + struct zstd_context *context = ctx; + + if (!ctx) + return; + + ZSTD_freeCCtx(context->ctx_c); + ZSTD_freeDCtx(context->ctx_d); + free(context); +} + +int tracecmd_zstd_init(void) +{ + struct tracecmd_compression_proto proto; + + memset(&proto, 0, sizeof(proto)); + proto.name = __ZSTD_NAME; + proto.version = ZSTD_versionString(); + proto.weight = __ZSTD_WEIGTH; + 
proto.compress = zstd_compress; + proto.uncompress = zstd_decompress; + proto.is_supported = zstd_is_supported; + proto.compress_size = zstd_compress_bound; + proto.new_context = new_zstd_context; + proto.free_context = free_zstd_context; + + return tracecmd_compress_proto_register(&proto); +} diff --git a/lib/trace-cmd/trace-compress.c b/lib/trace-cmd/trace-compress.c new file mode 100644 index 00000000..a63295e6 --- /dev/null +++ b/lib/trace-cmd/trace-compress.c @@ -0,0 +1,991 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2021, VMware, Tzvetomir Stoyanov tz.stoyanov@gmail.com> + * + */ +#include <stdlib.h> +#include <sys/time.h> +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> + +#include "trace-cmd-private.h" +#include "trace-cmd-local.h" + +struct compress_proto { + struct compress_proto *next; + char *proto_name; + char *proto_version; + int weight; + + int (*compress_block)(void *ctx, const void *in, int in_bytes, void *out, int out_bytes); + int (*uncompress_block)(void *ctx, const void *in, int in_bytes, void *out, int out_bytes); + unsigned int (*compress_size)(void *ctx, unsigned int bytes); + bool (*is_supported)(const char *name, const char *version); + void *(*new_context)(void); + void (*free_context)(void *ctx); +}; + +static struct compress_proto *proto_list; + +struct tracecmd_compression { + int fd; + unsigned int capacity; + unsigned int capacity_read; + unsigned long pointer; + char *buffer; + struct compress_proto *proto; + struct tep_handle *tep; + struct tracecmd_msg_handle *msg_handle; + void *context; +}; + +static int read_fd(int fd, char *dst, int len) +{ + size_t size = 0; + int r; + + do { + r = read(fd, dst+size, len); + if (r > 0) { + size += r; + len -= r; + } else + break; + } while (r > 0); + + if (len) + return -1; + return size; +} + +static long long write_fd(int fd, const void *data, size_t size) +{ + long long tot = 0; + long long w; + + do { + w = write(fd, data + tot, size - tot); + tot += w; + + 
if (!w) + break; + if (w < 0) + return w; + } while (tot != size); + + return tot; +} + +static long long do_write(struct tracecmd_compression *handle, + const void *data, unsigned long long size) +{ + int ret; + + if (handle->msg_handle) { + ret = tracecmd_msg_data_send(handle->msg_handle, data, size); + if (ret) + return -1; + return size; + } + return write_fd(handle->fd, data, size); +} + +static inline int buffer_extend(struct tracecmd_compression *handle, unsigned int size) +{ + int extend; + char *buf; + + if (size <= handle->capacity) + return 0; + + extend = (size / BUFSIZ + 1) * BUFSIZ; + buf = realloc(handle->buffer, extend); + if (!buf) + return -1; + handle->buffer = buf; + handle->capacity = extend; + + return 0; +} + +/** + * tracecmd_compress_lseek - Move the read/write pointer into the compression buffer + * @handle: compression handle + * @offset: number of bytes to move the pointer, can be negative or positive + * @whence: the starting position of the pointer movement, + * + * Returns the new file pointer on success, or -1 in case of an error. 
+ */ +off64_t tracecmd_compress_lseek(struct tracecmd_compression *handle, off64_t offset, int whence) +{ + unsigned long p; + + if (!handle || !handle->buffer) + return (off64_t)-1; + + switch (whence) { + case SEEK_CUR: + p = handle->pointer + offset; + break; + case SEEK_END: + p = handle->capacity + offset; + break; + case SEEK_SET: + p = offset; + break; + default: + return (off64_t)-1; + } + + if (buffer_extend(handle, p)) + return (off64_t)-1; + + handle->pointer = p; + + return p; +} + +static int compress_read(struct tracecmd_compression *handle, char *dst, int len) +{ + + if (handle->pointer > handle->capacity_read) + return -1; + + if (handle->pointer + len > handle->capacity_read) + len = handle->capacity_read - handle->pointer; + + memcpy(dst, handle->buffer + handle->pointer, len); + + return len; +} + +/** + * tracecmd_compress_pread - pread() on compression buffer + * @handle: compression handle + * @dst: return, store the read data + * @len: length of data to be read + * @offset: offset in the buffer of data to be read + * + * Read a @len of data from the compression buffer at given @offset, + * without updating the buffer pointer. + * + * On success returns the number of bytes read, or -1 on failure. + */ +int tracecmd_compress_pread(struct tracecmd_compression *handle, char *dst, int len, off_t offset) +{ + int ret; + + if (!handle || !handle->buffer || offset > handle->capacity_read) + return -1; + + ret = tracecmd_compress_lseek(handle, offset, SEEK_SET); + if (ret < 0) + return ret; + return compress_read(handle, dst, len); +} + +/** + * tracecmd_compress_buffer_read - read() from compression buffer + * @handle: compression handle + * @dst: return, store the read data + * @len: length of data to be read + * + * Read a @len of data from the compression buffer + * + * On success returns the number of bytes read, or -1 on failure. 
+ */ +int tracecmd_compress_buffer_read(struct tracecmd_compression *handle, char *dst, int len) +{ + int ret; + + if (!handle || !handle->buffer) + return -1; + + ret = compress_read(handle, dst, len); + if (ret > 0) + handle->pointer += ret; + + return ret; +} + +/** + * tracecmd_compress_reset - Reset the compression buffer + * @handle: compression handle + * + * Reset the compression buffer, any data currently in the buffer + * will be destroyed. + * + */ +void tracecmd_compress_reset(struct tracecmd_compression *handle) +{ + if (!handle) + return; + + free(handle->buffer); + handle->buffer = NULL; + handle->pointer = 0; + handle->capacity_read = 0; + handle->capacity = 0; +} + +/** + * tracecmd_uncompress_block - uncompress a memory block + * @handle: compression handle + * + * Read compressed memory block from the file and uncompress it into + * internal buffer. The tracecmd_compress_buffer_read() can be used + * to read the uncompressed data from the buffer. + * + * Returns 0 on success, or -1 in case of an error. + */ +int tracecmd_uncompress_block(struct tracecmd_compression *handle) +{ + unsigned int s_uncompressed; + unsigned int s_compressed; + char *bytes = NULL; + char buf[4]; + int size; + int ret; + + if (!handle || !handle->proto || !handle->proto->uncompress_block) + return -1; + + tracecmd_compress_reset(handle); + + if (read(handle->fd, buf, 4) != 4) + return -1; + + s_compressed = tep_read_number(handle->tep, buf, 4); + if (read(handle->fd, buf, 4) != 4) + return -1; + + s_uncompressed = tep_read_number(handle->tep, buf, 4); + size = s_uncompressed > s_compressed ? 
s_uncompressed : s_compressed; + + handle->buffer = malloc(size); + if (!handle->buffer) + return -1; + + bytes = malloc(s_compressed); + if (!bytes) + goto error; + + if (read_fd(handle->fd, bytes, s_compressed) < 0) + goto error; + + ret = handle->proto->uncompress_block(handle->context, + bytes, s_compressed, handle->buffer, size); + if (ret < 0) + goto error; + + free(bytes); + handle->pointer = 0; + handle->capacity_read = ret; + handle->capacity = size; + return 0; +error: + tracecmd_compress_reset(handle); + free(bytes); + return -1; +} + +/** + * tracecmd_compress_block - compress a memory block + * @handle: compression handle + * + * Compress the content of the internal memory buffer and write + * the compressed data in the file. The tracecmd_compress_buffer_write() + * can be used to write data into the internal memory buffer, + * before calling this API. + * + * Returns 0 on success, or -1 in case of an error. + */ +int tracecmd_compress_block(struct tracecmd_compression *handle) +{ + unsigned int size, real_size; + char *buf; + int endian4; + int ret; + + if (!handle || !handle->proto || + !handle->proto->compress_size || !handle->proto->compress_block) + return -1; + + size = handle->proto->compress_size(handle->context, handle->pointer); + + buf = malloc(size); + if (!buf) + return -1; + + real_size = handle->proto->compress_block(handle->context, handle->buffer, handle->pointer, buf, size); + if (real_size < 0) { + ret = real_size; + goto out; + } + + /* Write compressed data size */ + endian4 = tep_read_number(handle->tep, &real_size, 4); + ret = do_write(handle, &endian4, 4); + if (ret != 4) + goto out; + + /* Write uncompressed data size */ + endian4 = tep_read_number(handle->tep, &handle->pointer, 4); + ret = do_write(handle, &endian4, 4); + if (ret != 4) { + ret = -1; + goto out; + } + + /* Write compressed data */ + ret = do_write(handle, buf, real_size); + if (ret != real_size) { + ret = -1; + goto out; + } + + ret = 0; + 
tracecmd_compress_reset(handle); +out: + free(buf); + return ret; +} + +/** + * tracecmd_compress_buffer_write - write() to compression buffer + * @handle: compression handle + * @data: data to be written + * @size: size of @data + * + * Write @data of @size in the compression buffer + * + * Returns 0 on success, or -1 on failure. + */ +int tracecmd_compress_buffer_write(struct tracecmd_compression *handle, + const void *data, unsigned long long size) +{ + if (!handle) + return -1; + + if (buffer_extend(handle, handle->pointer + size)) + return -1; + + memcpy(&handle->buffer[handle->pointer], data, size); + handle->pointer += size; + if (handle->capacity_read < handle->pointer) + handle->capacity_read = handle->pointer; + + return 0; +} + +/** + * tracecmd_compress_init - initialize the library with available compression algorithms + */ +void tracecmd_compress_init(void) +{ + struct timeval time; + + gettimeofday(&time, NULL); + srand((time.tv_sec * 1000) + (time.tv_usec / 1000)); + +#ifdef HAVE_ZLIB + tracecmd_zlib_init(); +#endif + tracecmd_zstd_init(); +} + +static struct compress_proto *compress_proto_select(void) +{ + struct compress_proto *proto = proto_list; + struct compress_proto *selected = NULL; + + while (proto) { + if (!selected || selected->weight > proto->weight) + selected = proto; + proto = proto->next; + } + + return selected; +} + +/** + * tracecmd_compress_alloc - Allocate a new compression context + * @name: name of the compression algorithm. + * If NULL - auto select the best available algorithm + * @version: version of the compression algorithm, can be NULL + * @fd: file descriptor for reading / writing data + * @tep: tep handle, used to encode the data + * @msg_handle: message handle, use it for reading / writing data instead of @fd + * + * Returns NULL on failure or pointer to allocated compression context. 
+ * The returned context must be freed by tracecmd_compress_destroy() + */ +struct tracecmd_compression *tracecmd_compress_alloc(const char *name, const char *version, + int fd, struct tep_handle *tep, + struct tracecmd_msg_handle *msg_handle) +{ + struct tracecmd_compression *new; + struct compress_proto *proto; + + if (name) { + proto = proto_list; + while (proto) { + if (proto->is_supported && proto->is_supported(name, version)) + break; + proto = proto->next; + } + } else { + proto = compress_proto_select(); + } + if (!proto) + return NULL; + + new = calloc(1, sizeof(*new)); + if (!new) + return NULL; + + new->fd = fd; + new->tep = tep; + new->msg_handle = msg_handle; + new->proto = proto; + if (proto->new_context) + new->context = proto->new_context(); + + return new; +} + +/** + * tracecmd_compress_destroy - Free a compression context + * @handle: handle to the compression context that will be freed + */ +void tracecmd_compress_destroy(struct tracecmd_compression *handle) +{ + if (!handle) + return; + + tracecmd_compress_reset(handle); + + if (handle->proto && handle->proto->free_context) + handle->proto->free_context(handle->context); + + free(handle); +} + +/** + * tracecmd_compress_is_supported - check if compression algorithm is supported + * @name: name of the compression algorithm. + * @version: version of the compression algorithm. + * + * Checks if compression algorithm with given name and version is supported. + * Returns true if the algorithm is supported or false if it is not. + */ +bool tracecmd_compress_is_supported(const char *name, const char *version) +{ + struct compress_proto *proto = proto_list; + + if (!name) + return NULL; + + while (proto) { + if (proto->is_supported && proto->is_supported(name, version)) + return true; + proto = proto->next; + } + return false; +} + +/** + * tracecmd_compress_proto_get_name - get name and version of compression algorithm + * @compress: compression handle. 
+ * @name: return, name of the compression algorithm. + * @version: return, version of the compression algorithm. + * + * Returns 0 on success, or -1 in case of an error. If 0 is returned, the name + * and version of the algorithm are stored in @name and @version. The returned + * strings must *not* be freed. + */ +int tracecmd_compress_proto_get_name(struct tracecmd_compression *compress, + const char **name, const char **version) +{ + if (!compress || !compress->proto) + return -1; + + if (name) + *name = compress->proto->proto_name; + if (version) + *version = compress->proto->proto_version; + + return 0; +} + +/** + * tracecmd_compress_proto_register - register a new compression algorithm + * @name: name of the compression algorithm. + * @version: version of the compression algorithm. + * @weight: weight of the compression algorithm, lower is better. + * @compress: compression hook, called to compress a memory block. + * @uncompress: uncompression hook, called to uncompress a memory block. + * @compress_size: hook, called to get the required minimum size of the buffer + * for compression given number of bytes. + * @is_supported: check hook, called to check if compression with given name and + * version is supported by this plugin. + * + * Returns 0 on success, or -1 in case of an error. If algorithm with given name + * and version is already registered, -1 is returned. 
+ */ +int tracecmd_compress_proto_register(struct tracecmd_compression_proto *proto) +{ + struct compress_proto *new; + + if (!proto || !proto->name || !proto->compress || !proto->uncompress) + return -1; + + if (tracecmd_compress_is_supported(proto->name, proto->version)) + return -1; + + new = calloc(1, sizeof(*new)); + if (!new) + return -1; + + new->proto_name = strdup(proto->name); + if (!new->proto_name) + goto error; + + new->proto_version = strdup(proto->version); + if (!new->proto_version) + goto error; + + new->compress_block = proto->compress; + new->uncompress_block = proto->uncompress; + new->compress_size = proto->compress_size; + new->is_supported = proto->is_supported; + new->weight = proto->weight; + new->next = proto_list; + new->new_context = proto->new_context; + new->free_context = proto->free_context; + proto_list = new; + return 0; + +error: + free(new->proto_name); + free(new->proto_version); + free(new); + return -1; +} + +/** + * tracecmd_compress_free - free the library resources, related to available compression algorithms + * + */ +void tracecmd_compress_free(void) +{ + struct compress_proto *proto = proto_list; + struct compress_proto *del; + + while (proto) { + del = proto; + proto = proto->next; + free(del->proto_name); + free(del->proto_version); + free(del); + } + proto_list = NULL; +} + +/** + * tracecmd_compress_protos_get - get a list of all supported compression algorithms and versions + * @names: return, array with names of all supported compression algorithms + * @versions: return, array with versions of all supported compression algorithms + * + * On success, the size of @names and @versions arrays is returned. + * Those arrays are allocated by the API and must be freed with free() by the + * caller. Both arrays are with same size, each name from @names corresponds to + * a version from @versions. The last element in both arrays is a NULL pointer. + * On error -1 is returned and @names and @versions arrays are not allocated. 
+ */ +int tracecmd_compress_protos_get(char ***names, char ***versions) +{ + struct compress_proto *proto = proto_list; + char **n = NULL; + char **v = NULL; + int c, i; + + for (c = 0; proto; proto = proto->next) + c++; + + if (c < 1) + return c; + + n = calloc(c + 1, sizeof(char *)); + if (!n) + goto error; + v = calloc(c + 1, sizeof(char *)); + if (!v) + goto error; + + proto = proto_list; + for (i = 0; i < c && proto; i++) { + n[i] = proto->proto_name; + v[i] = proto->proto_version; + proto = proto->next; + } + + n[i] = NULL; + v[i] = NULL; + *names = n; + *versions = v; + return c; + +error: + free(n); + free(v); + return -1; +} + +/** + * tracecmd_compress_copy_from - Copy and compress data from a file + * @handle: compression handle + * @fd: file descriptor to uncompressed data to copy from + * @chunk_size: size of one compression chunk + * @read_size: Pointer to max bytes to read from. The pointer is updated + * with the actual size of compressed data read. If 0 is passed, + * read until the EOF is reached. + * @write_size: return, size of the compressed data written into @handle + * + * This function reads uncompressed data from given @fd, compresses the data + * using the @handle compression context and writes the compressed data into the + * fd associated with the @handle. The data is compressed on chunks with given + * @chunk_size size. The compressed data is written in the format: + * - 4 bytes, chunks count + * - for each chunk: + * - 4 bytes, size of compressed data in this chunk + * - 4 bytes, uncompressed size of the data in this chunk + * - data, bytes of <size of compressed data in this chunk> + * + * On success 0 is returned, @read_size and @write_size are updated with the size of + * read and written data. 
+ */ +int tracecmd_compress_copy_from(struct tracecmd_compression *handle, int fd, int chunk_size, + unsigned long long *read_size, unsigned long long *write_size) +{ + unsigned int rchunk = 0; + unsigned int chunks = 0; + unsigned int wsize = 0; + unsigned int rsize = 0; + unsigned int rmax = 0; + unsigned int csize; + unsigned int size; + unsigned int all; + unsigned int r; + off64_t offset; + char *buf_from; + char *buf_to; + int endian4; + int ret; + + if (!handle || !handle->proto || + !handle->proto->compress_block || !handle->proto->compress_size) + return 0; + + if (read_size) + rmax = *read_size; + csize = handle->proto->compress_size(handle->context, chunk_size); + buf_from = malloc(chunk_size); + if (!buf_from) + return -1; + + buf_to = malloc(csize); + if (!buf_to) + return -1; + + /* save the initial offset and write 0 as initial chunk count */ + offset = lseek64(handle->fd, 0, SEEK_CUR); + write_fd(handle->fd, &chunks, 4); + + do { + all = 0; + if (rmax > 0 && (rmax - rsize) < chunk_size) + rchunk = (rmax - rsize); + else + rchunk = chunk_size; + + do { + r = read(fd, buf_from + all, rchunk - all); + all += r; + + if (r <= 0) + break; + } while (all != rchunk); + + + if (r < 0 || (rmax > 0 && rsize >= rmax)) + break; + rsize += all; + size = csize; + if (all > 0) { + ret = handle->proto->compress_block(handle->context, + buf_from, all, buf_to, size); + if (ret < 0) { + if (errno == EINTR) + continue; + break; + } + size = ret; + /* Write compressed data size */ + endian4 = tep_read_number(handle->tep, &size, 4); + ret = write_fd(handle->fd, &endian4, 4); + if (ret != 4) + break; + + /* Write uncompressed data size */ + endian4 = tep_read_number(handle->tep, &all, 4); + ret = write_fd(handle->fd, &endian4, 4); + if (ret != 4) + break; + + /* Write the compressed data */ + ret = write_fd(handle->fd, buf_to, size); + if (ret != size) + break; + /* data + compress header */ + wsize += (size + 8); + chunks++; + } + } while (all > 0); + + free(buf_from); + 
free(buf_to); + + if (all) + return -1; + + if (lseek64(handle->fd, offset, SEEK_SET) == (off_t)-1) + return -1; + + endian4 = tep_read_number(handle->tep, &chunks, 4); + /* write chunks count*/ + write_fd(handle->fd, &chunks, 4); + if (lseek64(handle->fd, 0, SEEK_END) == (off_t)-1) + return -1; + + if (read_size) + *read_size = rsize; + if (write_size) + *write_size = wsize; + return 0; +} + +/** + * tracecmd_load_chunks_info - Read compression chunks information from the file + * @handle: compression handle + * @chunks_info: return, array with compression chunks information + * + * This function reads information of all compression chunks in the current + * compression block from the file and fills that information in a newly + * allocated array @chunks_info which is returned. + * + * On success count of compression chunks is returned. Array of that count is + * allocated and returned in @chunks_info. Each entry describes one compression + * chunk. On error -1 is returned. In case of success, @chunks_info must be + * freed by free(). 
+ */ +int tracecmd_load_chunks_info(struct tracecmd_compression *handle, + struct tracecmd_compress_chunk **chunks_info) +{ + struct tracecmd_compress_chunk *chunks = NULL; + unsigned long long size = 0; + unsigned int count = 0; + off64_t offset; + int ret = -1; + char buf[4]; + int i; + + if (!handle) + return -1; + + offset = lseek64(handle->fd, 0, SEEK_CUR); + if (offset == (off64_t)-1) + return -1; + + if (read(handle->fd, buf, 4) != 4) + return -1; + + count = tep_read_number(handle->tep, buf, 4); + if (!count) { + ret = 0; + goto out; + } + + chunks = calloc(count, sizeof(struct tracecmd_compress_chunk)); + if (!chunks) + goto out; + + for (i = 0; i < count; i++) { + chunks[i].zoffset = lseek64(handle->fd, 0, SEEK_CUR); + if (chunks[i].zoffset == (off_t)-1) + goto out; + if (read(handle->fd, buf, 4) != 4) + goto out; + chunks[i].zsize = tep_read_number(handle->tep, buf, 4); + chunks[i].offset = size; + if (read(handle->fd, buf, 4) != 4) + goto out; + chunks[i].size = tep_read_number(handle->tep, buf, 4); + size += chunks[i].size; + if (lseek64(handle->fd, chunks[i].zsize, SEEK_CUR) == (off64_t)-1) + goto out; + } + + ret = count; +out: + if (lseek64(handle->fd, offset, SEEK_SET) == (off64_t)-1) + ret = -1; + + if (ret > 0 && chunks_info) + *chunks_info = chunks; + else + free(chunks); + + return ret; +} + +/** + * tracecmd_uncompress_chunk - Uncompress given compression chunk. + * @handle: compression handle + * @chunk: chunk, that will be uncompressed in @data + * @data: Preallocated memory for uncompressed data. Must have enough space + * to hold the uncompressed data. + * + * This function uncompresses the chunk described by @chunk and stores + * the uncompressed data in the preallocated memory @data. + * + * On success 0 is returned and the uncompressed data is stored in @data. + * On error -1 is returned. 
+ */ +int tracecmd_uncompress_chunk(struct tracecmd_compression *handle, + struct tracecmd_compress_chunk *chunk, char *data) +{ + char *bytes_in = NULL; + int ret = -1; + + if (!handle || !handle->proto || !handle->proto->uncompress_block || !chunk || !data) + return -1; + + if (lseek64(handle->fd, chunk->zoffset + 8, SEEK_SET) == (off_t)-1) + return -1; + + bytes_in = malloc(chunk->zsize); + if (!bytes_in) + return -1; + + if (read_fd(handle->fd, bytes_in, chunk->zsize) < 0) + goto out; + + if (handle->proto->uncompress_block(handle->context, + bytes_in, chunk->zsize, data, chunk->size) < 0) + goto out; + + ret = 0; +out: + free(bytes_in); + return ret; +} + +/** + * tracecmd_uncompress_copy_to - Uncompress data and copy to a file + * @handle: compression handle + * @fd: file descriptor to uncompressed data to copy into + * @read_size: return, size of the compressed data read from @handle + * @write_size: return, size of the uncompressed data written into @fd + * + * This function reads compressed data from the fd, associated with @handle, + * uncompresses it using the @handle compression context and writes + * the uncompressed data into the fd. The compressed data must be in the format: + * - 4 bytes, chunks count + * - for each chunk: + * - 4 bytes, size of compressed data in this chunk + * - 4 bytes, uncompressed size of the data in this chunk + * - data, bytes of <size of compressed data in this chunk> + * + * On success 0 is returned, @read_size and @write_size are updated with + * the size of read and written data. 
+ */ +int tracecmd_uncompress_copy_to(struct tracecmd_compression *handle, int fd, + unsigned long long *read_size, unsigned long long *write_size) +{ + unsigned int s_uncompressed; + unsigned int s_compressed; + unsigned int rsize = 0; + unsigned int wsize = 0; + char *bytes_out = NULL; + char *bytes_in = NULL; + int size_out = 0; + int size_in = 0; + int chunks; + char buf[4]; + char *tmp; + int ret; + + if (!handle || !handle->proto || !handle->proto->uncompress_block) + return -1; + + if (read(handle->fd, buf, 4) != 4) + return -1; + + chunks = tep_read_number(handle->tep, buf, 4); + rsize += 4; + + while (chunks) { + if (read(handle->fd, buf, 4) != 4) + break; + + s_compressed = tep_read_number(handle->tep, buf, 4); + rsize += 4; + if (read(handle->fd, buf, 4) != 4) + break; + + s_uncompressed = tep_read_number(handle->tep, buf, 4); + rsize += 4; + if (!bytes_in || size_in < s_compressed) { + tmp = realloc(bytes_in, s_compressed); + if (!tmp) + break; + + bytes_in = tmp; + size_in = s_compressed; + } + + if (!bytes_out || size_out < s_uncompressed) { + tmp = realloc(bytes_out, s_uncompressed); + if (!tmp) + break; + bytes_out = tmp; + size_out = s_uncompressed; + } + + if (read_fd(handle->fd, bytes_in, s_compressed) < 0) + break; + + rsize += s_compressed; + ret = handle->proto->uncompress_block(handle->context, bytes_in, s_compressed, + bytes_out, s_uncompressed); + if (ret < 0) + break; + + write_fd(fd, bytes_out, ret); + wsize += ret; + chunks--; + } + free(bytes_in); + free(bytes_out); + if (chunks) + return -1; + + if (read_size) + *read_size = rsize; + if (write_size) + *write_size = wsize; + + return 0; +} diff --git a/lib/trace-cmd/trace-filter-hash.c b/lib/trace-cmd/trace-filter-hash.c new file mode 100644 index 00000000..f5f0fb09 --- /dev/null +++ b/lib/trace-cmd/trace-filter-hash.c @@ -0,0 +1,211 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2009, Steven Rostedt <srostedt@redhat.com> + * Copyright (C) 2018 VMware Inc, Steven Rostedt 
<rostedt@goodmis.org> + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <assert.h> + +#include "trace-filter-hash.h" + +#define FILTER_HASH_BITS 8 +#define FILTER_HASH_SIZE (1 << FILTER_HASH_BITS) + +struct tracecmd_filter_id_item * +tracecmd_filter_id_find(struct tracecmd_filter_id *hash, int id) +{ + int key = tracecmd_quick_hash(id, FILTER_HASH_BITS); + struct tracecmd_filter_id_item *item = hash->hash[key]; + + while (item) { + if (item->id == id) + break; + item = item->next; + } + + return item; +} + +void tracecmd_filter_id_add(struct tracecmd_filter_id *hash, int id) +{ + int key = tracecmd_quick_hash(id, FILTER_HASH_BITS); + struct tracecmd_filter_id_item *item; + + item = calloc(1, sizeof(*item)); + assert(item); + + item->id = id; + item->next = hash->hash[key]; + hash->hash[key] = item; + + hash->count++; +} + +void tracecmd_filter_id_remove(struct tracecmd_filter_id *hash, int id) +{ + int key = tracecmd_quick_hash(id, FILTER_HASH_BITS); + struct tracecmd_filter_id_item **next = &hash->hash[key]; + struct tracecmd_filter_id_item *item; + + while (*next) { + if ((*next)->id == id) + break; + next = &(*next)->next; + } + + if (!*next) + return; + + assert(hash->count); + hash->count--; + + item = *next; + + *next = item->next; + + free(item); +} + +void tracecmd_filter_id_clear(struct tracecmd_filter_id *hash) +{ + struct tracecmd_filter_id_item *item, *next; + int i; + + for (i = 0; i < FILTER_HASH_SIZE; i++) { + next = hash->hash[i]; + if (!next) + continue; + + hash->hash[i] = NULL; + while (next) { + item = next; + next = item->next; + free(item); + } + } + + hash->count = 0; +} + +struct tracecmd_filter_id *tracecmd_filter_id_hash_alloc(void) +{ + struct tracecmd_filter_id *hash; + + hash = calloc(1, sizeof(*hash)); + assert(hash); + hash->hash = calloc(FILTER_HASH_SIZE, sizeof(*hash->hash)); + hash->count = 0; + + return hash; +} + +void tracecmd_filter_id_hash_free(struct tracecmd_filter_id 
*hash) +{ + if (!hash) + return; + + tracecmd_filter_id_clear(hash); + free(hash->hash); + free(hash); +} + +struct tracecmd_filter_id * +tracecmd_filter_id_hash_copy(struct tracecmd_filter_id *hash) +{ + struct tracecmd_filter_id *new_hash; + struct tracecmd_filter_id_item *item, **pitem; + int i; + + if (!hash) + return NULL; + + new_hash = tracecmd_filter_id_hash_alloc(); + assert(new_hash); + + for (i = 0; i < FILTER_HASH_SIZE; i++) { + item = hash->hash[i]; + if (!item) + continue; + + pitem = &new_hash->hash[i]; + + while (item) { + *pitem = calloc(1, sizeof(*item)); + assert(*pitem); + **pitem = *item; + + pitem = &(*pitem)->next; + item = item->next; + } + } + + new_hash->count = hash->count; + return new_hash; +} + +int *tracecmd_filter_ids(struct tracecmd_filter_id *hash) +{ + struct tracecmd_filter_id_item *item; + int *ids; + int count = 0; + int i; + + if (!hash->count) + return NULL; + + ids = malloc(sizeof(*ids) * (hash->count + 1)); + if (!ids) + return NULL; + + for (i = 0; i < FILTER_HASH_SIZE; i++) { + item = hash->hash[i]; + while (item) { + ids[count++] = item->id; + item = item->next; + } + } + + ids[count] = -1; + return ids; +} + +/** + * filter_id_compare - compare two id hashes to see if they are equal + * @hash1: one hash to compare + * @hash2: another hash to compare to @hash1 + * + * Returns 1 if the two hashes are the same, 0 otherwise. 
+ */ +int tracecmd_filter_id_compare(struct tracecmd_filter_id *hash1, + struct tracecmd_filter_id *hash2) +{ + int *ids; + int ret = 0; + int i; + + /* If counts don't match, then they obviously are not the same */ + if (hash1->count != hash2->count) + return 0; + + /* If both hashes are empty, they are the same */ + if (!hash1->count && !hash2->count) + return 1; + + /* Now compare the pids of one hash with the other */ + ids = tracecmd_filter_ids(hash1); + for (i = 0; ids[i] >= 0; i++) { + if (!tracecmd_filter_id_find(hash2, ids[i])) + break; + } + + if (ids[i] == -1) + ret = 1; + + free(ids); + + return ret; +} diff --git a/lib/trace-cmd/trace-ftrace.c b/lib/trace-cmd/trace-ftrace.c new file mode 100644 index 00000000..f74f7c2e --- /dev/null +++ b/lib/trace-cmd/trace-ftrace.c @@ -0,0 +1,397 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/param.h> + +#include "trace-cmd-private.h" + +struct tep_plugin_option trace_ftrace_options[] = { + { + .name = "tailprint", + .plugin_alias = "fgraph", + .description = + "Print function name at function exit in function graph", + }, + { + .name = "depth", + .plugin_alias = "fgraph", + .description = + "Show the depth of each entry", + }, + { + .name = NULL, + } +}; + +static struct tep_plugin_option *fgraph_tail = &trace_ftrace_options[0]; +static struct tep_plugin_option *fgraph_depth = &trace_ftrace_options[1]; + +static int find_ret_event(struct tracecmd_ftrace *finfo, struct tep_handle *pevent) +{ + struct tep_event *event; + + /* Store the func ret id and event for later use */ + event = tep_find_event_by_name(pevent, "ftrace", "funcgraph_exit"); + if (!event) + return -1; + + finfo->fgraph_ret_id = event->id; + finfo->fgraph_ret_event = event; + return 0; +} + +#define ret_event_check(finfo, pevent) \ + do { \ + if (!finfo->fgraph_ret_event && 
find_ret_event(finfo, pevent) < 0) \ + return -1; \ + } while (0) + +static int function_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct tep_handle *pevent = event->tep; + unsigned long long function; + const char *func; + + if (tep_get_field_val(s, event, "ip", record, &function, 1)) + return trace_seq_putc(s, '!'); + + func = tep_find_function(pevent, function); + if (func) + trace_seq_printf(s, "%s <-- ", func); + else + trace_seq_printf(s, "0x%llx", function); + + if (tep_get_field_val(s, event, "parent_ip", record, &function, 1)) + return trace_seq_putc(s, '!'); + + func = tep_find_function(pevent, function); + if (func) + trace_seq_printf(s, "%s", func); + else + trace_seq_printf(s, "0x%llx", function); + + return 0; +} + +#define TRACE_GRAPH_INDENT 2 + +static struct tep_record * +get_return_for_leaf(struct trace_seq *s, int cpu, int cur_pid, + unsigned long long cur_func, struct tep_record *next, + struct tracecmd_ftrace *finfo) +{ + unsigned long long val; + unsigned long long type; + unsigned long long pid; + + /* Searching a common field, can use any event */ + if (tep_get_common_field_val(s, finfo->fgraph_ret_event, "common_type", next, &type, 1)) + return NULL; + + if (type != finfo->fgraph_ret_id) + return NULL; + + if (tep_get_common_field_val(s, finfo->fgraph_ret_event, "common_pid", next, &pid, 1)) + return NULL; + + if (cur_pid != pid) + return NULL; + + /* We aleady know this is a funcgraph_ret_event */ + if (tep_get_field_val(s, finfo->fgraph_ret_event, "func", next, &val, 1)) + return NULL; + + if (cur_func != val) + return NULL; + + /* this is a leaf, now advance the iterator */ + return tracecmd_read_data(tracecmd_curr_thread_handle, cpu); +} + +/* Signal a overhead of time execution to the output */ +static void print_graph_overhead(struct trace_seq *s, + unsigned long long duration) +{ + /* Non nested entry or return */ + if (duration == ~0ULL) + return (void)trace_seq_printf(s, 
" "); + + /* Duration exceeded 1 sec */ + if (duration > 1000000000ULL) + return (void)trace_seq_printf(s, "$ "); + + /* Duration exceeded 1000 usecs */ + if (duration > 1000000ULL) + return (void)trace_seq_printf(s, "# "); + + /* Duration exceeded 100 usecs */ + if (duration > 100000ULL) + return (void)trace_seq_printf(s, "! "); + + /* Duration exceeded 10 usecs */ + if (duration > 10000ULL) + return (void)trace_seq_printf(s, "+ "); + + trace_seq_printf(s, " "); +} + +static void print_graph_duration(struct trace_seq *s, unsigned long long duration) +{ + unsigned long usecs = duration / 1000; + unsigned long nsecs_rem = duration % 1000; + /* log10(ULONG_MAX) + '\0' */ + char msecs_str[21]; + char nsecs_str[5]; + int len; + int i; + + sprintf(msecs_str, "%lu", usecs); + + /* Print msecs */ + len = s->len; + trace_seq_printf(s, "%lu", usecs); + + /* Print nsecs (we don't want to exceed 7 numbers) */ + if ((s->len - len) < 7) { + snprintf(nsecs_str, MIN(sizeof(nsecs_str), 8 - len), "%03lu", nsecs_rem); + trace_seq_printf(s, ".%s", nsecs_str); + } + + len = s->len - len; + + trace_seq_puts(s, " us "); + + /* Print remaining spaces to fit the row's width */ + for (i = len; i < 7; i++) + trace_seq_putc(s, ' '); + + trace_seq_puts(s, "| "); +} + +static int +print_graph_entry_leaf(struct trace_seq *s, + struct tep_event *event, + struct tep_record *record, + struct tep_record *ret_rec, + struct tracecmd_ftrace *finfo) +{ + struct tep_handle *pevent = event->tep; + unsigned long long rettime, calltime; + unsigned long long duration, depth; + unsigned long long val; + const char *func; + int ret; + int i; + + if (tep_get_field_val(s, finfo->fgraph_ret_event, "rettime", ret_rec, &rettime, 1)) + return trace_seq_putc(s, '!'); + + if (tep_get_field_val(s, finfo->fgraph_ret_event, "calltime", ret_rec, &calltime, 1)) + return trace_seq_putc(s, '!'); + + duration = rettime - calltime; + + /* Overhead */ + print_graph_overhead(s, duration); + + /* Duration */ + 
print_graph_duration(s, duration); + + if (tep_get_field_val(s, event, "depth", record, &depth, 1)) + return trace_seq_putc(s, '!'); + + /* Function */ + for (i = 0; i < (int)(depth * TRACE_GRAPH_INDENT); i++) + trace_seq_putc(s, ' '); + + if (tep_get_field_val(s, event, "func", record, &val, 1)) + return trace_seq_putc(s, '!'); + func = tep_find_function(pevent, val); + + if (func) + ret = trace_seq_printf(s, "%s();", func); + else + ret = trace_seq_printf(s, "%llx();", val); + + if (ret && fgraph_depth->set) + ret = trace_seq_printf(s, " (%lld)", depth); + + return ret; +} + +static int print_graph_nested(struct trace_seq *s, + struct tep_event *event, + struct tep_record *record) +{ + struct tep_handle *pevent = event->tep; + unsigned long long depth; + unsigned long long val; + const char *func; + int ret; + int i; + + /* No overhead */ + print_graph_overhead(s, -1); + + /* No time */ + trace_seq_puts(s, " | "); + + if (tep_get_field_val(s, event, "depth", record, &depth, 1)) + return trace_seq_putc(s, '!'); + + /* Function */ + for (i = 0; i < (int)(depth * TRACE_GRAPH_INDENT); i++) + trace_seq_putc(s, ' '); + + if (tep_get_field_val(s, event, "func", record, &val, 1)) + return trace_seq_putc(s, '!'); + + func = tep_find_function(pevent, val); + + if (func) + ret = trace_seq_printf(s, "%s() {", func); + else + ret = trace_seq_printf(s, "%llx() {", val); + + if (ret && fgraph_depth->set) + ret = trace_seq_printf(s, " (%lld)", depth); + + return ret; +} + +static int +fgraph_ent_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct tracecmd_ftrace *finfo = context; + struct tep_record *rec; + unsigned long long val, pid; + int cpu; + + ret_event_check(finfo, event->tep); + + if (tep_get_common_field_val(s, event, "common_pid", record, &pid, 1)) + return trace_seq_putc(s, '!'); + + if (tep_get_field_val(s, event, "func", record, &val, 1)) + return trace_seq_putc(s, '!'); + + rec = 
tracecmd_peek_next_data(tracecmd_curr_thread_handle, &cpu); + if (rec) + rec = get_return_for_leaf(s, cpu, pid, val, rec, finfo); + + if (rec) { + /* + * If this is a leaf function, then get_return_for_leaf + * returns the return of the function + */ + print_graph_entry_leaf(s, event, record, rec, finfo); + tracecmd_free_record(rec); + } else + print_graph_nested(s, event, record); + + return 0; +} + +static int +fgraph_ret_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct tracecmd_ftrace *finfo = context; + unsigned long long rettime, calltime; + unsigned long long duration, depth; + unsigned long long val; + const char *func; + int i; + + ret_event_check(finfo, event->tep); + + if (tep_get_field_val(s, event, "rettime", record, &rettime, 1)) + return trace_seq_putc(s, '!'); + + if (tep_get_field_val(s, event, "calltime", record, &calltime, 1)) + return trace_seq_putc(s, '!'); + + duration = rettime - calltime; + + /* Overhead */ + print_graph_overhead(s, duration); + + /* Duration */ + print_graph_duration(s, duration); + + if (tep_get_field_val(s, event, "depth", record, &depth, 1)) + return trace_seq_putc(s, '!'); + + /* Function */ + for (i = 0; i < (int)(depth * TRACE_GRAPH_INDENT); i++) + trace_seq_putc(s, ' '); + + trace_seq_putc(s, '}'); + + if (fgraph_tail->set) { + if (tep_get_field_val(s, event, "func", record, &val, 0)) + return 0; + func = tep_find_function(event->tep, val); + if (!func) + return 0; + trace_seq_printf(s, " /* %s */", func); + } + + if (fgraph_depth->set) + trace_seq_printf(s, " (%lld)", depth); + + return 0; +} + +/** + * tracecmd_ftrace_load_options - load the ftrace options + * + * This routine is used for trace-cmd list, to load the builtin + * ftrace options in order to list them. As the list command does + * not load a trace.dat file where this would normally be loaded. 
+ */ +void tracecmd_ftrace_load_options(void) +{ + tep_plugin_add_options("ftrace", trace_ftrace_options); +} + +int tracecmd_ftrace_overrides(struct tracecmd_input *handle, + struct tracecmd_ftrace *finfo) +{ + struct tep_handle *pevent; + struct tep_event *event; + + finfo->handle = handle; + + pevent = tracecmd_get_tep(handle); + + tep_register_event_handler(pevent, -1, "ftrace", "function", + function_handler, NULL); + + tep_register_event_handler(pevent, -1, "ftrace", "funcgraph_entry", + fgraph_ent_handler, finfo); + + tep_register_event_handler(pevent, -1, "ftrace", "funcgraph_exit", + fgraph_ret_handler, finfo); + + tep_plugin_add_options("ftrace", trace_ftrace_options); + + /* Store the func ret id and event for later use */ + event = tep_find_event_by_name(pevent, "ftrace", "funcgraph_exit"); + if (!event) + return 0; + + finfo->long_size = tracecmd_long_size(handle); + + finfo->fgraph_ret_id = event->id; + finfo->fgraph_ret_event = event; + + return 0; +} diff --git a/lib/trace-cmd/trace-hash.c b/lib/trace-cmd/trace-hash.c new file mode 100644 index 00000000..bed97323 --- /dev/null +++ b/lib/trace-cmd/trace-hash.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2014, Steven Rostedt <srostedt@redhat.com> + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <errno.h> + +#include "trace-cmd-private.h" +#include "trace-hash.h" + +int __hidden trace_hash_init(struct trace_hash *hash, int buckets) +{ + memset(hash, 0, sizeof(*hash)); + + hash->buckets = calloc(sizeof(*hash->buckets), buckets); + if (!hash->buckets) + return -ENOMEM; + hash->nr_buckets = buckets; + + /* If a power of two then we can shortcut */ + if (!(buckets & (buckets - 1))) + hash->power = buckets - 1; + + return 0; +} + +void __hidden trace_hash_free(struct trace_hash *hash) +{ + free(hash->buckets); +} + +int __hidden trace_hash_empty(struct trace_hash *hash) +{ + struct trace_hash_item **bucket; + + 
trace_hash_for_each_bucket(bucket, hash) + if (*bucket) + return 0; + return 1; +} + +int __hidden trace_hash_add(struct trace_hash *hash, struct trace_hash_item *item) +{ + struct trace_hash_item *next; + int bucket = hash->power ? item->key & hash->power : + item->key % hash->nr_buckets; + + if (hash->buckets[bucket]) { + next = hash->buckets[bucket]; + next->prev = item; + } else + next = NULL; + + item->next = next; + item->prev = (struct trace_hash_item *)&hash->buckets[bucket]; + + hash->buckets[bucket] = item; + + return 1; +} + + __hidden struct trace_hash_item * +trace_hash_find(struct trace_hash *hash, unsigned long long key, + trace_hash_func match, void *data) +{ + struct trace_hash_item *item; + int bucket = hash->power ? key & hash->power : + key % hash->nr_buckets; + + for (item = hash->buckets[bucket]; item; item = item->next) { + if (item->key == key) { + if (!match) + return item; + if (match(item, data)) + return item; + } + } + + return NULL; +} diff --git a/lib/trace-cmd/trace-hooks.c b/lib/trace-cmd/trace-hooks.c new file mode 100644 index 00000000..a58b5356 --- /dev/null +++ b/lib/trace-cmd/trace-hooks.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2015 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> + +#include "trace-cmd-private.h" +#include "trace-cmd-local.h" +#include "event-utils.h" + +struct hook_list *tracecmd_create_event_hook(const char *arg) +{ + struct hook_list *hook; + char *system = NULL; + char *event; + char *match; + char *flags = NULL; + char *pid = NULL; + char *str; + char *tok; + int index; + int ch; + int i; + + hook = malloc(sizeof(*hook)); + if (!hook) + return NULL; + memset(hook, 0, sizeof(*hook)); + + str = strdup(arg); + if (!str) { + free(hook); + return NULL; + } + + hook->str = str; + hook->hook = arg; + + /* + * Hooks are in the form of: + * [<start_system>:]<start_event>,<start_match>[,<start_pid>]/ + * 
[<end_system>:]<end_event>,<end_match>[,<flags>] + * + * Where start_system, start_pid, end_system, and flags are all + * optional. + * + * Flags are (case insensitive): + * P - pinned to cpu (wont migrate) + * G - global, not hooked to task - currently ignored. + * S - save stacks for this event. + */ + tok = strtok(str, ":,"); + if (!tok) + goto invalid_tok; + + /* See what the token was from the original arg */ + index = strlen(tok); + if (arg[index] == ':') { + /* this is a system, the next token must be ',' */ + system = tok; + tok = strtok(NULL, ","); + if (!tok) + goto invalid_tok; + } + event = tok; + + tok = strtok(NULL, ",/"); + if (!tok) + goto invalid_tok; + match = tok; + index = strlen(tok) + tok - str; + if (arg[index] == ',') { + tok = strtok(NULL, "/"); + if (!tok) + goto invalid_tok; + pid = tok; + } + + hook->start_system = system; + hook->start_event = event; + hook->start_match = match; + hook->pid = pid; + + /* Now process the end event */ + system = NULL; + + tok = strtok(NULL, ":,"); + if (!tok) + goto invalid_tok; + + /* See what the token was from the original arg */ + index = tok - str + strlen(tok); + if (arg[index] == ':') { + /* this is a system, the next token must be ',' */ + system = tok; + tok = strtok(NULL, ","); + if (!tok) + goto invalid_tok; + } + event = tok; + + tok = strtok(NULL, ","); + if (!tok) + goto invalid_tok; + match = tok; + index = strlen(tok) + tok - str; + if (arg[index] == ',') { + tok = strtok(NULL, ""); + if (!tok) + goto invalid_tok; + flags = tok; + } + + hook->end_system = system; + hook->end_event = event; + hook->end_match = match; + hook->migrate = 1; + if (flags) { + for (i = 0; flags[i]; i++) { + ch = tolower(flags[i]); + switch (ch) { + case 'p': + hook->migrate = 0; + break; + case 'g': + hook->global = 1; + break; + case 's': + hook->stack = 1; + break; + default: + tracecmd_warning("unknown flag %c", flags[i]); + } + } + } + + printf("start %s:%s:%s (%s) end %s:%s:%s (%s)\n", + hook->start_system, 
+ hook->start_event, + hook->start_match, + hook->pid, + hook->end_system, + hook->end_event, + hook->end_match, + flags); + return hook; + +invalid_tok: + tracecmd_warning("Invalid hook format '%s'", arg); + return NULL; +} + +void tracecmd_free_hooks(struct hook_list *hooks) +{ + struct hook_list *hook; + + while (hooks) { + hook = hooks; + hooks = hooks->next; + + free(hook->str); + free(hook); + } +} diff --git a/lib/trace-cmd/trace-input.c b/lib/trace-cmd/trace-input.c new file mode 100644 index 00000000..8ffdf04b --- /dev/null +++ b/lib/trace-cmd/trace-input.c @@ -0,0 +1,5886 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#define _LARGEFILE64_SOURCE +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <regex.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> + +#include <linux/time64.h> + +#include "trace-write-local.h" +#include "trace-cmd-local.h" +#include "trace-local.h" +#include "kbuffer.h" +#include "list.h" + +#define _STRINGIFY(x) #x +#define STRINGIFY(x) _STRINGIFY(x) + +#define MISSING_EVENTS (1 << 31) +#define MISSING_STORED (1 << 30) + +#define COMMIT_MASK ((1 << 27) - 1) + +/* force uncompressing in memory */ +#define INMEMORY_DECOMPRESS + +/* for debugging read instead of mmap */ +static int force_read = 0; + +struct page_map { + struct list_head list; + off64_t offset; + off64_t size; + void *map; + int ref_count; +}; + +struct page { + struct list_head list; + off64_t offset; + struct tracecmd_input *handle; + struct page_map *page_map; + void *map; + int ref_count; + int cpu; + long long lost_events; +#if DEBUG_RECORD + struct tep_record *records; +#endif +}; + +struct zchunk_cache { + struct list_head list; + struct tracecmd_compress_chunk *chunk; + void *map; + int ref; +}; + +struct cpu_zdata { + /* uncompressed cpu data */ + int fd; +#ifdef __ANDROID__ + char file[37]; /* strlen(COMPR_TEMP_FILE) 
*/ +#else /* !__ANDROID__ */ + char file[26]; /* strlen(COMPR_TEMP_FILE) */ +#endif /* __ANDROID__ */ + + unsigned int count; + unsigned int last_chunk; + struct list_head cache; + struct tracecmd_compress_chunk *chunks; +}; + +#ifdef __ANDROID__ +#define COMPR_TEMP_FILE "/data/local/tmp/trace_cpu_dataXXXXXX" +#else /* !__ANDROID__ */ +#define COMPR_TEMP_FILE "/tmp/trace_cpu_dataXXXXXX" +#endif /* __ANDROID__ */ + +struct cpu_data { + /* the first two never change */ + unsigned long long file_offset; + unsigned long long file_size; + unsigned long long offset; + unsigned long long size; + unsigned long long timestamp; + unsigned long long first_ts; + struct list_head page_maps; + struct page_map *page_map; + struct page **pages; + struct tep_record *next; + struct page *page; + struct kbuffer *kbuf; + int nr_pages; + int page_cnt; + int cpu; + int pipe_fd; + struct cpu_zdata compress; +}; + +struct cpu_file_data { + int cpu; + unsigned long long offset; + unsigned long long size; +}; + +struct input_buffer_instance { + char *name; + size_t offset; + char *clock; + bool latency; + int page_size; + int cpus; + struct cpu_file_data *cpu_data; +}; + +struct ts_offset_sample { + long long time; + long long offset; + long long scaling; + long long fraction; +}; + +struct guest_trace_info { + struct guest_trace_info *next; + char *name; + unsigned long long trace_id; + int vcpu_count; + int *cpu_pid; +}; + +struct timesync_offsets { + int ts_samples_count; + struct ts_offset_sample *ts_samples; +}; + +struct host_trace_info { + unsigned long long peer_trace_id; + unsigned int flags; + bool sync_enable; + int ts_samples_count; + struct ts_offset_sample *ts_samples; + int cpu_count; + struct timesync_offsets *ts_offsets; +}; + +struct tsc2nsec { + int mult; + int shift; + unsigned long long offset; +}; + +struct file_section { + unsigned long long section_offset; + unsigned long long data_offset; + int id; + int flags; + struct file_section *next; +}; + +struct 
tracecmd_input { + struct tep_handle *pevent; + struct tep_plugin_list *plugin_list; + struct tracecmd_input *parent; + unsigned long file_state; + unsigned long long trace_id; + unsigned long long next_offset; + unsigned long flags; + int fd; + int long_size; + int page_size; + int page_map_size; + int max_cpu; + int cpus; + int ref; + int nr_buffers; /* buffer instances */ + bool use_trace_clock; + bool read_page; + bool use_pipe; + bool read_zpage; /* uncompress pages in memory, do not use tmp files */ + bool cpu_compressed; + int file_version; + unsigned int cpustats_size; + struct cpu_zdata latz; + struct cpu_data *cpu_data; + long long ts_offset; + struct tsc2nsec tsc_calc; + + unsigned int strings_size; /* size of the metadata strings */ + char *strings; /* metadata strings */ + + bool read_compress; + struct tracecmd_compression *compress; + + struct host_trace_info host; + double ts2secs; + char * cpustats; + char * uname; + char * version; + char * trace_clock; + struct input_buffer_instance top_buffer; + struct input_buffer_instance *buffers; + int parsing_failures; + struct guest_trace_info *guest; + + struct tracecmd_ftrace finfo; + + struct hook_list *hooks; + struct pid_addr_maps *pid_maps; + /* file information */ + struct file_section *sections; + bool options_init; + unsigned long long options_start; + unsigned long long options_last_offset; + size_t total_file_size; + + /* For custom profilers. 
*/ + tracecmd_show_data_func show_data_func; +}; + +__thread struct tracecmd_input *tracecmd_curr_thread_handle; + +#define CHECK_READ_STATE(H, S) ((H)->file_version < FILE_VERSION_SECTIONS && (H)->file_state >= (S)) +#define HAS_SECTIONS(H) ((H)->flags & TRACECMD_FL_SECTIONED) +#define HAS_COMPRESSION(H) ((H)->flags & TRACECMD_FL_COMPRESSION) + +static int read_options_type(struct tracecmd_input *handle); + +void tracecmd_set_flag(struct tracecmd_input *handle, int flag) +{ + handle->flags |= flag; +} + +void tracecmd_clear_flag(struct tracecmd_input *handle, int flag) +{ + handle->flags &= ~flag; +} + +unsigned long tracecmd_get_flags(struct tracecmd_input *handle) +{ + return handle->flags; +} + +enum tracecmd_file_states tracecmd_get_file_state(struct tracecmd_input *handle) +{ + return handle->file_state; +} + +#if DEBUG_RECORD +static void remove_record(struct page *page, struct tep_record *record) +{ + if (record->prev) + record->prev->next = record->next; + else + page->records = record->next; + if (record->next) + record->next->prev = record->prev; +} +static void add_record(struct page *page, struct tep_record *record) +{ + if (page->records) + page->records->prev = record; + record->next = page->records; + record->prev = NULL; + page->records = record; +} +static const char *show_records(struct page **pages, int nr_pages) +{ + static char buf[BUFSIZ + 1]; + struct tep_record *record; + struct page *page; + int len; + int i; + + memset(buf, 0, sizeof(buf)); + len = 0; + for (i = 0; i < nr_pages; i++) { + page = pages[i]; + if (!page) + continue; + for (record = page->records; record; record = record->next) { + int n; + n = snprintf(buf+len, BUFSIZ - len, " 0x%lx", record->alloc_addr); + len += n; + if (len >= BUFSIZ) + break; + } + } + return buf; +} +#else +static inline void remove_record(struct page *page, struct tep_record *record) {} +static inline void add_record(struct page *page, struct tep_record *record) {} +static const char 
*show_records(struct page **pages, int nr_pages) +{ + return ""; +} +#endif + +static int init_cpu(struct tracecmd_input *handle, int cpu); + +static ssize_t do_read_fd(int fd, void *data, size_t size) +{ + ssize_t tot = 0; + ssize_t r; + + do { + r = read(fd, data + tot, size - tot); + tot += r; + + if (!r) + break; + if (r < 0) + return r; + } while (tot != size); + + return tot; +} + +static inline int do_lseek(struct tracecmd_input *handle, int offset, int whence) +{ + if (handle->read_compress) + return tracecmd_compress_lseek(handle->compress, offset, whence); + else + return lseek(handle->fd, offset, whence); +} + +static inline ssize_t do_read(struct tracecmd_input *handle, void *data, size_t size) +{ + if (handle->read_compress) + return tracecmd_compress_buffer_read(handle->compress, data, size); + else + return do_read_fd(handle->fd, data, size); +} + +static ssize_t +do_read_check(struct tracecmd_input *handle, void *data, size_t size) +{ + ssize_t ret; + + ret = do_read(handle, data, size); + if (ret < 0) + return ret; + if (ret != size) + return -1; + + return 0; +} + +static char *read_string(struct tracecmd_input *handle) +{ + char buf[BUFSIZ]; + char *str = NULL; + size_t size = 0; + ssize_t i; + ssize_t r; + + for (;;) { + r = do_read(handle, buf, BUFSIZ); + if (r <= 0) + goto fail; + + for (i = 0; i < r; i++) { + if (!buf[i]) + break; + } + if (i < r) + break; + + if (str) { + size += BUFSIZ; + str = realloc(str, size); + if (!str) + return NULL; + memcpy(str + (size - BUFSIZ), buf, BUFSIZ); + } else { + size = BUFSIZ; + str = malloc(size); + if (!str) + return NULL; + memcpy(str, buf, size); + } + } + + /* move the file descriptor to the end of the string */ + r = do_lseek(handle, -(r - (i+1)), SEEK_CUR); + if (r < 0) + goto fail; + + if (str) { + size += i + 1; + str = realloc(str, size); + if (!str) + return NULL; + memcpy(str + (size - i), buf, i); + str[size] = 0; + } else { + size = i + 1; + str = malloc(size); + if (!str) + return NULL; + 
memcpy(str, buf, i); + str[i] = 0; + } + + return str; + + fail: + if (str) + free(str); + return NULL; +} + +static int read2(struct tracecmd_input *handle, unsigned short *size) +{ + struct tep_handle *pevent = handle->pevent; + unsigned short data; + + if (do_read_check(handle, &data, 2)) + return -1; + + *size = tep_read_number(pevent, &data, 2); + return 0; +} + +static int read4(struct tracecmd_input *handle, unsigned int *size) +{ + struct tep_handle *pevent = handle->pevent; + unsigned int data; + + if (do_read_check(handle, &data, 4)) + return -1; + + *size = tep_read_number(pevent, &data, 4); + return 0; +} + +static int read8(struct tracecmd_input *handle, unsigned long long *size) +{ + struct tep_handle *pevent = handle->pevent; + unsigned long long data; + + if (do_read_check(handle, &data, 8)) + return -1; + + *size = tep_read_number(pevent, &data, 8); + return 0; +} + +__hidden void in_uncompress_reset(struct tracecmd_input *handle) +{ + if (handle->compress) { + handle->read_compress = false; + tracecmd_compress_reset(handle->compress); + } +} + +__hidden int in_uncompress_block(struct tracecmd_input *handle) +{ + int ret = 0; + + if (handle->compress) { + ret = tracecmd_uncompress_block(handle->compress); + if (!ret) + handle->read_compress = true; + } + return ret; +} + +static struct file_section *section_get(struct tracecmd_input *handle, int id) +{ + struct file_section *sec; + + for (sec = handle->sections; sec; sec = sec->next) { + if (sec->id == id) + return sec; + } + + return NULL; +} + +static struct file_section *section_open(struct tracecmd_input *handle, int id) +{ + struct file_section *sec = section_get(handle, id); + + if (!sec) + return NULL; + + if (lseek64(handle->fd, sec->data_offset, SEEK_SET) == (off64_t)-1) + return NULL; + + if ((sec->flags & TRACECMD_SEC_FL_COMPRESS) && in_uncompress_block(handle)) + return NULL; + + return sec; +} + +static void section_close(struct tracecmd_input *handle, struct file_section *sec) +{ + if 
(sec->flags & TRACECMD_SEC_FL_COMPRESS) + in_uncompress_reset(handle); +} + +static int section_add_or_update(struct tracecmd_input *handle, int id, int flags, + unsigned long long section_offset, + unsigned long long data_offset) +{ + struct file_section *sec = section_get(handle, id); + + if (!sec) { + sec = calloc(1, sizeof(struct file_section)); + if (!sec) + return -1; + sec->next = handle->sections; + handle->sections = sec; + sec->id = id; + } + + if (section_offset) + sec->section_offset = section_offset; + if (data_offset) + sec->data_offset = data_offset; + if (flags >= 0) + sec->flags = flags; + + return 0; +} + +static int read_header_files(struct tracecmd_input *handle) +{ + struct tep_handle *pevent = handle->pevent; + unsigned long long size; + char *header; + char buf[BUFSIZ]; + + if (CHECK_READ_STATE(handle, TRACECMD_FILE_HEADERS)) + return 0; + + if (!HAS_SECTIONS(handle)) + section_add_or_update(handle, TRACECMD_OPTION_HEADER_INFO, 0, 0, + lseek64(handle->fd, 0, SEEK_CUR)); + + if (do_read_check(handle, buf, 12)) + return -1; + + if (memcmp(buf, "header_page", 12) != 0) + return -1; + + if (read8(handle, &size) < 0) + return -1; + + header = malloc(size); + if (!header) + return -1; + + if (do_read_check(handle, header, size)) + goto failed_read; + + tep_parse_header_page(pevent, header, size, handle->long_size); + free(header); + + /* + * The size field in the page is of type long, + * use that instead, since it represents the kernel. 
+ */ + handle->long_size = tep_get_header_page_size(pevent); + + if (do_read_check(handle, buf, 13)) + return -1; + + if (memcmp(buf, "header_event", 13) != 0) + return -1; + + if (read8(handle, &size) < 0) + return -1; + + header = malloc(size); + if (!header) + return -1; + + if (do_read_check(handle, header, size)) + goto failed_read; + + free(header); + + handle->file_state = TRACECMD_FILE_HEADERS; + + return 0; + + failed_read: + free(header); + return -1; +} + +static int regex_event_buf(const char *file, int size, regex_t *epreg) +{ + char *buf; + char *line; + int ret; + + buf = malloc(size + 1); + if (!buf) { + tracecmd_warning("Insufficient memory"); + return 0; + } + + strncpy(buf, file, size); + buf[size] = 0; + + /* get the name from the first line */ + line = strtok(buf, "\n"); + if (!line) { + tracecmd_warning("No newline found in '%s'", buf); + return 0; + } + /* skip name if it is there */ + if (strncmp(line, "name: ", 6) == 0) + line += 6; + + ret = regexec(epreg, line, 0, NULL, 0) == 0; + + free(buf); + + return ret; +} + +static int read_ftrace_file(struct tracecmd_input *handle, + unsigned long long size, + int print, regex_t *epreg) +{ + struct tep_handle *pevent = handle->pevent; + char *buf; + + buf = malloc(size); + if (!buf) + return -1; + if (do_read_check(handle, buf, size)) { + free(buf); + return -1; + } + + if (epreg) { + if (print || regex_event_buf(buf, size, epreg)) + printf("%.*s\n", (int)size, buf); + } else { + if (tep_parse_event(pevent, buf, size, "ftrace")) + handle->parsing_failures++; + } + free(buf); + + return 0; +} + +static int read_event_file(struct tracecmd_input *handle, + char *system, unsigned long long size, + int print, int *sys_printed, + regex_t *epreg) +{ + struct tep_handle *pevent = handle->pevent; + char *buf; + + buf = malloc(size); + if (!buf) + return -1; + + if (do_read_check(handle, buf, size)) { + free(buf); + return -1; + } + + if (epreg) { + if (print || regex_event_buf(buf, size, epreg)) { + if 
(!*sys_printed) { + printf("\nsystem: %s\n", system); + *sys_printed = 1; + } + printf("%.*s\n", (int)size, buf); + } + } else { + if (tep_parse_event(pevent, buf, size, system)) + handle->parsing_failures++; + } + free(buf); + + return 0; +} + +static int make_preg_files(const char *regex, regex_t *system, + regex_t *event, int *unique) +{ + char *buf; + char *sstr; + char *estr; + int ret; + + /* unique is set if a colon is found */ + *unique = 0; + + /* split "system:event" into "system" and "event" */ + + buf = strdup(regex); + if (!buf) + return -ENOMEM; + + sstr = strtok(buf, ":"); + estr = strtok(NULL, ":"); + + /* If no colon is found, set event == system */ + if (!estr) + estr = sstr; + else + *unique = 1; + + ret = regcomp(system, sstr, REG_ICASE|REG_NOSUB); + if (ret) { + tracecmd_warning("Bad regular expression '%s'", sstr); + goto out; + } + + ret = regcomp(event, estr, REG_ICASE|REG_NOSUB); + if (ret) { + tracecmd_warning("Bad regular expression '%s'", estr); + goto out; + } + + out: + free(buf); + return ret; +} + +static int read_ftrace_files(struct tracecmd_input *handle, const char *regex) +{ + unsigned long long size; + regex_t spreg; + regex_t epreg; + regex_t *sreg = NULL; + regex_t *ereg = NULL; + unsigned int count, i; + int print_all = 0; + int unique; + int ret; + + if (CHECK_READ_STATE(handle, TRACECMD_FILE_FTRACE_EVENTS)) + return 0; + + if (!HAS_SECTIONS(handle)) + section_add_or_update(handle, TRACECMD_OPTION_FTRACE_EVENTS, 0, 0, + lseek64(handle->fd, 0, SEEK_CUR)); + + if (regex) { + sreg = &spreg; + ereg = &epreg; + ret = make_preg_files(regex, sreg, ereg, &unique); + if (ret) + return -1; + + if (regexec(sreg, "ftrace", 0, NULL, 0) == 0) { + /* + * If the system matches a regex that did + * not contain a colon, then print all events. + */ + if (!unique) + print_all = 1; + } else if (unique) { + /* + * The user specified a unique event that did + * not match the ftrace system. Don't print any + * events here. 
+ */ + regfree(sreg); + regfree(ereg); + sreg = NULL; + ereg = NULL; + } + } + + ret = read4(handle, &count); + if (ret < 0) + goto out; + + for (i = 0; i < count; i++) { + ret = read8(handle, &size); + if (ret < 0) + goto out; + ret = read_ftrace_file(handle, size, print_all, ereg); + if (ret < 0) + goto out; + } + + handle->file_state = TRACECMD_FILE_FTRACE_EVENTS; + ret = 0; +out: + if (sreg) { + regfree(sreg); + regfree(ereg); + } + + return ret; +} + +static int read_event_files(struct tracecmd_input *handle, const char *regex) +{ + unsigned long long size; + char *system = NULL; + regex_t spreg; + regex_t epreg; + regex_t *sreg = NULL; + regex_t *ereg = NULL; + regex_t *reg; + unsigned int systems; + unsigned int count; + unsigned int i, x; + int print_all; + int sys_printed; + int unique; + int ret; + + if (CHECK_READ_STATE(handle, TRACECMD_FILE_ALL_EVENTS)) + return 0; + + if (!HAS_SECTIONS(handle)) + section_add_or_update(handle, TRACECMD_OPTION_EVENT_FORMATS, 0, 0, + lseek64(handle->fd, 0, SEEK_CUR)); + + if (regex) { + sreg = &spreg; + ereg = &epreg; + ret = make_preg_files(regex, sreg, ereg, &unique); + if (ret) + return -1; + } + + ret = read4(handle, &systems); + if (ret < 0) + goto out; + + for (i = 0; i < systems; i++) { + system = read_string(handle); + if (!system) { + ret = -1; + goto out; + } + + sys_printed = 0; + print_all = 0; + reg = ereg; + + if (sreg) { + if (regexec(sreg, system, 0, NULL, 0) == 0) { + /* + * If the user passed in a regex that + * did not contain a colon, then we can + * print all the events of this system. + */ + if (!unique) + print_all = 1; + } else if (unique) { + /* + * The user passed in a unique event that + * specified a specific system and event. + * Since this system doesn't match this + * event, then we don't print any events + * for this system. 
+ */ + reg = NULL; + } + } + + ret = read4(handle, &count); + if (ret < 0) + goto out; + + for (x=0; x < count; x++) { + ret = read8(handle, &size); + if (ret < 0) + goto out; + + ret = read_event_file(handle, system, size, + print_all, &sys_printed, + reg); + if (ret < 0) + goto out; + } + free(system); + } + system = NULL; + + handle->file_state = TRACECMD_FILE_ALL_EVENTS; + ret = 0; + out: + if (sreg) { + regfree(sreg); + regfree(ereg); + } + + free(system); + return ret; +} + +static int read_proc_kallsyms(struct tracecmd_input *handle) +{ + struct tep_handle *tep = handle->pevent; + unsigned int size; + char *buf; + + if (CHECK_READ_STATE(handle, TRACECMD_FILE_KALLSYMS)) + return 0; + if (!HAS_SECTIONS(handle)) + section_add_or_update(handle, TRACECMD_OPTION_KALLSYMS, 0, 0, + lseek64(handle->fd, 0, SEEK_CUR)); + + if (read4(handle, &size) < 0) + return -1; + if (!size) { + handle->file_state = TRACECMD_FILE_KALLSYMS; + return 0; /* OK? */ + } + + buf = malloc(size+1); + if (!buf) + return -1; + if (do_read_check(handle, buf, size)){ + free(buf); + return -1; + } + buf[size] = 0; + + tep_parse_kallsyms(tep, buf); + + free(buf); + + handle->file_state = TRACECMD_FILE_KALLSYMS; + + return 0; +} + +static int read_ftrace_printk(struct tracecmd_input *handle) +{ + unsigned int size; + char *buf; + + if (CHECK_READ_STATE(handle, TRACECMD_FILE_PRINTK)) + return 0; + + if (!HAS_SECTIONS(handle)) + section_add_or_update(handle, TRACECMD_OPTION_PRINTK, 0, 0, + lseek64(handle->fd, 0, SEEK_CUR)); + + if (read4(handle, &size) < 0) + return -1; + if (!size) { + handle->file_state = TRACECMD_FILE_PRINTK; + return 0; /* OK? 
*/ + } + + buf = malloc(size + 1); + if (!buf) + return -1; + if (do_read_check(handle, buf, size)) { + free(buf); + return -1; + } + + buf[size] = 0; + + tep_parse_printk_formats(handle->pevent, buf); + + free(buf); + + handle->file_state = TRACECMD_FILE_PRINTK; + + return 0; +} + +static int read_and_parse_cmdlines(struct tracecmd_input *handle); + +/** + * tracecmd_get_parsing_failures - get the count of parsing failures + * @handle: input handle for the trace.dat file + * + * This returns the count of failures while parsing the event files + */ +int tracecmd_get_parsing_failures(struct tracecmd_input *handle) +{ + if (handle) + return handle->parsing_failures; + return 0; +} + +static int read_cpus(struct tracecmd_input *handle) +{ + unsigned int cpus; + + if (CHECK_READ_STATE(handle, TRACECMD_FILE_CPU_COUNT)) + return 0; + + if (read4(handle, &cpus) < 0) + return -1; + + handle->cpus = cpus; + handle->max_cpu = cpus; + tep_set_cpus(handle->pevent, handle->cpus); + handle->file_state = TRACECMD_FILE_CPU_COUNT; + + return 0; +} + +static int read_headers_v6(struct tracecmd_input *handle, enum tracecmd_file_states state, + const char *regex) +{ + int ret; + + /* Set to read all if state is zero */ + if (!state) + state = TRACECMD_FILE_OPTIONS; + + if (state <= handle->file_state) + return 0; + + handle->parsing_failures = 0; + + ret = read_header_files(handle); + if (ret < 0) + return -1; + + if (state <= handle->file_state) + return 0; + + ret = read_ftrace_files(handle, NULL); + if (ret < 0) + return -1; + + if (state <= handle->file_state) + return 0; + + ret = read_event_files(handle, regex); + if (ret < 0) + return -1; + + if (state <= handle->file_state) + return 0; + + ret = read_proc_kallsyms(handle); + if (ret < 0) + return -1; + + if (state <= handle->file_state) + return 0; + + ret = read_ftrace_printk(handle); + if (ret < 0) + return -1; + + if (state <= handle->file_state) + return 0; + + if (read_and_parse_cmdlines(handle) < 0) + return -1; + + if 
(state <= handle->file_state) + return 0; + + if (read_cpus(handle) < 0) + return -1; + + if (state <= handle->file_state) + return 0; + + if (read_options_type(handle) < 0) + return -1; + + return 0; +} + +static int handle_options(struct tracecmd_input *handle); + +static const char *get_metadata_string(struct tracecmd_input *handle, int offset) +{ + if (!handle || !handle->strings || offset < 0 || handle->strings_size >= offset) + return NULL; + + return handle->strings + offset; +} + +static int read_section_header(struct tracecmd_input *handle, unsigned short *id, + unsigned short *flags, unsigned long long *size, const char **description) +{ + unsigned short fl; + unsigned short sec_id; + unsigned long long sz; + int desc; + + if (read2(handle, &sec_id)) + return -1; + if (read2(handle, &fl)) + return -1; + if (read4(handle, (unsigned int *)&desc)) + return -1; + if (read8(handle, &sz)) + return -1; + + if (id) + *id = sec_id; + if (flags) + *flags = fl; + if (size) + *size = sz; + if (description) + *description = get_metadata_string(handle, desc); + + return 0; +} + +static int handle_section(struct tracecmd_input *handle, struct file_section *section, + const char *regex) +{ + unsigned short id, flags; + unsigned long long size; + int ret; + + if (lseek64(handle->fd, section->section_offset, SEEK_SET) == (off_t)-1) + return -1; + if (read_section_header(handle, &id, &flags, &size, NULL)) + return -1; + section->flags = flags; + if (id != section->id) + return -1; + + section->data_offset = lseek64(handle->fd, 0, SEEK_CUR); + if ((section->flags & TRACECMD_SEC_FL_COMPRESS) && in_uncompress_block(handle)) + return -1; + + switch (section->id) { + case TRACECMD_OPTION_HEADER_INFO: + ret = read_header_files(handle); + break; + case TRACECMD_OPTION_FTRACE_EVENTS: + ret = read_ftrace_files(handle, NULL); + break; + case TRACECMD_OPTION_EVENT_FORMATS: + ret = read_event_files(handle, regex); + break; + case TRACECMD_OPTION_KALLSYMS: + ret = 
read_proc_kallsyms(handle); + break; + case TRACECMD_OPTION_PRINTK: + ret = read_ftrace_printk(handle); + break; + case TRACECMD_OPTION_CMDLINES: + ret = read_and_parse_cmdlines(handle); + break; + default: + ret = 0; + break; + } + + if (section->flags & TRACECMD_SEC_FL_COMPRESS) + in_uncompress_reset(handle); + + return ret; +} + +static int read_headers(struct tracecmd_input *handle, const char *regex) +{ + struct file_section *section; + + if (handle->options_init) + return 0; + + if (!handle->options_start) + return -1; + + if (lseek64(handle->fd, handle->options_start, SEEK_SET) == (off64_t)-1) { + tracecmd_warning("Filed to goto options offset %lld", handle->options_start); + return -1; + } + + if (handle_options(handle)) + return -1; + + section = handle->sections; + while (section) { + if (handle_section(handle, section, NULL)) + return -1; + section = section->next; + } + + handle->options_init = true; + return 0; +} + +/** + * tracecmd_read_headers - read the header information from trace.dat + * @handle: input handle for the trace.dat file + * @state: The state to read up to or zero to read up to options. + * + * This reads the trace.dat file for various information. Like the + * format of the ring buffer, event formats, ftrace formats, kallsyms + * and printk. This may be called multiple times with different @state + * values, to read partial data at a time. It will always continue + * where it left off. 
+ */ +int tracecmd_read_headers(struct tracecmd_input *handle, + enum tracecmd_file_states state) +{ + if (!HAS_SECTIONS(handle)) + return read_headers_v6(handle, state, NULL); + return read_headers(handle, NULL); +} + +static unsigned long long calc_page_offset(struct tracecmd_input *handle, + unsigned long long offset) +{ + return offset & ~(handle->page_size - 1); +} + +static int read_page(struct tracecmd_input *handle, off64_t offset, + int cpu, void *map) +{ + off64_t save_seek; + off64_t ret; + + if (handle->use_pipe) { + ret = read(handle->cpu_data[cpu].pipe_fd, map, handle->page_size); + /* Set EAGAIN if the pipe is empty */ + if (ret < 0) { + errno = EAGAIN; + return -1; + + } else if (ret == 0) { + /* Set EINVAL when the pipe has closed */ + errno = EINVAL; + return -1; + } + return 0; + } + + /* other parts of the code may expect the pointer to not move */ + save_seek = lseek64(handle->fd, 0, SEEK_CUR); + + ret = lseek64(handle->fd, offset, SEEK_SET); + if (ret < 0) + return -1; + ret = read(handle->fd, map, handle->page_size); + if (ret < 0) + return -1; + + /* reset the file pointer back */ + lseek64(handle->fd, save_seek, SEEK_SET); + + return 0; +} + +/* page_map_size must be a power of two */ +static unsigned long long normalize_size(unsigned long long size) +{ + /* From Hacker's Delight: or bits after first set bit to all 1s */ + size |= (size >> 1); + size |= (size >> 2); + size |= (size >> 4); + size |= (size >> 8); + size |= (size >> 16); + size |= (size >> 32); + + /* Clear all bits except first one for previous power of two */ + return size - (size >> 1); +} + +static void free_page_map(struct page_map *page_map) +{ + page_map->ref_count--; + if (page_map->ref_count) + return; + + munmap(page_map->map, page_map->size); + list_del(&page_map->list); + free(page_map); +} + +#define CHUNK_CHECK_OFFSET(C, O) ((O) >= (C)->offset && (O) < ((C)->offset + (C)->size)) + +static int chunk_cmp(const void *A, const void *B) +{ + const struct 
tracecmd_compress_chunk *a = A; + const struct tracecmd_compress_chunk *b = B; + + if (CHUNK_CHECK_OFFSET(b, a->offset)) + return 0; + + if (b->offset < a->offset) + return -1; + + return 1; +} + +static struct tracecmd_compress_chunk *get_zchunk(struct cpu_data *cpu, off64_t offset) +{ + struct cpu_zdata *cpuz = &cpu->compress; + struct tracecmd_compress_chunk *chunk; + struct tracecmd_compress_chunk key; + + if (!cpuz->chunks) + return NULL; + + if (offset > (cpuz->chunks[cpuz->count - 1].offset + cpuz->chunks[cpuz->count - 1].size)) + return NULL; + + /* check if the requested offset is in the last requested chunk or in the next chunk */ + if (CHUNK_CHECK_OFFSET(cpuz->chunks + cpuz->last_chunk, offset)) + return cpuz->chunks + cpuz->last_chunk; + + cpuz->last_chunk++; + if (cpuz->last_chunk < cpuz->count && + CHUNK_CHECK_OFFSET(cpuz->chunks + cpuz->last_chunk, offset)) + return cpuz->chunks + cpuz->last_chunk; + + key.offset = offset; + chunk = bsearch(&key, cpuz->chunks, cpuz->count, sizeof(*chunk), chunk_cmp); + + if (!chunk) /* should never happen */ + return NULL; + + cpuz->last_chunk = chunk - cpuz->chunks; + return chunk; +} + +static void free_zpage(struct cpu_data *cpu_data, void *map) +{ + struct zchunk_cache *cache; + + list_for_each_entry(cache, &cpu_data->compress.cache, list) { + if (map <= cache->map && map > (cache->map + cache->chunk->size)) + goto found; + } + return; + +found: + cache->ref--; + if (cache->ref) + return; + list_del(&cache->list); + free(cache->map); + free(cache); +} + +static void *read_zpage(struct tracecmd_input *handle, int cpu, off64_t offset) +{ + struct cpu_data *cpu_data = &handle->cpu_data[cpu]; + struct tracecmd_compress_chunk *chunk; + struct zchunk_cache *cache; + void *map = NULL; + int pindex; + int size; + + offset -= cpu_data->file_offset; + + /* Look in the cache of already loaded chunks */ + list_for_each_entry(cache, &cpu_data->compress.cache, list) { + if (CHUNK_CHECK_OFFSET(cache->chunk, offset)) { + 
cache->ref++; + goto out; + } + } + + chunk = get_zchunk(cpu_data, offset); + if (!chunk) + return NULL; + + size = handle->page_size > chunk->size ? handle->page_size : chunk->size; + map = malloc(size); + if (!map) + return NULL; + + if (tracecmd_uncompress_chunk(handle->compress, chunk, map) < 0) + goto error; + + cache = calloc(1, sizeof(struct zchunk_cache)); + if (!cache) + goto error; + + cache->ref = 1; + cache->chunk = chunk; + cache->map = map; + list_add(&cache->list, &cpu_data->compress.cache); + + /* a chunk can hold multiple pages, get the requested one */ +out: + pindex = (offset - cache->chunk->offset) / handle->page_size; + return cache->map + (pindex * handle->page_size); +error: + free(map); + return NULL; +} + +static void *allocate_page_map(struct tracecmd_input *handle, + struct page *page, int cpu, off64_t offset) +{ + struct cpu_data *cpu_data = &handle->cpu_data[cpu]; + struct page_map *page_map; + off64_t map_size; + off64_t map_offset; + void *map; + int ret; + int fd; + + if (handle->cpu_compressed && handle->read_zpage) + return read_zpage(handle, cpu, offset); + + if (handle->read_page) { + map = malloc(handle->page_size); + if (!map) + return NULL; + ret = read_page(handle, offset, cpu, map); + if (ret < 0) { + free(map); + return NULL; + } + return map; + } + + map_size = handle->page_map_size; + map_offset = offset & ~(map_size - 1); + + if (map_offset < cpu_data->file_offset) { + map_size -= cpu_data->file_offset - map_offset; + map_offset = cpu_data->file_offset; + } + + page_map = cpu_data->page_map; + + if (page_map && page_map->offset == map_offset) + goto out; + + list_for_each_entry(page_map, &cpu_data->page_maps, list) { + if (page_map->offset == map_offset) + goto out; + } + + page_map = calloc(1, sizeof(*page_map)); + if (!page_map) + return NULL; + + if (map_offset + map_size > cpu_data->file_offset + cpu_data->file_size) + map_size -= map_offset + map_size - + (cpu_data->file_offset + cpu_data->file_size); + + if 
(cpu_data->compress.fd >= 0) { + map_offset -= cpu_data->file_offset; + fd = cpu_data->compress.fd; + } else + fd = handle->fd; + again: + page_map->size = map_size; + page_map->offset = map_offset; + + page_map->map = mmap(NULL, map_size, PROT_READ, MAP_PRIVATE, fd, map_offset); + + if (page_map->map == MAP_FAILED) { + /* Try a smaller map */ + map_size >>= 1; + if (map_size < handle->page_size) { + free(page_map); + return NULL; + } + handle->page_map_size = map_size; + map_offset = offset & ~(map_size - 1); + /* + * Note, it is now possible to get duplicate memory + * maps. But that's fine, the previous maps with + * larger sizes will eventually be unmapped. + */ + goto again; + } + + list_add(&page_map->list, &cpu_data->page_maps); + out: + if (cpu_data->page_map != page_map) { + struct page_map *old_map = cpu_data->page_map; + cpu_data->page_map = page_map; + page_map->ref_count++; + if (old_map) + free_page_map(old_map); + } + page->page_map = page_map; + page_map->ref_count++; + return page_map->map + offset - page_map->offset; +} + +static struct page *allocate_page(struct tracecmd_input *handle, + int cpu, off64_t offset) +{ + struct cpu_data *cpu_data = &handle->cpu_data[cpu]; + struct page **pages; + struct page *page; + int index; + + index = (offset - cpu_data->file_offset) / handle->page_size; + if (index >= cpu_data->nr_pages) { + pages = realloc(cpu_data->pages, (index + 1) * sizeof(*cpu_data->pages)); + if (!pages) + return NULL; + memset(pages + cpu_data->nr_pages, 0, + (index + 1 - cpu_data->nr_pages) * sizeof(*cpu_data->pages)); + cpu_data->pages = pages; + cpu_data->nr_pages = index + 1; + } + if (cpu_data->pages[index]) { + cpu_data->pages[index]->ref_count++; + return cpu_data->pages[index]; + } + + page = malloc(sizeof(*page)); + if (!page) + return NULL; + + memset(page, 0, sizeof(*page)); + page->offset = offset; + page->handle = handle; + page->cpu = cpu; + + page->map = allocate_page_map(handle, page, cpu, offset); + + if (!page->map) { 
+ free(page); + return NULL; + } + + cpu_data->pages[index] = page; + cpu_data->page_cnt++; + page->ref_count = 1; + + return page; +} + +static void __free_page(struct tracecmd_input *handle, struct page *page) +{ + struct cpu_data *cpu_data = &handle->cpu_data[page->cpu]; + struct page **pages; + int index; + + if (!page->ref_count) { + tracecmd_critical("Page ref count is zero!"); + return; + } + + page->ref_count--; + if (page->ref_count) + return; + + if (handle->read_page) + free(page->map); + else if (handle->read_zpage) + free_zpage(cpu_data, page->map); + else + free_page_map(page->page_map); + + index = (page->offset - cpu_data->file_offset) / handle->page_size; + cpu_data->pages[index] = NULL; + cpu_data->page_cnt--; + + free(page); + + if (handle->use_pipe) { + for (index = cpu_data->nr_pages - 1; index > 0; index--) + if (cpu_data->pages[index]) + break; + if (index < (cpu_data->nr_pages - 1)) { + pages = realloc(cpu_data->pages, (index + 1) * sizeof(*cpu_data->pages)); + if (!pages) + return; + cpu_data->pages = pages; + cpu_data->nr_pages = index + 1; + } + } +} + +static void free_page(struct tracecmd_input *handle, int cpu) +{ + if (!handle->cpu_data || cpu >= handle->cpus || + !handle->cpu_data[cpu].page) + return; + + __free_page(handle, handle->cpu_data[cpu].page); + + handle->cpu_data[cpu].page = NULL; +} + +static void __free_record(struct tep_record *record) +{ + if (record->priv) { + struct page *page = record->priv; + remove_record(page, record); + __free_page(page->handle, page); + } + + free(record); +} + +void tracecmd_free_record(struct tep_record *record) +{ + if (!record) + return; + + if (!record->ref_count) { + tracecmd_critical("record ref count is zero!"); + return; + } + + record->ref_count--; + + if (record->ref_count) + return; + + if (record->locked) { + tracecmd_critical("freeing record when it is locked!"); + return; + } + + record->data = NULL; + + __free_record(record); +} + +void tracecmd_record_ref(struct tep_record 
*record) +{ + record->ref_count++; +#if DEBUG_RECORD + /* Update locating of last reference */ + record->alloc_addr = (unsigned long)__builtin_return_address(0); +#endif +} + +static void free_next(struct tracecmd_input *handle, int cpu) +{ + struct tep_record *record; + + if (!handle->cpu_data || cpu >= handle->cpus) + return; + + record = handle->cpu_data[cpu].next; + if (!record) + return; + + handle->cpu_data[cpu].next = NULL; + + record->locked = 0; + tracecmd_free_record(record); +} + +/* This functions was taken from the Linux kernel */ +static unsigned long long mul_u64_u32_shr(unsigned long long a, + unsigned long long mul, unsigned int shift) +{ + unsigned int ah, al; + unsigned long long ret; + + al = a; + ah = a >> 32; + + ret = (al * mul) >> shift; + if (ah) + ret += (ah * mul) << (32 - shift); + + return ret; +} + +static inline unsigned long long +timestamp_correction_calc(unsigned long long ts, unsigned int flags, + struct ts_offset_sample *min, + struct ts_offset_sample *max) +{ + long long tscor; + + if (flags & TRACECMD_TSYNC_FLAG_INTERPOLATE) { + long long delta = max->time - min->time; + long long offset = ((long long)ts - min->time) * + (max->offset - min->offset); + + tscor = min->offset + (offset + delta / 2) / delta; + } else { + tscor = min->offset; + } + + ts = (ts * min->scaling) >> min->fraction; + if (tscor < 0) + return ts - llabs(tscor); + + return ts + tscor; +} + +static unsigned long long timestamp_host_sync(unsigned long long ts, int cpu, + struct tracecmd_input *handle) +{ + struct timesync_offsets *tsync; + int min, mid, max; + + if (cpu >= handle->host.cpu_count) + return ts; + tsync = &handle->host.ts_offsets[cpu]; + + /* We have one sample, nothing to calc here */ + if (tsync->ts_samples_count == 1) + return ts + tsync->ts_samples[0].offset; + + /* We have two samples, nothing to search here */ + if (tsync->ts_samples_count == 2) + return timestamp_correction_calc(ts, handle->host.flags, + &tsync->ts_samples[0], + 
&tsync->ts_samples[1]); + + /* We have more than two samples */ + if (ts <= tsync->ts_samples[0].time) + return timestamp_correction_calc(ts, handle->host.flags, + &tsync->ts_samples[0], + &tsync->ts_samples[1]); + else if (ts >= tsync->ts_samples[tsync->ts_samples_count-1].time) + return timestamp_correction_calc(ts, handle->host.flags, + &tsync->ts_samples[tsync->ts_samples_count-2], + &tsync->ts_samples[tsync->ts_samples_count-1]); + min = 0; + max = tsync->ts_samples_count-1; + mid = (min + max)/2; + while (min <= max) { + if (ts < tsync->ts_samples[mid].time) + max = mid - 1; + else if (ts > tsync->ts_samples[mid].time) + min = mid + 1; + else + break; + mid = (min + max)/2; + } + + return timestamp_correction_calc(ts, handle->host.flags, + &tsync->ts_samples[mid], + &tsync->ts_samples[mid+1]); +} + +static unsigned long long timestamp_calc(unsigned long long ts, int cpu, + struct tracecmd_input *handle) +{ + /* do not modify raw timestamps */ + if (handle->flags & TRACECMD_FL_RAW_TS) + return ts; + + /* Guest trace file, sync with host timestamps */ + if (handle->host.sync_enable) + ts = timestamp_host_sync(ts, cpu, handle); + + if (handle->ts2secs) { + /* user specified clock frequency */ + ts *= handle->ts2secs; + } else if (handle->tsc_calc.mult) { + /* auto calculated TSC clock frequency */ + ts = mul_u64_u32_shr(ts, handle->tsc_calc.mult, handle->tsc_calc.shift); + } + + /* User specified time offset with --ts-offset or --date options */ + ts += handle->ts_offset; + + return ts; +} + +/* + * Page is mapped, now read in the page header info. 
+ */ +static int update_page_info(struct tracecmd_input *handle, int cpu) +{ + struct tep_handle *pevent = handle->pevent; + void *ptr = handle->cpu_data[cpu].page->map; + struct kbuffer *kbuf = handle->cpu_data[cpu].kbuf; + + /* FIXME: handle header page */ + if (tep_get_header_timestamp_size(pevent) != 8) { + tracecmd_warning("expected a long long type for timestamp"); + return -1; + } + + kbuffer_load_subbuffer(kbuf, ptr); + if (kbuffer_subbuffer_size(kbuf) > handle->page_size) { + tracecmd_warning("bad page read, with size of %d", kbuffer_subbuffer_size(kbuf)); + return -1; + } + handle->cpu_data[cpu].timestamp = timestamp_calc(kbuffer_timestamp(kbuf), + cpu, handle); + + return 0; +} + +/* + * get_page maps a page for a given cpu. + * + * Returns 1 if the page was already mapped, + * 0 if it mapped successfully + * -1 on error + */ +static int get_page(struct tracecmd_input *handle, int cpu, + off64_t offset) +{ + /* Don't map if the page is already where we want */ + if (handle->cpu_data[cpu].offset == offset && + handle->cpu_data[cpu].page) + return 1; + + /* Do not map no data for CPU */ + if (!handle->cpu_data[cpu].size) + return -1; + + if (offset & (handle->page_size - 1)) { + errno = -EINVAL; + tracecmd_critical("bad page offset %llx", offset); + return -1; + } + + if (offset < handle->cpu_data[cpu].file_offset || + offset > handle->cpu_data[cpu].file_offset + + handle->cpu_data[cpu].file_size) { + errno = -EINVAL; + tracecmd_critical("bad page offset %llx", offset); + return -1; + } + + handle->cpu_data[cpu].offset = offset; + handle->cpu_data[cpu].size = (handle->cpu_data[cpu].file_offset + + handle->cpu_data[cpu].file_size) - + offset; + + free_page(handle, cpu); + + handle->cpu_data[cpu].page = allocate_page(handle, cpu, offset); + if (!handle->cpu_data[cpu].page) + return -1; + + if (update_page_info(handle, cpu)) + return -1; + + return 0; +} + +static int get_next_page(struct tracecmd_input *handle, int cpu) +{ + off64_t offset; + + if 
(!handle->cpu_data[cpu].page && !handle->use_pipe) + return 0; + + free_page(handle, cpu); + + if (handle->cpu_data[cpu].size <= handle->page_size) { + handle->cpu_data[cpu].offset = 0; + return 0; + } + + offset = handle->cpu_data[cpu].offset + handle->page_size; + + return get_page(handle, cpu, offset); +} + +static struct tep_record * +peek_event(struct tracecmd_input *handle, unsigned long long offset, + int cpu) +{ + struct tep_record *record = NULL; + + /* + * Since the timestamp is calculated from the beginning + * of the page and through each event, we reset the + * page to the beginning. This is just used by + * tracecmd_read_at. + */ + update_page_info(handle, cpu); + + do { + free_next(handle, cpu); + record = tracecmd_peek_data(handle, cpu); + if (record && (record->offset + record->record_size) > offset) + break; + } while (record); + + return record; +} + +static struct tep_record * +read_event(struct tracecmd_input *handle, unsigned long long offset, + int cpu) +{ + struct tep_record *record; + + record = peek_event(handle, offset, cpu); + if (record) + record = tracecmd_read_data(handle, cpu); + return record; +} + +static struct tep_record * +find_and_peek_event(struct tracecmd_input *handle, unsigned long long offset, + int *pcpu) +{ + unsigned long long page_offset; + int cpu; + + /* find the cpu that this offset exists in */ + for (cpu = 0; cpu < handle->cpus; cpu++) { + if (offset >= handle->cpu_data[cpu].file_offset && + offset < handle->cpu_data[cpu].file_offset + + handle->cpu_data[cpu].file_size) + break; + } + + /* Not found? 
*/ + if (cpu == handle->cpus) + return NULL; + + /* Move this cpu index to point to this offest */ + page_offset = calc_page_offset(handle, offset); + + if (get_page(handle, cpu, page_offset) < 0) + return NULL; + + if (pcpu) + *pcpu = cpu; + + return peek_event(handle, offset, cpu); +} + + +static struct tep_record * +find_and_read_event(struct tracecmd_input *handle, unsigned long long offset, + int *pcpu) +{ + struct tep_record *record; + int cpu; + + record = find_and_peek_event(handle, offset, &cpu); + if (record) { + record = tracecmd_read_data(handle, cpu); + if (pcpu) + *pcpu = cpu; + } + return record; +} + +/** + * tracecmd_read_at - read a record from a specific offset + * @handle: input handle for the trace.dat file + * @offset: the offset into the file to find the record + * @pcpu: pointer to a variable to store the CPU id the record was found in + * + * This function is useful when looking for a previous record. + * You can store the offset of the record "record->offset" and use that + * offset to retreive the record again without needing to store any + * other information about the record. + * + * The record returned must be freed. + */ +struct tep_record * +tracecmd_read_at(struct tracecmd_input *handle, unsigned long long offset, + int *pcpu) +{ + unsigned long long page_offset; + int cpu; + + page_offset = calc_page_offset(handle, offset); + + /* check to see if we have this page already */ + for (cpu = 0; cpu < handle->cpus; cpu++) { + if (handle->cpu_data[cpu].offset == page_offset && + handle->cpu_data[cpu].file_size) + break; + } + + if (cpu < handle->cpus && handle->cpu_data[cpu].page) { + if (pcpu) + *pcpu = cpu; + return read_event(handle, offset, cpu); + } else + return find_and_read_event(handle, offset, pcpu); +} + +/** + * tracecmd_refresh_record - remaps the records data + * @handle: input handle for the trace.dat file + * @record: the record to be refreshed + * + * A record data points to a mmap section of memory. 
+ * by reading new records the mmap section may be unmapped. + * This will refresh the record's data mapping. + * + * ===== OBSOLETED BY PAGE REFERENCES ===== + * + * Returns 1 if page is still mapped (does not modify CPU iterator) + * 0 on successful mapping (was not mapped before, + * This will update CPU iterator to point to + * the next record) + * -1 on error. + */ +int tracecmd_refresh_record(struct tracecmd_input *handle, + struct tep_record *record) +{ + unsigned long long page_offset; + int cpu = record->cpu; + struct cpu_data *cpu_data = &handle->cpu_data[cpu]; + int index; + int ret; + + page_offset = calc_page_offset(handle, record->offset); + index = record->offset & (handle->page_size - 1); + + ret = get_page(handle, record->cpu, page_offset); + if (ret < 0) + return -1; + + /* If the page is still mapped, there's nothing to do */ + if (ret) + return 1; + + record->data = kbuffer_read_at_offset(cpu_data->kbuf, index, &record->ts); + cpu_data->timestamp = record->ts; + + return 0; +} + +/** + * tracecmd_read_cpu_first - get the first record in a CPU + * @handle: input handle for the trace.dat file + * @cpu: the CPU to search + * + * This returns the first (by time) record entry in a given CPU. + * + * The record returned must be freed. + */ +struct tep_record * +tracecmd_read_cpu_first(struct tracecmd_input *handle, int cpu) +{ + unsigned long long page_offset; + int ret; + + if (cpu >= handle->cpus) + return NULL; + + page_offset = calc_page_offset(handle, handle->cpu_data[cpu].file_offset); + + ret = get_page(handle, cpu, page_offset); + if (ret < 0) + return NULL; + + /* If the page was already mapped, we need to reset it */ + if (ret) + update_page_info(handle, cpu); + + free_next(handle, cpu); + + return tracecmd_read_data(handle, cpu); +} + +/** + * tracecmd_read_cpu_last - get the last record in a CPU + * @handle: input handle for the trace.dat file + * @cpu: the CPU to search + * + * This returns the last (by time) record entry in a given CPU. 
+ * + * The record returned must be freed. + */ +struct tep_record * +tracecmd_read_cpu_last(struct tracecmd_input *handle, int cpu) +{ + struct tep_record *record = NULL; + off64_t offset, page_offset; + + offset = handle->cpu_data[cpu].file_offset + + handle->cpu_data[cpu].file_size; + + if (offset & (handle->page_size - 1)) + offset &= ~(handle->page_size - 1); + else + offset -= handle->page_size; + + page_offset = offset; + + again: + if (get_page(handle, cpu, page_offset) < 0) + return NULL; + + offset = page_offset; + + do { + tracecmd_free_record(record); + record = tracecmd_read_data(handle, cpu); + if (record) + offset = record->offset; + } while (record); + + record = tracecmd_read_at(handle, offset, NULL); + + /* + * It is possible that a page has just a timestamp + * or just padding on it. + */ + if (!record) { + if (page_offset == handle->cpu_data[cpu].file_offset) + return NULL; + page_offset -= handle->page_size; + goto again; + } + + return record; +} + +/** + * tracecmd_set_cpu_to_timestamp - set the CPU iterator to a given time + * @handle: input handle for the trace.dat file + * @cpu: the CPU pointer to set + * @ts: the timestamp to set the CPU at. + * + * This sets the CPU iterator used by tracecmd_read_data and + * tracecmd_peek_data to a location in the CPU storage near + * a given timestamp. It will try to set the iterator to a time before + * the time stamp and not actually at a given time. + * + * To use this to find a record in a time field, call this function + * first, than iterate with tracecmd_read_data to find the records + * you need. 
+ */ +int +tracecmd_set_cpu_to_timestamp(struct tracecmd_input *handle, int cpu, + unsigned long long ts) +{ + struct cpu_data *cpu_data = &handle->cpu_data[cpu]; + off64_t start, end, next; + + if (cpu < 0 || cpu >= handle->cpus) { + errno = -EINVAL; + return -1; + } + + if (!cpu_data->size) + return -1; + + if (!cpu_data->page) { + if (init_cpu(handle, cpu)) + return -1; + } + + if (cpu_data->timestamp == ts) { + /* + * If a record is cached, then that record is most + * likely the matching timestamp. Otherwise we need + * to start from the beginning of the index; + */ + if (!cpu_data->next || + cpu_data->next->ts != ts) + update_page_info(handle, cpu); + return 0; + } + + /* Set to the first record on current page */ + update_page_info(handle, cpu); + + if (cpu_data->timestamp < ts) { + start = cpu_data->offset; + end = cpu_data->file_offset + cpu_data->file_size; + if (end & (handle->page_size - 1)) + end &= ~(handle->page_size - 1); + else + end -= handle->page_size; + next = end; + } else { + end = cpu_data->offset; + start = cpu_data->file_offset; + next = start; + } + + while (start < end) { + if (get_page(handle, cpu, next) < 0) + return -1; + + if (cpu_data->timestamp == ts) + break; + + if (cpu_data->timestamp < ts) + start = next; + else + end = next; + + next = start + (end - start) / 2; + next = calc_page_offset(handle, next); + + /* Prevent an infinite loop if start and end are a page off */ + if (next == start) + start = next += handle->page_size; + } + + /* + * We need to end up on a page before the time stamp. + * We go back even if the timestamp is the same. This is because + * we want the event with the timestamp, not the page. The page + * can start with the timestamp we are looking for, but the event + * may be on the previous page. 
+ */ + if (cpu_data->timestamp >= ts && + cpu_data->offset > cpu_data->file_offset) + get_page(handle, cpu, cpu_data->offset - handle->page_size); + + return 0; +} + +/** + * tracecmd_set_all_cpus_to_timestamp - set all CPUs iterator to a given time + * @handle: input handle for the trace.dat file + * @cpu: the CPU pointer to set + * @ts: the timestamp to set the CPU at. + * + * This sets the CPU iterator used by tracecmd_read_data and + * tracecmd_peek_data to a location in the CPU storage near + * a given timestamp. It will try to set the iterator to a time before + * the time stamp and not actually at a given time. + * + * To use this to find a record in a time field, call this function + * first, than iterate with tracecmd_read_next_data to find the records + * you need. + */ +void +tracecmd_set_all_cpus_to_timestamp(struct tracecmd_input *handle, + unsigned long long time) +{ + int cpu; + + for (cpu = 0; cpu < handle->cpus; cpu++) + tracecmd_set_cpu_to_timestamp(handle, cpu, time); +} + +/** + * tracecmd_set_cursor - set the offset for the next tracecmd_read_data + * @handle: input handle for the trace.dat file + * @cpu: the CPU pointer to set + * @offset: the offset to place the cursor + * + * Set the pointer to the next read or peek. This is useful when + * needing to read sequentially and then look at another record + * out of sequence without breaking the iteration. This is done with: + * + * record = tracecmd_peek_data() + * offset = record->offset; + * record = tracecmd_read_at(); + * - do what ever with record - + * tracecmd_set_cursor(handle, cpu, offset); + * + * Now the next tracecmd_peek_data or tracecmd_read_data will return + * the original record. 
+ */ +int tracecmd_set_cursor(struct tracecmd_input *handle, + int cpu, unsigned long long offset) +{ + struct cpu_data *cpu_data = &handle->cpu_data[cpu]; + unsigned long long page_offset; + + if (cpu < 0 || cpu >= handle->cpus) + return -1; + + if (offset < cpu_data->file_offset || + offset > cpu_data->file_offset + cpu_data->file_size) + return -1; /* cpu does not have this offset. */ + + /* Move this cpu index to point to this offest */ + page_offset = calc_page_offset(handle, offset); + + if (get_page(handle, cpu, page_offset) < 0) + return -1; + + peek_event(handle, offset, cpu); + + return 0; +} + +/** + * tracecmd_get_cursor - get the offset for the next tracecmd_read_data + * @handle: input handle for the trace.dat file + * @cpu: the CPU pointer to get the cursor from + * + * Returns the offset of the next record that would be read. + */ +unsigned long long +tracecmd_get_cursor(struct tracecmd_input *handle, int cpu) +{ + struct cpu_data *cpu_data = &handle->cpu_data[cpu]; + struct kbuffer *kbuf = cpu_data->kbuf; + + if (cpu < 0 || cpu >= handle->cpus) + return 0; + + /* + * Use the next pointer if it exists and matches the + * current timestamp. + */ + if (cpu_data->next && + cpu_data->next->ts == cpu_data->timestamp) + return cpu_data->next->offset; + + /* + * Either the next point does not exist, or it does + * not match the timestamp. The next read will use the + * current page. + * + * If the offset is at the end, then return that. + */ + if (cpu_data->offset >= cpu_data->file_offset + + cpu_data->file_size) + return cpu_data->offset; + + return cpu_data->offset + kbuffer_curr_offset(kbuf); +} + +/** + * tracecmd_translate_data - create a record from raw data + * @handle: input handle for the trace.dat file + * @ptr: raw data to read + * @size: the size of the data + * + * This function tries to create a record from some given + * raw data. The data does not need to be from the trace.dat file. + * It can be stored from another location. 
+ * + * Note, since the timestamp is calculated from within the trace + * buffer, the timestamp for the record will be zero, since it + * can't calculate it. + * + * The record returned must be freed. + */ +struct tep_record * +tracecmd_translate_data(struct tracecmd_input *handle, + void *ptr, int size) +{ + struct tep_handle *pevent = handle->pevent; + struct tep_record *record; + unsigned int length; + int swap = 1; + + /* minimum record read is 8, (warn?) (TODO: make 8 into macro) */ + if (size < 8) + return NULL; + + record = malloc(sizeof(*record)); + if (!record) + return NULL; + memset(record, 0, sizeof(*record)); + + record->ref_count = 1; + if (tep_is_local_bigendian(pevent) == tep_is_file_bigendian(pevent)) + swap = 0; + record->data = kbuffer_translate_data(swap, ptr, &length); + record->size = length; + if (record->data) + record->record_size = record->size + (record->data - ptr); + + return record; +} + + +/** + * tracecmd_peek_data - return the record at the current location. + * @handle: input handle for the trace.dat file + * @cpu: the CPU to pull from + * + * This returns the record at the current location of the CPU + * iterator. It does not increment the CPU iterator. + */ +struct tep_record * +tracecmd_peek_data(struct tracecmd_input *handle, int cpu) +{ + struct tep_record *record; + unsigned long long ts; + struct kbuffer *kbuf; + struct page *page; + int index; + void *data; + + if (cpu >= handle->cpus) + return NULL; + + page = handle->cpu_data[cpu].page; + kbuf = handle->cpu_data[cpu].kbuf; + + /* Hack to work around function graph read ahead */ + tracecmd_curr_thread_handle = handle; + + if (handle->cpu_data[cpu].next) { + + record = handle->cpu_data[cpu].next; + if (!record->data) { + tracecmd_critical("Something freed the record"); + return NULL; + } + + if (handle->cpu_data[cpu].timestamp == record->ts) + return record; + + /* + * The timestamp changed, which means the cached + * record is no longer valid. Reread a new record. 
+ */ + free_next(handle, cpu); + } + +read_again: + if (!page) { + if (handle->use_pipe) { + get_next_page(handle, cpu); + page = handle->cpu_data[cpu].page; + } + if (!page) + return NULL; + } + + data = kbuffer_read_event(kbuf, &ts); + if (!data) { + if (get_next_page(handle, cpu)) + return NULL; + page = handle->cpu_data[cpu].page; + goto read_again; + } + + handle->cpu_data[cpu].timestamp = timestamp_calc(ts, cpu, handle); + + index = kbuffer_curr_offset(kbuf); + + record = malloc(sizeof(*record)); + if (!record) + return NULL; + memset(record, 0, sizeof(*record)); + + record->ts = handle->cpu_data[cpu].timestamp; + record->size = kbuffer_event_size(kbuf); + record->cpu = handle->cpu_data[cpu].cpu; + record->data = data; + record->offset = handle->cpu_data[cpu].offset + index; + record->missed_events = kbuffer_missed_events(kbuf); + record->ref_count = 1; + record->locked = 1; + + handle->cpu_data[cpu].next = record; + + record->record_size = kbuffer_curr_size(kbuf); + record->priv = page; + add_record(page, record); + page->ref_count++; + + kbuffer_next_event(kbuf, NULL); + + return record; +} + +/** + * tracecmd_read_data - read the next record and increment + * @handle: input handle for the trace.dat file + * @cpu: the CPU to pull from + * + * This returns the record at the current location of the CPU + * iterator and increments the CPU iterator. + * + * The record returned must be freed. 
+ */ +struct tep_record * +tracecmd_read_data(struct tracecmd_input *handle, int cpu) +{ + struct tep_record *record; + + if (cpu >= handle->cpus) + return NULL; + + record = tracecmd_peek_data(handle, cpu); + handle->cpu_data[cpu].next = NULL; + if (record) { + record->locked = 0; +#if DEBUG_RECORD + record->alloc_addr = (unsigned long)__builtin_return_address(0); +#endif + } + return record; +} + +/** + * tracecmd_read_next_data - read the next record + * @handle: input handle to the trace.dat file + * @rec_cpu: return pointer to the CPU that the record belongs to + * + * This returns the next record by time. This is different than + * tracecmd_read_data in that it looks at all CPUs. It does a peek + * at each CPU and the record with the earliest time stame is + * returned. If @rec_cpu is not NULL it gets the CPU id the record was + * on. The CPU cursor of the returned record is moved to the + * next record. + * + * Multiple reads of this function will return a serialized list + * of all records for all CPUs in order of time stamp. + * + * The record returned must be freed. + */ +struct tep_record * +tracecmd_read_next_data(struct tracecmd_input *handle, int *rec_cpu) +{ + struct tep_record *record; + int next_cpu; + + record = tracecmd_peek_next_data(handle, &next_cpu); + if (!record) + return NULL; + + if (rec_cpu) + *rec_cpu = next_cpu; + + return tracecmd_read_data(handle, next_cpu); +} + +/** + * tracecmd_peek_next_data - return the next record + * @handle: input handle to the trace.dat file + * @rec_cpu: return pointer to the CPU that the record belongs to + * + * This returns the next record by time. This is different than + * tracecmd_peek_data in that it looks at all CPUs. It does a peek + * at each CPU and the record with the earliest time stame is + * returned. If @rec_cpu is not NULL it gets the CPU id the record was + * on. It does not increment the CPU iterator. 
+ */ +struct tep_record * +tracecmd_peek_next_data(struct tracecmd_input *handle, int *rec_cpu) +{ + unsigned long long ts; + struct tep_record *record, *next_record = NULL; + int next_cpu; + int cpu; + + if (rec_cpu) + *rec_cpu = -1; + + next_cpu = -1; + ts = 0; + + for (cpu = 0; cpu < handle->cpus; cpu++) { + record = tracecmd_peek_data(handle, cpu); + if (record && (!next_record || record->ts < ts)) { + ts = record->ts; + next_cpu = cpu; + next_record = record; + } + } + + if (next_record) { + if (rec_cpu) + *rec_cpu = next_cpu; + return next_record; + } + + return NULL; +} + +/** + * tracecmd_read_prev - read the record before the given record + * @handle: input handle to the trace.dat file + * @record: the record to use to find the previous record. + * + * This returns the record before the @record on its CPU. If + * @record is the first record, NULL is returned. The cursor is set + * as if the previous record was read by tracecmd_read_data(). + * + * @record can not be NULL, otherwise NULL is returned; the + * record ownership goes to this function. + * + * Note, this is not that fast of an algorithm, since it needs + * to build the timestamp for the record. + * + * The record returned must be freed with tracecmd_free_record(). 
+ */ +struct tep_record * +tracecmd_read_prev(struct tracecmd_input *handle, struct tep_record *record) +{ + unsigned long long offset, page_offset;; + struct cpu_data *cpu_data; + int index; + int cpu; + + if (!record) + return NULL; + + cpu = record->cpu; + offset = record->offset; + cpu_data = &handle->cpu_data[cpu]; + + page_offset = calc_page_offset(handle, offset); + index = offset - page_offset; + + /* Note, the record passed in could have been a peek */ + free_next(handle, cpu); + + /* Reset the cursor */ + /* Should not happen */ + if (get_page(handle, cpu, page_offset) < 0) + return NULL; + + update_page_info(handle, cpu); + + /* Find the record before this record */ + index = 0; + for (;;) { + record = tracecmd_read_data(handle, cpu); + /* Should not happen! */ + if (!record) + return NULL; + if (record->offset == offset) + break; + index = record->offset - page_offset; + tracecmd_free_record(record); + } + tracecmd_free_record(record); + + if (index) + /* we found our record */ + return tracecmd_read_at(handle, page_offset + index, NULL); + + /* reset the index to start at the beginning of the page */ + update_page_info(handle, cpu); + + /* The previous record is on the previous page */ + for (;;) { + /* check if this is the first page */ + if (page_offset == cpu_data->file_offset) + return NULL; + page_offset -= handle->page_size; + + /* Updating page to a new page will reset index to 0 */ + get_page(handle, cpu, page_offset); + + record = NULL; + index = 0; + do { + if (record) { + index = record->offset - page_offset; + tracecmd_free_record(record); + } + record = tracecmd_read_data(handle, cpu); + /* Should not happen */ + if (!record) + return NULL; + } while (record->offset != offset); + tracecmd_free_record(record); + + if (index) + /* we found our record */ + return tracecmd_read_at(handle, page_offset + index, NULL); + } + + /* Not reached */ +} + +static int init_cpu_zfile(struct tracecmd_input *handle, int cpu) +{ + struct cpu_data *cpu_data; 
+ unsigned long long size; + off64_t offset; + + cpu_data = &handle->cpu_data[cpu]; + offset = lseek64(handle->fd, 0, SEEK_CUR); + if (lseek64(handle->fd, cpu_data->file_offset, SEEK_SET) == (off_t)-1) + return -1; + + strcpy(cpu_data->compress.file, COMPR_TEMP_FILE); + cpu_data->compress.fd = mkstemp(cpu_data->compress.file); + if (cpu_data->compress.fd < 0) + return -1; + + if (tracecmd_uncompress_copy_to(handle->compress, cpu_data->compress.fd, NULL, &size)) + return -1; + + if (lseek64(handle->fd, offset, SEEK_SET) == (off_t)-1) + return -1; + + cpu_data->file_offset = handle->next_offset; + handle->next_offset = (handle->next_offset + size + handle->page_size - 1) & + ~(handle->page_size - 1); + cpu_data->offset = cpu_data->file_offset; + + cpu_data->file_size = size; + cpu_data->size = size; + return 0; +} + +static int init_cpu_zpage(struct tracecmd_input *handle, int cpu) +{ + struct cpu_data *cpu_data = &handle->cpu_data[cpu]; + int count; + int i; + + if (lseek64(handle->fd, cpu_data->file_offset, SEEK_SET) == (off_t)-1) + return -1; + + count = tracecmd_load_chunks_info(handle->compress, &cpu_data->compress.chunks); + if (count < 0) + return -1; + + cpu_data->compress.count = count; + cpu_data->compress.last_chunk = 0; + + cpu_data->file_offset = handle->next_offset; + + for (i = 0; i < count; i++) + cpu_data->file_size += cpu_data->compress.chunks[i].size; + + cpu_data->offset = cpu_data->file_offset; + cpu_data->size = cpu_data->file_size; + handle->next_offset = (handle->next_offset + cpu_data->size + handle->page_size - 1) & + ~(handle->page_size - 1); + return 0; +} + +static int init_cpu(struct tracecmd_input *handle, int cpu) +{ + struct cpu_data *cpu_data = &handle->cpu_data[cpu]; + int ret; + int i; + + if (handle->cpu_compressed && cpu_data->file_size > 0) { + if (handle->read_zpage) + ret = init_cpu_zpage(handle, cpu); + else + ret = init_cpu_zfile(handle, cpu); + if (ret) + return ret; + } else { + cpu_data->offset = cpu_data->file_offset; + 
cpu_data->size = cpu_data->file_size; + } + cpu_data->timestamp = 0; + + list_head_init(&cpu_data->page_maps); + list_head_init(&cpu_data->compress.cache); + + if (!cpu_data->size) { + tracecmd_info("CPU %d is empty", cpu); + return 0; + } + + cpu_data->nr_pages = (cpu_data->size + handle->page_size - 1) / handle->page_size; + if (!cpu_data->nr_pages) + cpu_data->nr_pages = 1; + cpu_data->pages = calloc(cpu_data->nr_pages, sizeof(*cpu_data->pages)); + if (!cpu_data->pages) + return -1; + + if (handle->use_pipe) { + /* Just make a page, it will be nuked later */ + cpu_data->page = malloc(sizeof(*cpu_data->page)); + if (!cpu_data->page) + goto fail; + + memset(cpu_data->page, 0, sizeof(*cpu_data->page)); + cpu_data->pages[0] = cpu_data->page; + cpu_data->page_cnt = 1; + cpu_data->page->ref_count = 1; + return 0; + } + + cpu_data->page = allocate_page(handle, cpu, cpu_data->offset); + if (!cpu_data->page && !handle->read_page) { + perror("mmap"); + fprintf(stderr, "Can not mmap file, will read instead\n"); + + if (cpu) { + /* + * If the other CPUs had size and was able to mmap + * then bail. + */ + for (i = 0; i < cpu; i++) { + if (handle->cpu_data[i].size) + goto fail; + } + } + + /* try again without mmapping, just read it directly */ + handle->read_page = true; + cpu_data->page = allocate_page(handle, cpu, cpu_data->offset); + if (!cpu_data->page) + /* Still no luck, bail! 
*/ + goto fail; + } + + if (update_page_info(handle, cpu)) + goto fail; + cpu_data->first_ts = cpu_data->timestamp; + + return 0; + fail: + free(cpu_data->pages); + cpu_data->pages = NULL; + free(cpu_data->page); + cpu_data->page = NULL; + return -1; +} + +void tracecmd_set_ts_offset(struct tracecmd_input *handle, + long long offset) +{ + handle->ts_offset = offset; +} + +/** + * tracecmd_add_ts_offset - Add value to the offset which will be applied to the timestamps of all + * events from given trace file + * @handle: input handle to the trace.dat file + * @offset: value, that will be added to the offset + */ +void tracecmd_add_ts_offset(struct tracecmd_input *handle, + long long offset) +{ + handle->ts_offset += offset; +} + +void tracecmd_set_ts2secs(struct tracecmd_input *handle, + unsigned long long hz) +{ + double ts2secs; + + ts2secs = (double)NSEC_PER_SEC / (double)hz; + handle->ts2secs = ts2secs; + handle->use_trace_clock = false; +} + +static int tsync_offset_cmp(const void *a, const void *b) +{ + struct ts_offset_sample *ts_a = (struct ts_offset_sample *)a; + struct ts_offset_sample *ts_b = (struct ts_offset_sample *)b; + + if (ts_a->time > ts_b->time) + return 1; + if (ts_a->time < ts_b->time) + return -1; + return 0; +} + +#define safe_read(R, C) \ + do { \ + if ((C) > size) \ + return -EFAULT; \ + (R) = tep_read_number(tep, buf, (C)); \ + buf += (C); \ + size -= (C); \ + } while (0) + +#define safe_read_loop(type) \ + do { \ + int ii; \ + for (ii = 0; ii < ts_offsets->ts_samples_count; ii++) \ + safe_read(ts_offsets->ts_samples[ii].type, 8); \ + } while (0) + +static int tsync_cpu_offsets_load(struct tracecmd_input *handle, char *buf, int size) +{ + struct tep_handle *tep = handle->pevent; + struct timesync_offsets *ts_offsets; + int i, j, k; + + safe_read(handle->host.cpu_count, 4); + handle->host.ts_offsets = calloc(handle->host.cpu_count, + sizeof(struct timesync_offsets)); + if (!handle->host.ts_offsets) + return -ENOMEM; + for (i = 0; i < 
handle->host.cpu_count; i++) { + ts_offsets = &handle->host.ts_offsets[i]; + safe_read(ts_offsets->ts_samples_count, 4); + ts_offsets->ts_samples = calloc(ts_offsets->ts_samples_count, + sizeof(struct ts_offset_sample)); + if (!ts_offsets->ts_samples) + return -ENOMEM; + safe_read_loop(time); + safe_read_loop(offset); + safe_read_loop(scaling); + } + + if (size > 0) { + for (i = 0; i < handle->host.cpu_count; i++) { + ts_offsets = &handle->host.ts_offsets[i]; + safe_read_loop(fraction); + } + } + + for (i = 0; i < handle->host.cpu_count; i++) { + ts_offsets = &handle->host.ts_offsets[i]; + qsort(ts_offsets->ts_samples, ts_offsets->ts_samples_count, + sizeof(struct ts_offset_sample), tsync_offset_cmp); + /* Filter possible samples with equal time */ + for (k = 0, j = 0; k < ts_offsets->ts_samples_count; k++) { + if (k == 0 || ts_offsets->ts_samples[k].time != ts_offsets->ts_samples[k-1].time) + ts_offsets->ts_samples[j++] = ts_offsets->ts_samples[k]; + } + ts_offsets->ts_samples_count = j; + } + + return 0; +} + +static void trace_tsync_offset_free(struct host_trace_info *host) +{ + int i; + + if (host->ts_offsets) { + for (i = 0; i < host->cpu_count; i++) + free(host->ts_offsets[i].ts_samples); + free(host->ts_offsets); + host->ts_offsets = NULL; + } +} + +static int trace_pid_map_cmp(const void *a, const void *b) +{ + struct tracecmd_proc_addr_map *m_a = (struct tracecmd_proc_addr_map *)a; + struct tracecmd_proc_addr_map *m_b = (struct tracecmd_proc_addr_map *)b; + + if (m_a->start > m_b->start) + if (m_a->start < m_b->start) + return -1; + return 0; +} + +static void procmap_free(struct pid_addr_maps *maps) +{ + int i; + + if (!maps) + return; + if (maps->lib_maps) { + for (i = 0; i < maps->nr_lib_maps; i++) + free(maps->lib_maps[i].lib_name); + free(maps->lib_maps); + } + free(maps->proc_name); + free(maps); +} + +static void trace_guests_free(struct tracecmd_input *handle) +{ + struct guest_trace_info *guest; + + while (handle->guest) { + guest = handle->guest; 
+ handle->guest = handle->guest->next; + free(guest->name); + free(guest->cpu_pid); + free(guest); + } +} + +static int trace_guest_load(struct tracecmd_input *handle, char *buf, int size) +{ + struct guest_trace_info *guest = NULL; + int cpu; + int i; + + guest = calloc(1, sizeof(struct guest_trace_info)); + if (!guest) + goto error; + + /* + * Guest name, null terminated string + * long long (8 bytes) trace-id + * int (4 bytes) number of guest CPUs + * array of size number of guest CPUs: + * int (4 bytes) Guest CPU id + * int (4 bytes) Host PID, running the guest CPU + */ + + guest->name = strndup(buf, size); + if (!guest->name) + goto error; + buf += strlen(guest->name) + 1; + size -= strlen(guest->name) + 1; + + if (size < sizeof(long long)) + goto error; + guest->trace_id = tep_read_number(handle->pevent, buf, sizeof(long long)); + buf += sizeof(long long); + size -= sizeof(long long); + + if (size < sizeof(int)) + goto error; + guest->vcpu_count = tep_read_number(handle->pevent, buf, sizeof(int)); + buf += sizeof(int); + size -= sizeof(int); + + guest->cpu_pid = calloc(guest->vcpu_count, sizeof(int)); + if (!guest->cpu_pid) + goto error; + + for (i = 0; i < guest->vcpu_count; i++) { + if (size < 2 * sizeof(int)) + goto error; + cpu = tep_read_number(handle->pevent, buf, sizeof(int)); + buf += sizeof(int); + if (cpu >= guest->vcpu_count) + goto error; + guest->cpu_pid[cpu] = tep_read_number(handle->pevent, + buf, sizeof(int)); + buf += sizeof(int); + size -= 2 * sizeof(int); + } + + guest->next = handle->guest; + handle->guest = guest; + return 0; + +error: + if (guest) { + free(guest->cpu_pid); + free(guest->name); + free(guest); + } + return -1; +} + +/* Needs to be a constant, and 4K should be good enough */ +#define STR_PROCMAP_LINE_MAX 4096 +static int trace_pid_map_load(struct tracecmd_input *handle, char *buf) +{ + struct pid_addr_maps *maps = NULL; + char mapname[STR_PROCMAP_LINE_MAX+1]; + char *line; + int res; + int ret; + int i; + + maps = calloc(1, 
sizeof(*maps)); + if (!maps) + return -ENOMEM; + + ret = -EINVAL; + line = strchr(buf, '\n'); + if (!line) + goto out_fail; + + *line = '\0'; + if (strlen(buf) > STR_PROCMAP_LINE_MAX) + goto out_fail; + + res = sscanf(buf, "%x %x %"STRINGIFY(STR_PROCMAP_LINE_MAX)"s", &maps->pid, &maps->nr_lib_maps, mapname); + if (res != 3) + goto out_fail; + + ret = -ENOMEM; + maps->proc_name = strdup(mapname); + if (!maps->proc_name) + goto out_fail; + + maps->lib_maps = calloc(maps->nr_lib_maps, sizeof(struct tracecmd_proc_addr_map)); + if (!maps->lib_maps) + goto out_fail; + + buf = line + 1; + line = strchr(buf, '\n'); + for (i = 0; i < maps->nr_lib_maps; i++) { + if (!line) + break; + *line = '\0'; + if (strlen(buf) > STR_PROCMAP_LINE_MAX) + break; + res = sscanf(buf, "%llx %llx %s", &maps->lib_maps[i].start, + &maps->lib_maps[i].end, mapname); + if (res != 3) + break; + maps->lib_maps[i].lib_name = strdup(mapname); + if (!maps->lib_maps[i].lib_name) + goto out_fail; + buf = line + 1; + line = strchr(buf, '\n'); + } + + ret = -EINVAL; + if (i != maps->nr_lib_maps) + goto out_fail; + + qsort(maps->lib_maps, maps->nr_lib_maps, + sizeof(*maps->lib_maps), trace_pid_map_cmp); + + maps->next = handle->pid_maps; + handle->pid_maps = maps; + + return 0; + +out_fail: + procmap_free(maps); + return ret; +} + +static void trace_pid_map_free(struct pid_addr_maps *maps) +{ + struct pid_addr_maps *del; + + while (maps) { + del = maps; + maps = maps->next; + procmap_free(del); + } +} + +static int trace_pid_map_search(const void *a, const void *b) +{ + struct tracecmd_proc_addr_map *key = (struct tracecmd_proc_addr_map *)a; + struct tracecmd_proc_addr_map *map = (struct tracecmd_proc_addr_map *)b; + + if (key->start >= map->end) + return 1; + if (key->start < map->start) + return -1; + return 0; +} + +/** + * tracecmd_search_task_map - Search task memory address map + * @handle: input handle to the trace.dat file + * @pid: pid of the task + * @addr: address from the task memory space. 
+ * + * Map of the task memory can be saved in the trace.dat file, using the option + * "--proc-map". If there is such information, this API can be used to look up + * into this memory map to find what library is loaded at the given @addr. + * + * A pointer to struct tracecmd_proc_addr_map is returned, containing the name + * of the library at given task @addr and the library start and end addresses. + */ +struct tracecmd_proc_addr_map * +tracecmd_search_task_map(struct tracecmd_input *handle, + int pid, unsigned long long addr) +{ + struct tracecmd_proc_addr_map *lib; + struct tracecmd_proc_addr_map key; + struct pid_addr_maps *maps; + + if (!handle || !handle->pid_maps) + return NULL; + + maps = handle->pid_maps; + while (maps) { + if (maps->pid == pid) + break; + maps = maps->next; + } + if (!maps || !maps->nr_lib_maps || !maps->lib_maps) + return NULL; + key.start = addr; + lib = bsearch(&key, maps->lib_maps, maps->nr_lib_maps, + sizeof(*maps->lib_maps), trace_pid_map_search); + + return lib; +} + +__hidden unsigned int get_meta_strings_size(struct tracecmd_input *handle) +{ + return handle->strings_size; +} + +__hidden unsigned long long get_last_option_offset(struct tracecmd_input *handle) +{ + return handle->options_last_offset; +} + +static int handle_option_done(struct tracecmd_input *handle, char *buf, int size) +{ + unsigned long long offset; + + if (size < 8) + return -1; + + offset = lseek64(handle->fd, 0, SEEK_CUR); + if (offset >= size) + handle->options_last_offset = offset - size; + + offset = tep_read_number(handle->pevent, buf, 8); + if (!offset) + return 0; + + if (lseek64(handle->fd, offset, SEEK_SET) == (off_t)-1) + return -1; + + return handle_options(handle); +} + +static inline int save_read_number(struct tep_handle *tep, char *data, int *data_size, + int *read_pos, int bytes, unsigned long long *num) +{ + if (bytes > *data_size) + return -1; + + *num = tep_read_number(tep, (data + *read_pos), bytes); + *read_pos += bytes; + *data_size -= 
bytes; + return 0; +} + +static inline char *save_read_string(char *data, int *data_size, int *read_pos) +{ + char *str; + + if (*data_size < 1) + return NULL; + + str = strdup(data + *read_pos); + if (!str) + return NULL; + *data_size -= (strlen(str) + 1); + if (*data_size < 0) { + free(str); + return NULL; + } + *read_pos += (strlen(str) + 1); + + return str; +} + +static int handle_buffer_option(struct tracecmd_input *handle, + unsigned short id, char *data, int size) +{ + struct input_buffer_instance *buff; + struct cpu_file_data *cpu_data; + unsigned long long tmp; + long long max_cpu = -1; + int rsize = 0; + char *name; + int i; + + if (save_read_number(handle->pevent, data, &size, &rsize, 8, &tmp)) + return -1; + + name = save_read_string(data, &size, &rsize); + if (!name) + return -1; + + if (*name == '\0') { + /* top buffer */ + buff = &handle->top_buffer; + } else { + buff = realloc(handle->buffers, sizeof(*handle->buffers) * (handle->nr_buffers + 1)); + if (!buff) { + free(name); + return -1; + } + handle->buffers = buff; + handle->nr_buffers++; + + buff = &handle->buffers[handle->nr_buffers - 1]; + } + memset(buff, 0, sizeof(struct input_buffer_instance)); + buff->name = name; + buff->offset = tmp; + + if (!HAS_SECTIONS(handle)) + return 0; + + /* file sections specific data */ + buff->clock = save_read_string(data, &size, &rsize); + if (!buff->clock) + return -1; + + if (*name == '\0' && !handle->trace_clock) + handle->trace_clock = strdup(buff->clock); + + if (id == TRACECMD_OPTION_BUFFER) { + if (save_read_number(handle->pevent, data, &size, &rsize, 4, &tmp)) + return -1; + buff->page_size = tmp; + + if (save_read_number(handle->pevent, data, &size, &rsize, 4, &tmp)) + return -1; + buff->cpus = tmp; + if (!buff->cpus) + return 0; + cpu_data = calloc(buff->cpus, sizeof(*cpu_data)); + if (!cpu_data) + return -1; + for (i = 0; i < buff->cpus; i++) { + if (save_read_number(handle->pevent, data, &size, &rsize, 4, &tmp)) + goto fail; + if ((long long)tmp > 
max_cpu) + max_cpu = tmp; + cpu_data[i].cpu = tmp; + if (save_read_number(handle->pevent, data, + &size, &rsize, 8, &cpu_data[i].offset)) + goto fail; + if (save_read_number(handle->pevent, data, + &size, &rsize, 8, &cpu_data[i].size)) + goto fail; + } + if (buff->cpus == max_cpu + 1) { + /* Check to make sure cpus match the index */ + for (i = 0; i < buff->cpus; i++) { + if (cpu_data[i].cpu != i) + goto copy_buffer; + } + buff->cpu_data = cpu_data; + } else { + copy_buffer: + buff->cpu_data = calloc(max_cpu + 1, sizeof(*cpu_data)); + if (!buff->cpu_data) + goto fail; + for (i = 0; i < buff->cpus; i++) { + if (buff->cpu_data[cpu_data[i].cpu].size) { + tracecmd_warning("More than one buffer defined for CPU %d (buffer %d)\n", + cpu_data[i].cpu, i); + goto fail; + } + buff->cpu_data[cpu_data[i].cpu] = cpu_data[i]; + } + buff->cpus = max_cpu + 1; + free(cpu_data); + } + } else { + buff->latency = true; + } + return 0; +fail: + free(cpu_data); + return -1; +} + +static int handle_options(struct tracecmd_input *handle) +{ + long long offset; + unsigned short option; + unsigned int size; + unsigned short id, flags; + char *cpustats = NULL; + struct hook_list *hook; + bool compress = false; + char *buf; + int cpus; + int ret; + + if (!HAS_SECTIONS(handle)) { + handle->options_start = lseek64(handle->fd, 0, SEEK_CUR); + } else { + if (read_section_header(handle, &id, &flags, NULL, NULL)) + return -1; + if (id != TRACECMD_OPTION_DONE) + return -1; + if (flags & TRACECMD_SEC_FL_COMPRESS) + compress = true; + } + + if (compress && in_uncompress_block(handle)) + return -1; + + for (;;) { + ret = read2(handle, &option); + if (ret) + goto out; + + if (!HAS_SECTIONS(handle) && option == TRACECMD_OPTION_DONE) + break; + + /* next 4 bytes is the size of the option */ + ret = read4(handle, &size); + if (ret) + goto out; + buf = malloc(size); + if (!buf) { + ret = -ENOMEM; + goto out; + } + ret = do_read_check(handle, buf, size); + if (ret) + goto out; + + switch (option) { + case 
TRACECMD_OPTION_DATE: + /* + * A time has been mapped that is the + * difference between the timestamps and + * gtod. It is stored as ASCII with '0x' + * appended. + */ + if (handle->flags & + (TRACECMD_FL_IGNORE_DATE | TRACECMD_FL_RAW_TS)) + break; + offset = strtoll(buf, NULL, 0); + /* Convert from micro to nano */ + offset *= 1000; + handle->ts_offset += offset; + break; + case TRACECMD_OPTION_OFFSET: + /* + * Similar to date option, but just adds an + * offset to the timestamp. + */ + if (handle->flags & TRACECMD_FL_RAW_TS) + break; + offset = strtoll(buf, NULL, 0); + handle->ts_offset += offset; + break; + case TRACECMD_OPTION_TIME_SHIFT: + /* + * long long int (8 bytes) trace session ID + * int (4 bytes) protocol flags. + * int (4 bytes) CPU count. + * array of size [CPU count]: + * [ + * int (4 bytes) count of timestamp offsets. + * long long array of size [count] of times, + * when the offsets were calculated. + * long long array of size [count] of timestamp offsets. + * long long array of size [count] of timestamp scaling ratios.* + * ] + * array of size [CPU count]: + * [ + * long long array of size [count] of timestamp scaling fraction bits.* + * ]* + */ + if (size < 16 || (handle->flags & TRACECMD_FL_RAW_TS)) + break; + handle->host.peer_trace_id = tep_read_number(handle->pevent, + buf, 8); + handle->host.flags = tep_read_number(handle->pevent, + buf + 8, 4); + ret = tsync_cpu_offsets_load(handle, buf + 12, size - 12); + if (ret < 0) + goto out; + tracecmd_enable_tsync(handle, true); + break; + case TRACECMD_OPTION_CPUSTAT: + buf[size-1] = '\n'; + cpustats = realloc(handle->cpustats, + handle->cpustats_size + size + 1); + if (!cpustats) { + ret = -ENOMEM; + goto out; + } + memcpy(cpustats + handle->cpustats_size, buf, size); + handle->cpustats_size += size; + cpustats[handle->cpustats_size] = 0; + handle->cpustats = cpustats; + break; + case TRACECMD_OPTION_BUFFER: + case TRACECMD_OPTION_BUFFER_TEXT: + ret = handle_buffer_option(handle, option, buf, 
size); + if (ret < 0) + goto out; + break; + case TRACECMD_OPTION_TRACECLOCK: + tracecmd_parse_trace_clock(handle, buf, size); + if (!handle->ts2secs) + handle->use_trace_clock = true; + break; + case TRACECMD_OPTION_UNAME: + handle->uname = strdup(buf); + break; + case TRACECMD_OPTION_VERSION: + handle->version = strdup(buf); + break; + case TRACECMD_OPTION_HOOK: + hook = tracecmd_create_event_hook(buf); + hook->next = handle->hooks; + handle->hooks = hook; + break; + case TRACECMD_OPTION_CPUCOUNT: + cpus = *(int *)buf; + handle->cpus = tep_read_number(handle->pevent, &cpus, 4); + if (handle->cpus > handle->max_cpu) + handle->max_cpu = handle->cpus; + tep_set_cpus(handle->pevent, handle->cpus); + break; + case TRACECMD_OPTION_PROCMAPS: + if (buf[size-1] == '\0') + trace_pid_map_load(handle, buf); + break; + case TRACECMD_OPTION_TRACEID: + if (size < 8) + break; + handle->trace_id = tep_read_number(handle->pevent, + buf, 8); + break; + case TRACECMD_OPTION_GUEST: + trace_guest_load(handle, buf, size); + break; + case TRACECMD_OPTION_TSC2NSEC: + if (size < 16 || (handle->flags & TRACECMD_FL_RAW_TS)) + break; + handle->tsc_calc.mult = tep_read_number(handle->pevent, + buf, 4); + handle->tsc_calc.shift = tep_read_number(handle->pevent, + buf + 4, 4); + handle->tsc_calc.offset = tep_read_number(handle->pevent, + buf + 8, 8); + break; + case TRACECMD_OPTION_HEADER_INFO: + case TRACECMD_OPTION_FTRACE_EVENTS: + case TRACECMD_OPTION_EVENT_FORMATS: + case TRACECMD_OPTION_KALLSYMS: + case TRACECMD_OPTION_PRINTK: + case TRACECMD_OPTION_CMDLINES: + if (size < 8) + break; + section_add_or_update(handle, option, -1, + tep_read_number(handle->pevent, buf, 8), 0); + break; + case TRACECMD_OPTION_DONE: + if (compress) + in_uncompress_reset(handle); + ret = handle_option_done(handle, buf, size); + free(buf); + return ret; + + default: + tracecmd_warning("unknown option %d", option); + break; + } + + free(buf); + + } + + ret = 0; + +out: + if (compress) + in_uncompress_reset(handle); 
+ return ret; +} + +static int read_options_type(struct tracecmd_input *handle) +{ + char buf[10]; + + if (CHECK_READ_STATE(handle, TRACECMD_FILE_CPU_LATENCY)) + return 0; + + if (do_read_check(handle, buf, 10)) + return -1; + + /* check if this handles options */ + if (strncmp(buf, "options", 7) == 0) { + if (handle_options(handle) < 0) + return -1; + handle->file_state = TRACECMD_FILE_OPTIONS; + if (do_read_check(handle, buf, 10)) + return -1; + } + + /* + * Check if this is a latency report or flyrecord. + */ + if (strncmp(buf, "latency", 7) == 0) + handle->file_state = TRACECMD_FILE_CPU_LATENCY; + else if (strncmp(buf, "flyrecord", 9) == 0) + handle->file_state = TRACECMD_FILE_CPU_FLYRECORD; + else + return -1; + + return 0; +} + +int tracecmd_latency_data_read(struct tracecmd_input *handle, char **buf, size_t *size) +{ + struct cpu_zdata *zdata = &handle->latz; + void *data; + int rsize; + int fd = -1; + int id; + + if (!handle || !buf || !size) + return -1; + if (handle->file_state != TRACECMD_FILE_CPU_LATENCY) + return -1; + + if (!handle->cpu_compressed) { + fd = handle->fd; + } else if (!handle->read_zpage) { + if (zdata->fd < 0) + return -1; + fd = zdata->fd; + } + + /* Read data from a file */ + if (fd >= 0) { + if (!(*buf)) { + *size = BUFSIZ; + *buf = malloc(*size); + if (!(*buf)) + return -1; + } + return do_read_fd(fd, *buf, *size); + } + + /* Uncompress data in memory */ + if (zdata->last_chunk >= zdata->count) + return 0; + + id = zdata->last_chunk; + if (!*buf || *size < zdata->chunks[id].size) { + data = realloc(*buf, zdata->chunks[id].size); + if (!data) + return -1; + *buf = data; + *size = zdata->chunks[id].size; + } + + if (tracecmd_uncompress_chunk(handle->compress, &zdata->chunks[id], *buf)) + return -1; + + rsize = zdata->chunks[id].size; + zdata->last_chunk++; + return rsize; +} + +static int init_cpu_data(struct tracecmd_input *handle) +{ + enum kbuffer_long_size long_size; + enum kbuffer_endian endian; + unsigned long long max_size = 0; 
+ unsigned long long pages; + int cpu; + + /* We expect this to be flyrecord */ + if (handle->file_state != TRACECMD_FILE_CPU_FLYRECORD) + return -1; + + if (force_read) + handle->read_page = true; + + if (handle->long_size == 8) + long_size = KBUFFER_LSIZE_8; + else + long_size = KBUFFER_LSIZE_4; + + if (tep_is_file_bigendian(handle->pevent)) + endian = KBUFFER_ENDIAN_BIG; + else + endian = KBUFFER_ENDIAN_LITTLE; + + for (cpu = 0; cpu < handle->cpus; cpu++) { + handle->cpu_data[cpu].compress.fd = -1; + handle->cpu_data[cpu].kbuf = kbuffer_alloc(long_size, endian); + if (!handle->cpu_data[cpu].kbuf) + goto out_free; + if (tep_is_old_format(handle->pevent)) + kbuffer_set_old_format(handle->cpu_data[cpu].kbuf); + + if (handle->cpu_data[cpu].file_size > max_size) + max_size = handle->cpu_data[cpu].file_size; + } + + /* Calculate about a meg of pages for buffering */ + pages = handle->page_size ? max_size / handle->page_size : 0; + if (!pages) + pages = 1; + pages = normalize_size(pages); + handle->page_map_size = handle->page_size * pages; + if (handle->page_map_size < handle->page_size) + handle->page_map_size = handle->page_size; + + + for (cpu = 0; cpu < handle->cpus; cpu++) { + if (init_cpu(handle, cpu)) + goto out_free; + } + + return 0; + + out_free: + for ( ; cpu >= 0; cpu--) { + free_page(handle, cpu); + kbuffer_free(handle->cpu_data[cpu].kbuf); + handle->cpu_data[cpu].kbuf = NULL; + } + return -1; +} + +int init_latency_data(struct tracecmd_input *handle) +{ + unsigned long long wsize; + int ret; + + if (!handle->cpu_compressed) + return 0; + + if (handle->read_zpage) { + handle->latz.count = tracecmd_load_chunks_info(handle->compress, &handle->latz.chunks); + if (handle->latz.count < 0) + return -1; + } else { + strcpy(handle->latz.file, COMPR_TEMP_FILE); + handle->latz.fd = mkstemp(handle->latz.file); + if (handle->latz.fd < 0) + return -1; + + ret = tracecmd_uncompress_copy_to(handle->compress, handle->latz.fd, NULL, &wsize); + if (ret) + return -1; + + 
lseek64(handle->latz.fd, 0, SEEK_SET); + } + + return 0; +} + +static int init_buffer_cpu_data(struct tracecmd_input *handle, struct input_buffer_instance *buffer) +{ + unsigned long long offset; + unsigned long long size; + unsigned short id, flags; + int cpu; + + if (handle->cpu_data) + return -1; + + if (lseek64(handle->fd, buffer->offset, SEEK_SET) == (off_t)-1) + return -1; + if (read_section_header(handle, &id, &flags, NULL, NULL)) + return -1; + if (flags & TRACECMD_SEC_FL_COMPRESS) + handle->cpu_compressed = true; + if (buffer->latency) { + handle->file_state = TRACECMD_FILE_CPU_LATENCY; + return init_latency_data(handle) == 0 ? 1 : -1; + } + handle->file_state = TRACECMD_FILE_CPU_FLYRECORD; + handle->cpus = buffer->cpus; + if (handle->max_cpu < handle->cpus) + handle->max_cpu = handle->cpus; + + handle->cpu_data = calloc(handle->cpus, sizeof(*handle->cpu_data)); + if (!handle->cpu_data) + return -1; + + for (cpu = 0; cpu < handle->cpus; cpu++) { + handle->cpu_data[cpu].cpu = buffer->cpu_data[cpu].cpu; + offset = buffer->cpu_data[cpu].offset; + size = buffer->cpu_data[cpu].size; + handle->cpu_data[cpu].file_offset = offset; + handle->cpu_data[cpu].file_size = size; + if (size && (offset + size > handle->total_file_size)) { + /* this happens if the file got truncated */ + printf("File possibly truncated. " + "Need at least %llu, but file size is %zu.\n", + offset + size, handle->total_file_size); + errno = EINVAL; + return -1; + } + } + + return init_cpu_data(handle); +} + +static int read_cpu_data(struct tracecmd_input *handle) +{ + unsigned long long size; + int cpus; + int cpu; + + /* + * Check if this is a latency report or not. 
+ */ + if (handle->file_state == TRACECMD_FILE_CPU_LATENCY) + return 1; + + /* We expect this to be flyrecord */ + if (handle->file_state != TRACECMD_FILE_CPU_FLYRECORD) + return -1; + + cpus = handle->cpus; + + handle->cpu_data = malloc(sizeof(*handle->cpu_data) * handle->cpus); + if (!handle->cpu_data) + return -1; + memset(handle->cpu_data, 0, sizeof(*handle->cpu_data) * handle->cpus); + + for (cpu = 0; cpu < handle->cpus; cpu++) { + unsigned long long offset; + + handle->cpu_data[cpu].cpu = cpu; + read8(handle, &offset); + read8(handle, &size); + handle->cpu_data[cpu].file_offset = offset; + handle->cpu_data[cpu].file_size = size; + if (size && (offset + size > handle->total_file_size)) { + /* this happens if the file got truncated */ + printf("File possibly truncated. " + "Need at least %llu, but file size is %zu.\n", + offset + size, handle->total_file_size); + errno = EINVAL; + return -1; + } + } + + /* + * It is possible that an option changed the number of CPUs. + * If that happened, then there's "empty" cpu data saved for + * backward compatibility. 
+ */ + if (cpus < handle->cpus) { + unsigned long long ignore; + int once = 0; + + read8(handle, &ignore); /* offset */ + read8(handle, &ignore); /* size */ + if (ignore != 0) { + if (!once) { + tracecmd_warning("ignored CPU data not zero size"); + once++; + } + } + } + + return init_cpu_data(handle); +} + +static int read_data_and_size(struct tracecmd_input *handle, + char **data, unsigned long long *size) +{ + if (read8(handle, size) < 0) + return -1; + *data = malloc(*size + 1); + if (!*data) + return -1; + if (do_read_check(handle, *data, *size)) { + free(*data); + return -1; + } + + return 0; +} + +static int read_and_parse_cmdlines(struct tracecmd_input *handle) +{ + struct tep_handle *pevent = handle->pevent; + unsigned long long size; + char *cmdlines; + + if (CHECK_READ_STATE(handle, TRACECMD_FILE_CMD_LINES)) + return 0; + + if (!HAS_SECTIONS(handle)) + section_add_or_update(handle, TRACECMD_OPTION_CMDLINES, 0, 0, + lseek64(handle->fd, 0, SEEK_CUR)); + + + if (read_data_and_size(handle, &cmdlines, &size) < 0) + return -1; + cmdlines[size] = 0; + tep_parse_saved_cmdlines(pevent, cmdlines); + free(cmdlines); + + handle->file_state = TRACECMD_FILE_CMD_LINES; + + return 0; +} + +static void extract_trace_clock(struct tracecmd_input *handle, char *line) +{ + char *clock = NULL; + char *next = NULL; + char *data; + + data = strtok_r(line, "[]", &next); + sscanf(data, "%ms", &clock); + /* TODO: report if it fails to allocate */ + handle->trace_clock = clock; + + if (!clock) + return; + + /* Clear usecs if raw timestamps are requested */ + if (handle->flags & TRACECMD_FL_RAW_TS) + handle->flags &= ~TRACECMD_FL_IN_USECS; + + /* Clear usecs if not one of the specified clocks */ + if (strcmp(clock, "local") && strcmp(clock, "global") && + strcmp(clock, "uptime") && strcmp(clock, "perf") && + strncmp(clock, "mono", 4) && strcmp(clock, TSCNSEC_CLOCK) && + strcmp(clock, "tai")) + handle->flags &= ~TRACECMD_FL_IN_USECS; +} + +void tracecmd_parse_trace_clock(struct 
tracecmd_input *handle, + char *file, int size __maybe_unused) +{ + char *line; + char *next = NULL; + + line = strtok_r(file, " ", &next); + while (line) { + /* current trace_clock is shown as "[local]". */ + if (*line == '[') + return extract_trace_clock(handle, line); + line = strtok_r(NULL, " ", &next); + } +} + +static int read_and_parse_trace_clock(struct tracecmd_input *handle, + struct tep_handle *pevent) +{ + unsigned long long size; + char *trace_clock; + + if (read_data_and_size(handle, &trace_clock, &size) < 0) + return -1; + trace_clock[size] = 0; + tracecmd_parse_trace_clock(handle, trace_clock, size); + free(trace_clock); + return 0; +} + +static int init_data_v6(struct tracecmd_input *handle) +{ + struct tep_handle *pevent = handle->pevent; + int ret; + + ret = read_cpu_data(handle); + if (ret < 0) + return ret; + + if (handle->use_trace_clock) { + /* + * There was a bug in the original setting of + * the trace_clock file which let it get + * corrupted. If it fails to read, force local + * clock. + */ + if (read_and_parse_trace_clock(handle, pevent) < 0) { + char clock[] = "[local]"; + tracecmd_warning("File has trace_clock bug, using local clock"); + tracecmd_parse_trace_clock(handle, clock, 8); + } + } + return ret; +} + +static int init_data(struct tracecmd_input *handle) +{ + return init_buffer_cpu_data(handle, &handle->top_buffer); +} + +/** + * tracecmd_init_data - prepare reading the data from trace.dat + * @handle: input handle for the trace.dat file + * + * This prepares reading the data from trace.dat. This is called + * after tracecmd_read_headers() and before tracecmd_read_data(). 
+ */ +int tracecmd_init_data(struct tracecmd_input *handle) +{ + int ret; + + if (!HAS_SECTIONS(handle)) + ret = init_data_v6(handle); + else + ret = init_data(handle); + tracecmd_blk_hack(handle); + + return ret; +} + +/** + * tracecmd_make_pipe - Have the handle read a pipe instead of a file + * @handle: input handle to read from a pipe + * @cpu: the cpu that the pipe represents + * @fd: the read end of the pipe + * @cpus: the total number of cpus for this handle + * + * In order to stream data from the binary trace files and produce + * output or analyze the data, a tracecmd_input descriptor needs to + * be created, and then converted into a form that can act on a + * pipe. + * + * Note, there are limitations to what this descriptor can do. + * Most notibly, it can not read backwards. Once a page is read + * it can not be read at a later time (except if a record is attached + * to it and is holding the page ref). + * + * It is expected that the handle has already been created and + * tracecmd_read_headers() has run on it. 
+ */ +int tracecmd_make_pipe(struct tracecmd_input *handle, int cpu, int fd, int cpus) +{ + enum kbuffer_long_size long_size; + enum kbuffer_endian endian; + + handle->read_page = true; + handle->use_pipe = true; + + if (!handle->cpus) { + handle->cpus = cpus; + handle->cpu_data = malloc(sizeof(*handle->cpu_data) * handle->cpus); + if (!handle->cpu_data) + return -1; + } + + if (cpu >= handle->cpus) + return -1; + + + if (handle->long_size == 8) + long_size = KBUFFER_LSIZE_8; + else + long_size = KBUFFER_LSIZE_4; + + if (tep_is_file_bigendian(handle->pevent)) + endian = KBUFFER_ENDIAN_BIG; + else + endian = KBUFFER_ENDIAN_LITTLE; + + memset(&handle->cpu_data[cpu], 0, sizeof(handle->cpu_data[cpu])); + handle->cpu_data[cpu].pipe_fd = fd; + handle->cpu_data[cpu].cpu = cpu; + + handle->cpu_data[cpu].kbuf = kbuffer_alloc(long_size, endian); + if (!handle->cpu_data[cpu].kbuf) + return -1; + if (tep_is_old_format(handle->pevent)) + kbuffer_set_old_format(handle->cpu_data[cpu].kbuf); + + handle->cpu_data[cpu].file_offset = 0; + handle->cpu_data[cpu].file_size = -1; + + init_cpu(handle, cpu); + + return 0; +} + +/** + * tracecmd_print_events - print the events that are stored in trace.dat + * @handle: input handle for the trace.dat file + * @regex: regex of events to print (NULL is all events) + * + * This is a debugging routine to print out the events that + * are stored in a given trace.dat file. 
+ */ +void tracecmd_print_events(struct tracecmd_input *handle, const char *regex) +{ + if (!regex) + regex = ".*"; + + if (!HAS_SECTIONS(handle)) + read_headers_v6(handle, TRACECMD_FILE_ALL_EVENTS, regex); + + read_headers(handle, regex); +} + +/* Show the cpu data stats */ +static void show_cpu_stats(struct tracecmd_input *handle) +{ + struct cpu_data *cpu_data; + int i; + + for (i = 0; i < handle->cpus; i++) { + cpu_data = &handle->cpu_data[i]; + printf("CPU%d data recorded at offset=0x%llx\n", + i, cpu_data->file_offset); + printf(" %lld bytes in size\n", cpu_data->file_size); + } +} + +/** + * tracecmd_print_stats - prints the stats recorded in the options. + * @handle: input handle for the trace.dat file + * + * Looks for the option TRACECMD_OPTION_CPUSTAT and prints out what's + * stored there, if it is found. Otherwise it prints that none were found. + */ +void tracecmd_print_stats(struct tracecmd_input *handle) +{ + if (handle->cpustats) + printf("%s\n", handle->cpustats); + else + printf(" No stats in this file\n"); + + show_cpu_stats(handle); +} + +/** + * tracecmd_print_uname - prints the recorded uname if it was recorded + * @handle: input handle for the trace.dat file + * + * Looks for the option TRACECMD_OPTION_UNAME and prints out what's + * stored there, if it is found. Otherwise it prints that none were found. + */ +void tracecmd_print_uname(struct tracecmd_input *handle) +{ + if (handle->uname) + printf("%s\n", handle->uname); + else + printf(" uname was not recorded in this file\n"); +} + +/** + * tracecmd_print_uname - prints the recorded uname if it was recorded + * @handle: input handle for the trace.dat file + * + * Looks for the option TRACECMD_OPTION_VERSION and prints out what's + * stored there, if it is found. Otherwise it prints that none were found. 
+ */ +void tracecmd_print_version(struct tracecmd_input *handle) +{ + if (handle->version) + printf("%s\n", handle->version); + else + printf(" version was not recorded in this file\n"); +} + +/** + * tracecmd_hooks - return the event hooks that were used in record + * @handle: input handle for the trace.dat file + * + * If trace-cmd record used -H to save hooks, they are parsed and + * presented as hooks here. + * + * Returns the hook list (do not free it, they are freed on close) + */ +struct hook_list *tracecmd_hooks(struct tracecmd_input *handle) +{ + return handle->hooks; +} + +static int init_metadata_strings(struct tracecmd_input *handle, int size) +{ + char *tmp; + + tmp = realloc(handle->strings, handle->strings_size + size); + if (!tmp) + return -1; + + handle->strings = tmp; + if (do_read_check(handle, handle->strings + handle->strings_size, size)) + return -1; + + handle->strings_size += size; + + return 0; +} + +static int read_metadata_strings(struct tracecmd_input *handle) +{ + unsigned short flags; + int found = 0; + unsigned short id; + unsigned int csize, rsize; + unsigned long long size; + off64_t offset; + + offset = lseek64(handle->fd, 0, SEEK_CUR); + do { + if (read_section_header(handle, &id, &flags, &size, NULL)) + break; + if (id == TRACECMD_OPTION_STRINGS) { + found++; + if ((flags & TRACECMD_SEC_FL_COMPRESS)) { + read4(handle, &csize); + read4(handle, &rsize); + do_lseek(handle, -8, SEEK_CUR); + if (in_uncompress_block(handle)) + break; + } else { + rsize = size; + } + init_metadata_strings(handle, rsize); + if (flags & TRACECMD_SEC_FL_COMPRESS) + in_uncompress_reset(handle); + } else { + if (lseek64(handle->fd, size, SEEK_CUR) == (off_t)-1) + break; + } + } while (1); + + if (lseek64(handle->fd, offset, SEEK_SET) == (off_t)-1) + return -1; + + return found ? 
0 : -1; +} + +/** + * tracecmd_alloc_fd - create a tracecmd_input handle from a file descriptor + * @fd: the file descriptor for the trace.dat file + * @flags: bitmask of enum tracecmd_open_flags + * + * Allocate a tracecmd_input handle from a file descriptor and open the + * file. This tests if the file is of trace-cmd format and allocates + * a parse event descriptor. + * + * The returned pointer is not ready to be read yet. A tracecmd_read_headers() + * and tracecmd_init_data() still need to be called on the descriptor. + * + * Unless you know what you are doing with this, you want to use + * tracecmd_open_fd() instead. + */ +struct tracecmd_input *tracecmd_alloc_fd(int fd, int flags) +{ + struct tracecmd_input *handle; + char test[] = TRACECMD_MAGIC; + unsigned int page_size; + size_t offset; + char *version = NULL; + char *zver = NULL; + char *zname = NULL; + char buf[BUFSIZ]; + unsigned long ver; + + handle = malloc(sizeof(*handle)); + if (!handle) + return NULL; + memset(handle, 0, sizeof(*handle)); + + handle->fd = fd; + handle->ref = 1; + handle->latz.fd = -1; + /* By default, use usecs, unless told otherwise */ + handle->flags |= TRACECMD_FL_IN_USECS; + +#ifdef INMEMORY_DECOMPRESS + handle->read_zpage = 1; +#endif + if (do_read_check(handle, buf, 3)) + goto failed_read; + + if (memcmp(buf, test, 3) != 0) + goto failed_read; + + if (do_read_check(handle, buf, 7)) + goto failed_read; + if (memcmp(buf, "tracing", 7) != 0) + goto failed_read; + + version = read_string(handle); + if (!version) + goto failed_read; + tracecmd_info("version = %s", version); + ver = strtol(version, NULL, 10); + if (!ver && errno) + goto failed_read; + if (!tracecmd_is_version_supported(ver)) { + tracecmd_warning("Unsupported file version %lu", ver); + goto failed_read; + } + handle->file_version = ver; + free(version); + version = NULL; + + if (handle->file_version >= FILE_VERSION_SECTIONS) + handle->flags |= TRACECMD_FL_SECTIONED; + if (handle->file_version >= 
FILE_VERSION_COMPRESSION) + handle->flags |= TRACECMD_FL_COMPRESSION; + + if (do_read_check(handle, buf, 1)) + goto failed_read; + + handle->pevent = tep_alloc(); + if (!handle->pevent) + goto failed_read; + + /* register default ftrace functions first */ + if (!(flags & TRACECMD_FL_LOAD_NO_PLUGINS) && + !(flags & TRACECMD_FL_LOAD_NO_SYSTEM_PLUGINS)) + tracecmd_ftrace_overrides(handle, &handle->finfo); + + handle->plugin_list = trace_load_plugins(handle->pevent, flags); + + tep_set_file_bigendian(handle->pevent, buf[0]); + tep_set_local_bigendian(handle->pevent, tracecmd_host_bigendian()); + + do_read_check(handle, buf, 1); + handle->long_size = buf[0]; + tep_set_long_size(handle->pevent, handle->long_size); + + read4(handle, &page_size); + handle->page_size = page_size; + handle->next_offset = page_size; + + offset = lseek64(handle->fd, 0, SEEK_CUR); + handle->total_file_size = lseek64(handle->fd, 0, SEEK_END); + lseek64(handle->fd, offset, SEEK_SET); + + if (HAS_COMPRESSION(handle)) { + zname = read_string(handle); + if (!zname) + goto failed_read; + + zver = read_string(handle); + if (!zver) + goto failed_read; + + if (strcmp(zname, "none") == 0) { + handle->read_zpage = false; + handle->flags &= ~TRACECMD_FL_COMPRESSION; + } else { + handle->compress = tracecmd_compress_alloc(zname, zver, + handle->fd, + handle->pevent, NULL); + if (!handle->compress) { + tracecmd_warning("Unsupported file compression %s %s", zname, zver); + goto failed_read; + } + } + + free(zname); + free(zver); + } + + if (HAS_SECTIONS(handle)) { + if (read8(handle, &(handle->options_start))) { + tracecmd_warning("Filed to read the offset of the first option section"); + goto failed_read; + } + read_metadata_strings(handle); + } + + handle->file_state = TRACECMD_FILE_INIT; + + return handle; + + failed_read: + free(version); + free(zname); + free(zver); + free(handle); + + return NULL; +} + +/** + * tracecmd_alloc_fd - create a tracecmd_input handle from a file name + * @file: the file name 
of the file that is of tracecmd data type. + * @flags: bitmask of enum tracecmd_open_flags + * + * Allocate a tracecmd_input handle from a given file name and open the + * file. This tests if the file is of trace-cmd format and allocates + * a parse event descriptor. + * + * The returned pointer is not ready to be read yet. A tracecmd_read_headers() + * and tracecmd_init_data() still need to be called on the descriptor. + * + * Unless you know what you are doing with this, you want to use + * tracecmd_open() instead. + */ +struct tracecmd_input *tracecmd_alloc(const char *file, int flags) +{ + int fd; + + fd = open(file, O_RDONLY); + if (fd < 0) + return NULL; + + return tracecmd_alloc_fd(fd, flags); +} + +/** + * tracecmd_open_fd - create a tracecmd_handle from the trace.dat file descriptor + * @fd: the file descriptor for the trace.dat file + * @flags: bitmask of enum tracecmd_open_flags + */ +struct tracecmd_input *tracecmd_open_fd(int fd, int flags) +{ + struct tracecmd_input *handle; + int ret; + + handle = tracecmd_alloc_fd(fd, flags); + if (!handle) + return NULL; + + if (tracecmd_read_headers(handle, 0) < 0) + goto fail; + + if ((ret = tracecmd_init_data(handle)) < 0) + goto fail; + + return handle; + +fail: + tracecmd_close(handle); + return NULL; +} + +/** + * tracecmd_open - create a tracecmd_handle from a given file + * @file: the file name of the file that is of tracecmd data type. + * @flags: bitmask of enum tracecmd_open_flags + */ +struct tracecmd_input *tracecmd_open(const char *file, int flags) +{ + int fd; + + fd = open(file, O_RDONLY); + if (fd < 0) + return NULL; + + return tracecmd_open_fd(fd, flags); +} + +/** + * tracecmd_open_head - create a tracecmd_handle from a given file, read + * and parse only the trace headers from the file + * @file: the file name of the file that is of tracecmd data type. 
+ * @flags: bitmask of enum tracecmd_open_flags + */ +struct tracecmd_input *tracecmd_open_head(const char *file, int flags) +{ + struct tracecmd_input *handle; + int fd; + + fd = open(file, O_RDONLY); + if (fd < 0) + return NULL; + + handle = tracecmd_alloc_fd(fd, flags); + if (!handle) + return NULL; + + if (tracecmd_read_headers(handle, 0) < 0) + goto fail; + + return handle; + +fail: + tracecmd_close(handle); + return NULL; +} + +/** + * tracecmd_ref - add a reference to the handle + * @handle: input handle for the trace.dat file + * + * Some applications may share a handle between parts of + * the application. Let those parts add reference counters + * to the handle, and the last one to close it will free it. + */ +void tracecmd_ref(struct tracecmd_input *handle) +{ + if (!handle) + return; + + handle->ref++; +} + +static inline void free_buffer(struct input_buffer_instance *buf) +{ + free(buf->name); + free(buf->clock); + free(buf->cpu_data); +} + +/** + * tracecmd_close - close and free the trace.dat handle + * @handle: input handle for the trace.dat file + * + * Close the file descriptor of the handle and frees + * the resources allocated by the handle. 
+ */ +void tracecmd_close(struct tracecmd_input *handle) +{ + struct zchunk_cache *cache; + struct file_section *del_sec; + struct cpu_data *cpu_data; + struct page_map *page_map, *n; + int cpu; + int i; + + if (!handle) + return; + + if (handle->ref <= 0) { + tracecmd_warning("tracecmd: bad ref count on handle"); + return; + } + + if (--handle->ref) + return; + + for (cpu = 0; cpu < handle->cpus; cpu++) { + /* The tracecmd_peek_data may have cached a record */ + free_next(handle, cpu); + free_page(handle, cpu); + if (handle->cpu_data) { + cpu_data = &handle->cpu_data[cpu]; + if (cpu_data->kbuf) { + kbuffer_free(cpu_data->kbuf); + if (cpu_data->page_map) + free_page_map(cpu_data->page_map); + + if (cpu_data->page_cnt) + tracecmd_warning("%d pages still allocated on cpu %d%s", + cpu_data->page_cnt, cpu, + show_records(cpu_data->pages, + cpu_data->nr_pages)); + free(cpu_data->pages); + } + if (cpu_data->compress.fd >= 0) { + close(cpu_data->compress.fd); + unlink(cpu_data->compress.file); + } + while (!list_empty(&cpu_data->compress.cache)) { + cache = container_of(cpu_data->compress.cache.next, + struct zchunk_cache, list); + list_del(&cache->list); + free(cache->map); + free(cache); + } + free(cpu_data->compress.chunks); + list_for_each_entry_safe(page_map, n, &cpu_data->page_maps, list) { + list_del(&page_map->list); + free(page_map); + } + } + } + + free(handle->cpustats); + free(handle->cpu_data); + free(handle->uname); + free(handle->trace_clock); + free(handle->strings); + free(handle->version); + close(handle->fd); + free(handle->latz.chunks); + if (handle->latz.fd >= 0) { + close(handle->latz.fd); + unlink(handle->latz.file); + } + while (handle->sections) { + del_sec = handle->sections; + handle->sections = handle->sections->next; + free(del_sec); + } + + free_buffer(&handle->top_buffer); + for (i = 0; i < handle->nr_buffers; i++) + free_buffer(&handle->buffers[i]); + free(handle->buffers); + + tracecmd_free_hooks(handle->hooks); + handle->hooks = NULL; + + 
trace_pid_map_free(handle->pid_maps); + handle->pid_maps = NULL; + + trace_tsync_offset_free(&handle->host); + trace_guests_free(handle); + + if (handle->flags & TRACECMD_FL_BUFFER_INSTANCE) + tracecmd_close(handle->parent); + else { + /* Only main handle frees plugins, pevent and compression context */ + tracecmd_compress_destroy(handle->compress); + tep_unload_plugins(handle->plugin_list, handle->pevent); + tep_free(handle->pevent); + } + free(handle); +} + +static int read_copy_size8(struct tracecmd_input *in_handle, + struct tracecmd_output *out_handle, unsigned long long *size) +{ + /* read size */ + if (do_read_check(in_handle, size, 8)) + return -1; + + if (do_write_check(out_handle, size, 8)) + return -1; + + *size = tep_read_number(in_handle->pevent, size, 8); + return 0; +} + +static int read_copy_size4(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle, + unsigned int *size) +{ + /* read size */ + if (do_read_check(in_handle, size, 4)) + return -1; + + if (do_write_check(out_handle, size, 4)) + return -1; + + *size = tep_read_number(in_handle->pevent, size, 4); + return 0; +} + +static int read_copy_data(struct tracecmd_input *in_handle, + unsigned long long size, + struct tracecmd_output *out_handle) +{ + char *buf; + + buf = malloc(size); + if (!buf) + return -1; + if (do_read_check(in_handle, buf, size)) + goto failed_read; + + if (do_write_check(out_handle, buf, size)) + goto failed_read; + + free(buf); + + return 0; + + failed_read: + free(buf); + return -1; +} + + +static bool check_in_state(struct tracecmd_input *handle, int new_state) +{ + return check_file_state(handle->file_version, handle->file_state, new_state); +} + +static int copy_header_files(struct tracecmd_input *in_handle, + struct tracecmd_output *out_handle) +{ + bool compress = out_check_compression(out_handle); + struct file_section *sec; + unsigned long long offset; + unsigned long long size; + + if (!check_in_state(in_handle, TRACECMD_FILE_HEADERS) || + 
!check_out_state(out_handle, TRACECMD_FILE_HEADERS)) + return -1; + + sec = section_open(in_handle, TRACECMD_OPTION_HEADER_INFO); + if (!sec) + return -1; + + offset = out_write_section_header(out_handle, TRACECMD_OPTION_HEADER_INFO, + "headers", TRACECMD_SEC_FL_COMPRESS, true); + out_compression_start(out_handle, compress); + + /* "header_page" */ + if (read_copy_data(in_handle, 12, out_handle) < 0) + goto error; + + if (read_copy_size8(in_handle, out_handle, &size) < 0) + goto error; + + if (read_copy_data(in_handle, size, out_handle) < 0) + goto error; + + /* "header_event" */ + if (read_copy_data(in_handle, 13, out_handle) < 0) + goto error; + + if (read_copy_size8(in_handle, out_handle, &size) < 0) + goto error; + + if (read_copy_data(in_handle, size, out_handle) < 0) + goto error; + + in_handle->file_state = TRACECMD_FILE_HEADERS; + if (out_compression_end(out_handle, compress)) + goto error; + + out_set_file_state(out_handle, in_handle->file_state); + section_close(in_handle, sec); + + if (out_update_section_header(out_handle, offset)) + goto error; + + return 0; +error: + out_compression_reset(out_handle, compress); + section_close(in_handle, sec); + return -1; +} + +static int copy_ftrace_files(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) +{ + bool compress = out_check_compression(out_handle); + struct file_section *sec; + unsigned long long offset; + unsigned long long size; + unsigned int count; + unsigned int i; + + if (!check_in_state(in_handle, TRACECMD_FILE_FTRACE_EVENTS) || + !check_out_state(out_handle, TRACECMD_FILE_FTRACE_EVENTS)) + return -1; + + sec = section_open(in_handle, TRACECMD_OPTION_FTRACE_EVENTS); + if (!sec) + return -1; + offset = out_write_section_header(out_handle, TRACECMD_OPTION_FTRACE_EVENTS, + "ftrace events", TRACECMD_SEC_FL_COMPRESS, true); + + out_compression_start(out_handle, compress); + + if (read_copy_size4(in_handle, out_handle, &count) < 0) + goto error; + + for (i = 0; i < count; i++) { + + if 
(read_copy_size8(in_handle, out_handle, &size) < 0) + goto error; + + if (read_copy_data(in_handle, size, out_handle) < 0) + goto error; + } + + in_handle->file_state = TRACECMD_FILE_FTRACE_EVENTS; + if (out_compression_end(out_handle, compress)) + goto error; + + out_set_file_state(out_handle, in_handle->file_state); + + section_close(in_handle, sec); + + if (out_update_section_header(out_handle, offset)) + goto error; + + return 0; +error: + out_compression_reset(out_handle, compress); + section_close(in_handle, sec); + return -1; +} + +static int copy_event_files(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) +{ + bool compress = out_check_compression(out_handle); + struct file_section *sec; + unsigned long long offset; + unsigned long long size; + char *system; + unsigned int systems; + unsigned int count; + unsigned int i,x; + + if (!check_in_state(in_handle, TRACECMD_FILE_ALL_EVENTS) || + !check_out_state(out_handle, TRACECMD_FILE_ALL_EVENTS)) + return -1; + + sec = section_open(in_handle, TRACECMD_OPTION_EVENT_FORMATS); + if (!sec) + return -1; + offset = out_write_section_header(out_handle, TRACECMD_OPTION_EVENT_FORMATS, + "events format", TRACECMD_SEC_FL_COMPRESS, true); + + out_compression_start(out_handle, compress); + + if (read_copy_size4(in_handle, out_handle, &systems) < 0) + goto error; + + for (i = 0; i < systems; i++) { + system = read_string(in_handle); + if (!system) + goto error; + if (do_write_check(out_handle, system, strlen(system) + 1)) { + free(system); + goto error; + } + free(system); + + if (read_copy_size4(in_handle, out_handle, &count) < 0) + goto error; + + for (x=0; x < count; x++) { + if (read_copy_size8(in_handle, out_handle, &size) < 0) + goto error; + + if (read_copy_data(in_handle, size, out_handle) < 0) + goto error; + } + } + + in_handle->file_state = TRACECMD_FILE_ALL_EVENTS; + if (out_compression_end(out_handle, compress)) + goto error; + + out_set_file_state(out_handle, in_handle->file_state); + + 
section_close(in_handle, sec); + + if (out_update_section_header(out_handle, offset)) + goto error; + + return 0; +error: + out_compression_reset(out_handle, compress); + section_close(in_handle, sec); + return -1; +} + +static int copy_proc_kallsyms(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) +{ + bool compress = out_check_compression(out_handle); + struct file_section *sec; + unsigned long long offset; + unsigned int size; + + if (!check_in_state(in_handle, TRACECMD_FILE_KALLSYMS) || + !check_out_state(out_handle, TRACECMD_FILE_KALLSYMS)) + return -1; + + sec = section_open(in_handle, TRACECMD_OPTION_KALLSYMS); + if (!sec) + return -1; + offset = out_write_section_header(out_handle, TRACECMD_OPTION_KALLSYMS, + "kallsyms", TRACECMD_SEC_FL_COMPRESS, true); + + out_compression_start(out_handle, compress); + if (read_copy_size4(in_handle, out_handle, &size) < 0) + goto error; + + if (!size) + goto out; /* OK? */ + + if (read_copy_data(in_handle, size, out_handle) < 0) + goto error; +out: + in_handle->file_state = TRACECMD_FILE_KALLSYMS; + if (out_compression_end(out_handle, compress)) + goto error; + + out_set_file_state(out_handle, in_handle->file_state); + + section_close(in_handle, sec); + + if (out_update_section_header(out_handle, offset)) + goto error; + + return 0; +error: + out_compression_reset(out_handle, compress); + section_close(in_handle, sec); + return -1; +} + +static int copy_ftrace_printk(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) +{ + bool compress = out_check_compression(out_handle); + struct file_section *sec; + unsigned long long offset; + unsigned int size; + + if (!check_in_state(in_handle, TRACECMD_FILE_PRINTK) || + !check_out_state(out_handle, TRACECMD_FILE_PRINTK)) + return -1; + + sec = section_open(in_handle, TRACECMD_OPTION_PRINTK); + if (!sec) + return -1; + + offset = out_write_section_header(out_handle, TRACECMD_OPTION_PRINTK, + "printk", TRACECMD_SEC_FL_COMPRESS, true); + + 
out_compression_start(out_handle, compress); + + if (read_copy_size4(in_handle, out_handle, &size) < 0) + goto error; + + if (!size) + goto out; /* OK? */ + + if (read_copy_data(in_handle, size, out_handle) < 0) + goto error; + +out: + in_handle->file_state = TRACECMD_FILE_PRINTK; + if (out_compression_end(out_handle, compress)) + goto error; + + out_set_file_state(out_handle, in_handle->file_state); + + section_close(in_handle, sec); + + if (out_update_section_header(out_handle, offset)) + goto error; + + return 0; +error: + out_compression_reset(out_handle, compress); + section_close(in_handle, sec); + return -1; +} + +static int copy_command_lines(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) +{ + bool compress = out_check_compression(out_handle); + struct file_section *sec; + unsigned long long offset; + unsigned long long size; + + if (!check_in_state(in_handle, TRACECMD_FILE_CMD_LINES) || + !check_out_state(out_handle, TRACECMD_FILE_CMD_LINES)) + return -1; + + sec = section_open(in_handle, TRACECMD_OPTION_CMDLINES); + if (!sec) + return -1; + offset = out_write_section_header(out_handle, TRACECMD_OPTION_CMDLINES, + "command lines", TRACECMD_SEC_FL_COMPRESS, true); + + out_compression_start(out_handle, compress); + + if (read_copy_size8(in_handle, out_handle, &size) < 0) + goto error; + + if (!size) + goto out; /* OK? 
*/ + + if (read_copy_data(in_handle, size, out_handle) < 0) + goto error; + +out: + in_handle->file_state = TRACECMD_FILE_CMD_LINES; + if (out_compression_end(out_handle, compress)) + goto error; + + out_set_file_state(out_handle, in_handle->file_state); + + section_close(in_handle, sec); + + if (out_update_section_header(out_handle, offset)) + goto error; + + return 0; +error: + out_compression_reset(out_handle, compress); + section_close(in_handle, sec); + return -1; +} + +static int copy_cpu_count(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) +{ + unsigned int cpus; + + if (!check_in_state(in_handle, TRACECMD_FILE_CPU_COUNT) || + !check_out_state(out_handle, TRACECMD_FILE_CPU_COUNT)) + return -1; + + if (!HAS_SECTIONS(in_handle)) { + if (read4(in_handle, &cpus)) + return -1; + } else { + cpus = in_handle->max_cpu; + } + + if (tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS) { + cpus = tep_read_number(in_handle->pevent, &cpus, 4); + if (do_write_check(out_handle, &cpus, 4)) + return -1; + } else { + tracecmd_add_option(out_handle, TRACECMD_OPTION_CPUCOUNT, sizeof(int), &cpus); + } + + in_handle->file_state = TRACECMD_FILE_CPU_COUNT; + out_set_file_state(out_handle, in_handle->file_state); + + return 0; +} + +/** + * tracecmd_copy_headers - Copy headers from a tracecmd_input handle to a file descriptor + * @in_handle: input handle for the trace.dat file to copy from. + * @out_handle: output handle to the trace.dat file to copy to. + * @start_state: The file state to start copying from (zero for the beginnig) + * @end_state: The file state to stop at (zero for up to cmdlines) + * + * This is used to copy trace header data of a trace.dat file to a + * file descriptor. Using @start_state and @end_state it may be used + * multiple times against the input handle. + * + * NOTE: The input handle is also modified, and ends at the end + * state as well. 
+ */ +int tracecmd_copy_headers(struct tracecmd_input *in_handle, + struct tracecmd_output *out_handle, + enum tracecmd_file_states start_state, + enum tracecmd_file_states end_state) +{ + struct file_section *sec = NULL; + int ret; + + if (!start_state) + start_state = TRACECMD_FILE_HEADERS; + if (!end_state) + end_state = TRACECMD_FILE_CMD_LINES; + + if (start_state > end_state) + return -1; + + if (end_state < TRACECMD_FILE_HEADERS) + return 0; + + if (in_handle->file_state >= start_state) { + /* Set the handle to just before the start state */ + sec = section_open(in_handle, TRACECMD_OPTION_HEADER_INFO); + if (!sec) + return -1; + /* Now that the file handle has moved, change its state */ + in_handle->file_state = TRACECMD_FILE_INIT; + } + + /* Try to bring the input up to the start state - 1 */ + ret = tracecmd_read_headers(in_handle, start_state - 1); + if (sec) + section_close(in_handle, sec); + if (ret < 0) + goto out; + + switch (start_state) { + case TRACECMD_FILE_HEADERS: + ret = copy_header_files(in_handle, out_handle); + if (ret < 0) + goto out; + + /* fallthrough */ + case TRACECMD_FILE_FTRACE_EVENTS: + /* handle's state is now updating with the copies */ + if (end_state <= in_handle->file_state) + return 0; + + ret = copy_ftrace_files(in_handle, out_handle); + if (ret < 0) + goto out; + + /* fallthrough */ + case TRACECMD_FILE_ALL_EVENTS: + if (end_state <= in_handle->file_state) + return 0; + + ret = copy_event_files(in_handle, out_handle); + if (ret < 0) + goto out; + + /* fallthrough */ + case TRACECMD_FILE_KALLSYMS: + if (end_state <= in_handle->file_state) + return 0; + + ret = copy_proc_kallsyms(in_handle, out_handle); + if (ret < 0) + goto out; + + /* fallthrough */ + case TRACECMD_FILE_PRINTK: + if (end_state <= in_handle->file_state) + return 0; + + ret = copy_ftrace_printk(in_handle, out_handle); + if (ret < 0) + goto out; + + /* fallthrough */ + case TRACECMD_FILE_CMD_LINES: + if (end_state <= in_handle->file_state) + return 0; + + ret = 
copy_command_lines(in_handle, out_handle); + if (ret < 0) + goto out; + + /* fallthrough */ + case TRACECMD_FILE_CPU_COUNT: + if (end_state <= in_handle->file_state) + return 0; + + ret = copy_cpu_count(in_handle, out_handle); + if (ret < 0) + goto out; + + /* fallthrough */ + default: + break; + } + + out: + return ret < 0 ? -1 : 0; +} + +int tracecmd_copy_buffer_descr(struct tracecmd_input *in_handle, + struct tracecmd_output *out_handle) +{ + int i; + + if (tracecmd_get_out_file_version(out_handle) >= FILE_VERSION_SECTIONS) + return 0; + + for (i = 0; i < in_handle->nr_buffers; i++) + tracecmd_add_buffer_info(out_handle, in_handle->buffers[i].name, 0); + + return tracecmd_write_buffer_info(out_handle); +} + +static int copy_options_recursive(struct tracecmd_input *in_handle, + struct tracecmd_output *out_handle) +{ + unsigned short id, flags = 0; + unsigned short option, en2; + unsigned long long next; + unsigned int size, en4; + bool skip; + + for (;;) { + if (do_read_check(in_handle, &option, 2)) + return -1; + + en2 = tep_read_number(in_handle->pevent, &option, 2); + + if (en2 == TRACECMD_OPTION_DONE && !HAS_SECTIONS(in_handle)) + return 0; + + /* next 4 bytes is the size of the option */ + if (do_read_check(in_handle, &size, 4)) + return -1; + + en4 = tep_read_number(in_handle->pevent, &size, 4); + if (en2 == TRACECMD_OPTION_DONE) { + /* option done v7 */ + if (en4 < 8) + return -1; + + if (read8(in_handle, &next)) + return -1; + + if (!next) + break; + + if (do_lseek(in_handle, next, SEEK_SET) == (off64_t)-1) + return -1; + + if (read_section_header(in_handle, &id, &flags, NULL, NULL)) + return -1; + + if (id != TRACECMD_OPTION_DONE) + return -1; + + if (flags & TRACECMD_SEC_FL_COMPRESS && in_uncompress_block(in_handle)) + return -1; + + return copy_options_recursive(in_handle, out_handle); + } + /* Do not copy these, as they have file specific offsets */ + switch (en2) { + case TRACECMD_OPTION_BUFFER: + case TRACECMD_OPTION_BUFFER_TEXT: + case 
TRACECMD_OPTION_HEADER_INFO: + case TRACECMD_OPTION_FTRACE_EVENTS: + case TRACECMD_OPTION_EVENT_FORMATS: + case TRACECMD_OPTION_KALLSYMS: + case TRACECMD_OPTION_PRINTK: + case TRACECMD_OPTION_CMDLINES: + skip = true; + break; + default: + skip = false; + break; + } + if (skip) { + do_lseek(in_handle, en4, SEEK_CUR); + continue; + } + if (do_write_check(out_handle, &option, 2)) + return -1; + + if (do_write_check(out_handle, &size, 4)) + return -1; + + if (read_copy_data(in_handle, en4, out_handle)) + return -1; + } + + return 0; +} + +static int copy_options(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) +{ + unsigned long long offset, start; + unsigned short id, en2, flags = 0; + int tmp; + + if (HAS_SECTIONS(in_handle)) { + if (read_section_header(in_handle, &id, &flags, NULL, NULL)) + return -1; + + if (id != TRACECMD_OPTION_DONE) + return -1; + + if (flags & TRACECMD_SEC_FL_COMPRESS && in_uncompress_block(in_handle)) + return -1; + } + start = tracecmd_get_out_file_offset(out_handle); + if (tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS) { + if (do_write_check(out_handle, "options ", 10)) + return -1; + } + + offset = out_write_section_header(out_handle, TRACECMD_OPTION_DONE, "options", 0, false); + + if (copy_options_recursive(in_handle, out_handle)) + goto error; + + id = TRACECMD_OPTION_DONE; + en2 = tep_read_number(in_handle->pevent, &id, 2); + if (do_write_check(out_handle, &en2, 2)) + goto error; + + if (tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS) { + out_save_options_offset(out_handle, start); + } else { + tmp = 8; + if (do_write_check(out_handle, &tmp, 4)) + goto error; + + out_save_options_offset(out_handle, start); + start = 0; + if (do_write_check(out_handle, &start, 8)) + goto error; + } + out_update_section_header(out_handle, offset); + if (flags & TRACECMD_SEC_FL_COMPRESS) + in_uncompress_reset(in_handle); + in_handle->file_state = TRACECMD_FILE_OPTIONS; + 
out_set_file_state(out_handle, in_handle->file_state); + /* Append local options */ + return tracecmd_append_options(out_handle); + +error: + if (flags & TRACECMD_SEC_FL_COMPRESS) + in_uncompress_reset(in_handle); + return 0; +} + +int tracecmd_copy_options(struct tracecmd_input *in_handle, + struct tracecmd_output *out_handle) +{ + if (!check_in_state(in_handle, TRACECMD_FILE_OPTIONS) || + !check_out_state(out_handle, TRACECMD_FILE_OPTIONS)) + return -1; + + if (!in_handle->options_start) + return 0; + + if (lseek64(in_handle->fd, in_handle->options_start, SEEK_SET) == (off64_t)-1) + return -1; + + if (copy_options(in_handle, out_handle) < 0) + return -1; + + return 0; +} + +static int copy_trace_latency(struct tracecmd_input *in_handle, + struct tracecmd_output *out_handle, const char *buf_name) +{ + int page_size = getpagesize(); + unsigned long long wsize; + unsigned long long offset; + int fd; + + if (tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS && + do_write_check(out_handle, "latency ", 10)) + return -1; + + offset = tracecmd_get_out_file_offset(out_handle); + + if (tracecmd_get_out_file_version(out_handle) >= FILE_VERSION_SECTIONS && + !out_add_buffer_option(out_handle, buf_name, TRACECMD_OPTION_BUFFER_TEXT, + offset, 0, NULL, page_size)) + return -1; + + offset = out_write_section_header(out_handle, TRACECMD_OPTION_BUFFER_TEXT, + "buffer latency", TRACECMD_SEC_FL_COMPRESS, false); + + if (in_handle->latz.fd >= 0) + fd = in_handle->latz.fd; + else + fd = in_handle->fd; + + if (!out_copy_fd_compress(out_handle, fd, 0, &wsize, page_size)) + return -1; + + if (out_update_section_header(out_handle, offset)) + return -1; + + out_set_file_state(out_handle, TRACECMD_FILE_CPU_LATENCY); + return 0; +} + +static int copy_trace_flyrecord_data(struct tracecmd_input *in_handle, + struct tracecmd_output *out_handle, const char *buff_name) +{ + struct cpu_data_source *data; + int total_size = 0; + int cpus; + int ret; + int i, j; + + if 
(tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS) + cpus = in_handle->max_cpu; + else + cpus = in_handle->cpus; + + data = calloc(cpus, sizeof(struct cpu_data_source)); + if (!data) + return -1; + + for (i = 0; i < in_handle->cpus; i++) { + j = in_handle->cpu_data[i].cpu; + data[j].size = in_handle->cpu_data[i].file_size; + total_size += data[j].size; + if (in_handle->cpu_data[i].compress.fd >= 0) { + data[j].fd = in_handle->cpu_data[i].compress.fd; + data[j].offset = 0; + } else { + data[j].fd = in_handle->fd; + data[j].offset = in_handle->cpu_data[i].file_offset; + } + } + if (total_size || tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS) + ret = out_write_cpu_data(out_handle, cpus, data, buff_name); + else + ret = 0; + free(data); + + return ret; +} + +static int copy_flyrecord_buffer(struct tracecmd_input *in_handle, + struct tracecmd_output *out_handle, int index) +{ + struct tracecmd_input *instance; + const char *name; + int ret; + + name = tracecmd_buffer_instance_name(in_handle, index); + if (!name) + return -1; + + instance = tracecmd_buffer_instance_handle(in_handle, index); + if (!instance) + return -1; + + if (!tracecmd_get_quiet(out_handle) && *name) + fprintf(stderr, "\nBuffer: %s\n\n", name); + + if (in_handle->buffers[index].latency) + ret = copy_trace_latency(in_handle, out_handle, name); + else + ret = copy_trace_flyrecord_data(instance, out_handle, name); + tracecmd_close(instance); + + return ret; +} + +static int copy_trace_data_from_v6(struct tracecmd_input *in_handle, + struct tracecmd_output *out_handle) +{ + char buf[10]; + int ret; + int i; + + if (do_read_check(in_handle, buf, 10)) + return -1; + + if (strncmp(buf, "latency", 7) == 0) + in_handle->file_state = TRACECMD_FILE_CPU_LATENCY; + else if (strncmp(buf, "flyrecord", 9) == 0) + in_handle->file_state = TRACECMD_FILE_CPU_FLYRECORD; + + tracecmd_init_data(in_handle); + tracecmd_set_out_clock(out_handle, in_handle->trace_clock); + + if 
(in_handle->file_state == TRACECMD_FILE_CPU_LATENCY) + return copy_trace_latency(in_handle, out_handle, ""); + + /* top instance */ + ret = copy_trace_flyrecord_data(in_handle, out_handle, ""); + if (ret) + return ret; + + for (i = 0; i < in_handle->nr_buffers; i++) + copy_flyrecord_buffer(in_handle, out_handle, i); + + return 0; +} + +static int copy_trace_data_from_v7(struct tracecmd_input *in_handle, + struct tracecmd_output *out_handle) +{ + int ret = 0; + int i; + + /* Force using temporary files for trace data decompression */ + in_handle->read_zpage = false; + tracecmd_init_data(in_handle); + tracecmd_set_out_clock(out_handle, in_handle->trace_clock); + + /* copy top buffer */ + if (in_handle->top_buffer.latency) + ret = copy_trace_latency(in_handle, out_handle, in_handle->top_buffer.name); + else if (in_handle->top_buffer.cpus) + ret = copy_trace_flyrecord_data(in_handle, out_handle, + in_handle->top_buffer.name); + else if (tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS) + ret = out_write_emty_cpu_data(out_handle, in_handle->max_cpu); + if (ret) + return ret; + + for (i = 0; i < in_handle->nr_buffers; i++) + copy_flyrecord_buffer(in_handle, out_handle, i); + + return 0; +} + +__hidden int tracecmd_copy_trace_data(struct tracecmd_input *in_handle, + struct tracecmd_output *out_handle) +{ + int ret; + + if (!check_in_state(in_handle, TRACECMD_FILE_CPU_FLYRECORD) || + !check_out_state(out_handle, TRACECMD_FILE_CPU_FLYRECORD)) + return -1; + + if (in_handle->file_version < FILE_VERSION_SECTIONS) + ret = copy_trace_data_from_v6(in_handle, out_handle); + else + ret = copy_trace_data_from_v7(in_handle, out_handle); + + return ret; +} + +/** + * tracecmd_record_at_buffer_start - return true if record is first on subbuffer + * @handle: input handle for the trace.dat file + * @record: The record to test if it is the first record on page + * + * Returns true if the record is the first record on the page. 
+ */ +int tracecmd_record_at_buffer_start(struct tracecmd_input *handle, + struct tep_record *record) +{ + struct page *page = record->priv; + struct kbuffer *kbuf = handle->cpu_data[record->cpu].kbuf; + int offset; + + if (!page || !kbuf) + return 0; + + offset = record->offset - page->offset; + return offset == kbuffer_start_of_data(kbuf); +} + +unsigned long long tracecmd_page_ts(struct tracecmd_input *handle, + struct tep_record *record) +{ + struct page *page = record->priv; + struct kbuffer *kbuf = handle->cpu_data[record->cpu].kbuf; + + if (!page || !kbuf) + return 0; + + return kbuffer_subbuf_timestamp(kbuf, page->map); +} + +unsigned int tracecmd_record_ts_delta(struct tracecmd_input *handle, + struct tep_record *record) +{ + struct kbuffer *kbuf = handle->cpu_data[record->cpu].kbuf; + struct page *page = record->priv; + int offset; + + if (!page || !kbuf) + return 0; + + offset = record->offset - page->offset; + + return kbuffer_ptr_delta(kbuf, page->map + offset); +} + +struct kbuffer *tracecmd_record_kbuf(struct tracecmd_input *handle, + struct tep_record *record) +{ + return handle->cpu_data[record->cpu].kbuf; +} + +void *tracecmd_record_page(struct tracecmd_input *handle, + struct tep_record *record) +{ + struct page *page = record->priv; + + return page ? 
page->map : NULL; +} + +void *tracecmd_record_offset(struct tracecmd_input *handle, + struct tep_record *record) +{ + struct page *page = record->priv; + int offset; + + if (!page) + return NULL; + + offset = record->offset - page->offset; + + return page->map + offset; +} + +int tracecmd_buffer_instances(struct tracecmd_input *handle) +{ + return handle->nr_buffers; +} + +const char *tracecmd_buffer_instance_name(struct tracecmd_input *handle, int indx) +{ + if (indx >= handle->nr_buffers) + return NULL; + + return handle->buffers[indx].name; +} + +struct tracecmd_input * +tracecmd_buffer_instance_handle(struct tracecmd_input *handle, int indx) +{ + struct tracecmd_input *new_handle; + struct input_buffer_instance *buffer = &handle->buffers[indx]; + size_t offset; + ssize_t ret; + + if (indx >= handle->nr_buffers) + return NULL; + + /* + * We make a copy of the current handle, but we substitute + * the cpu data with the cpu data for this buffer. + */ + new_handle = malloc(sizeof(*handle)); + if (!new_handle) + return NULL; + + *new_handle = *handle; + memset(&new_handle->top_buffer, 0, sizeof(new_handle->top_buffer)); + new_handle->cpu_data = NULL; + new_handle->nr_buffers = 0; + new_handle->buffers = NULL; + new_handle->version = NULL; + new_handle->sections = NULL; + new_handle->strings = NULL; + new_handle->guest = NULL; + new_handle->ref = 1; + if (handle->trace_clock) { + new_handle->trace_clock = strdup(handle->trace_clock); + if (!new_handle->trace_clock) { + free(new_handle); + return NULL; + } + } + memset(&new_handle->host, 0, sizeof(new_handle->host)); + new_handle->parent = handle; + new_handle->cpustats = NULL; + new_handle->hooks = NULL; + if (handle->uname) + /* Ignore if fails to malloc, no biggy */ + new_handle->uname = strdup(handle->uname); + tracecmd_ref(handle); + + new_handle->fd = dup(handle->fd); + + new_handle->flags |= TRACECMD_FL_BUFFER_INSTANCE; + + new_handle->pid_maps = NULL; + if (!HAS_SECTIONS(handle)) { + /* Save where we currently 
are */ + offset = lseek64(handle->fd, 0, SEEK_CUR); + + ret = lseek64(handle->fd, buffer->offset, SEEK_SET); + if (ret == (off64_t)-1) { + tracecmd_warning("could not seek to buffer %s offset %ld", + buffer->name, buffer->offset); + goto error; + } + /* + * read_options_type() is called right after the CPU count so update + * file state accordingly. + */ + new_handle->file_state = TRACECMD_FILE_CPU_COUNT; + ret = read_options_type(new_handle); + if (!ret) + ret = read_cpu_data(new_handle); + + if (ret < 0) { + tracecmd_warning("failed to read sub buffer %s", buffer->name); + goto error; + } + ret = lseek64(handle->fd, offset, SEEK_SET); + if (ret < 0) { + tracecmd_warning("could not seek to back to offset %ld", offset); + goto error; + } + } else { + new_handle->page_size = handle->buffers[indx].page_size; + if (init_buffer_cpu_data(new_handle, buffer) < 0) + goto error; + } + + return new_handle; + +error: + tracecmd_close(new_handle); + return NULL; +} + +int tracecmd_is_buffer_instance(struct tracecmd_input *handle) +{ + return handle->flags & TRACECMD_FL_BUFFER_INSTANCE; +} + +/** + * tracecmd_long_size - return the size of "long" for the arch + * @handle: input handle for the trace.dat file + */ +int tracecmd_long_size(struct tracecmd_input *handle) +{ + return handle->long_size; +} + +/** + * tracecmd_page_size - return the PAGE_SIZE for the arch + * @handle: input handle for the trace.dat file + */ +int tracecmd_page_size(struct tracecmd_input *handle) +{ + return handle->page_size; +} + +/** + * tracecmd_page_size - return the number of CPUs recorded + * @handle: input handle for the trace.dat file + */ +int tracecmd_cpus(struct tracecmd_input *handle) +{ + return handle->max_cpu; +} + +/** + * tracecmd_get_tep - return the tep handle + * @handle: input handle for the trace.dat file + */ +struct tep_handle *tracecmd_get_tep(struct tracecmd_input *handle) +{ + return handle->pevent; +} + +/** + * tracecmd_get_in_file_version - return the trace.dat file 
version + * @handle: input handle for the trace.dat file + */ +unsigned long tracecmd_get_in_file_version(struct tracecmd_input *handle) +{ + return handle->file_version; +} + +/** + * tracecmd_get_file_compress_proto - get name and version of compression algorithm + * @handle: input handle for the trace.dat file + * @name: return, name of the compression algorithm. + * @version: return, version of the compression algorithm. + * + * Get the name and the version of the compression algorithm, used to + * compress the file associated with @handle. + * Returns 0 on success, or -1 in case of an error. If 0 is returned, + * the name and version of the algorithm are stored in @name and @version. + * The returned strings must *not* be freed. + */ +int tracecmd_get_file_compress_proto(struct tracecmd_input *handle, + const char **name, const char **version) +{ + return tracecmd_compress_proto_get_name(handle->compress, name, version); +} + +/** + * tracecmd_get_use_trace_clock - return use_trace_clock + * @handle: input handle for the trace.dat file + */ +bool tracecmd_get_use_trace_clock(struct tracecmd_input *handle) +{ + return handle->use_trace_clock; +} + +/** + * tracecmd_get_options_offset - get offset of the options sections in the file + * @handle: input handle for the trace.dat file + */ +size_t tracecmd_get_options_offset(struct tracecmd_input *handle) +{ + return handle->options_start; +} + +/** + * tracecmd_get_trace_clock - return the saved trace clock + * @handle: input handle for the trace.dat file + * + * Returns a string of the clock that was saved in the trace.dat file. + * The string should not be freed, as it points to the internal + * structure data. + */ +const char *tracecmd_get_trace_clock(struct tracecmd_input *handle) +{ + return handle->trace_clock; +} + +/** + * tracecmd_get_cpustats - return the saved cpu stats + * @handle: input handle for the trace.dat file + * + * Provides a method to extract the cpu stats saved in @handle. 
+ * + * Returns a string of the cpu stats that was saved in the trace.dat file. + * The string should not be freed, as it points to the internal + * structure data. + */ +const char *tracecmd_get_cpustats(struct tracecmd_input *handle) +{ + return handle->cpustats; +} + +/** + * tracecmd_get_uname - return the saved name and kernel information + * @handle: input handle for the trace.dat file + * + * Provides a method to extract the system information saved in @handle. + * + * Returns a string of the system information that was saved in the + * trace.dat file. + * The string should not be freed, as it points to the internal + * structure data. + */ +const char *tracecmd_get_uname(struct tracecmd_input *handle) +{ + return handle->uname; +} + +/** + * tracecmd_get_version - return the saved version information + * @handle: input handle for the trace.dat file + * + * Provides a method to extract the version string saved in @handle. + * + * Returns a string of the version that was saved in the trace.dat file. + * The string should not be freed, as it points to the internal + * structure data. + */ +const char *tracecmd_get_version(struct tracecmd_input *handle) +{ + return handle->version; +} + +/** + * tracecmd_get_cpu_file_size - return the saved cpu file size + * @handle: input handle for the trace.dat file + * @cpu: cpu index + * + * Provides a method to extract the cpu file size saved in @handle. + * + * Returns the cpu file size saved in trace.dat file or (off64_t)-1 for + * invalid cpu index. 
+ */ +off64_t tracecmd_get_cpu_file_size(struct tracecmd_input *handle, int cpu) +{ + if (cpu < 0 || cpu >= handle->cpus) + return (off64_t)-1; + return handle->cpu_data[cpu].file_size; +} + +/** + * tracecmd_get_show_data_func - return the show data func + * @handle: input handle for the trace.dat file + */ +tracecmd_show_data_func +tracecmd_get_show_data_func(struct tracecmd_input *handle) +{ + return handle->show_data_func; +} + +/** + * tracecmd_set_show_data_func - set the show data func + * @handle: input handle for the trace.dat file + */ +void tracecmd_set_show_data_func(struct tracecmd_input *handle, + tracecmd_show_data_func func) +{ + handle->show_data_func = func; +} + +/** + * tracecmd_get_traceid - get the trace id of the session + * @handle: input handle for the trace.dat file + * + * Returns the trace id, written in the trace file + */ +unsigned long long tracecmd_get_traceid(struct tracecmd_input *handle) +{ + return handle->trace_id; +} + +/** + * tracecmd_get_first_ts - get the timestamp of the first recorded event + * @handle: input handle for the trace.dat file + * + * Returns the timestamp of the first recorded event + */ +unsigned long long tracecmd_get_first_ts(struct tracecmd_input *handle) +{ + unsigned long long ts = 0; + bool first = true; + int i; + + for (i = 0; i < handle->cpus; i++) { + /* Ignore empty buffers */ + if (!handle->cpu_data[i].size) + continue; + if (first || ts > handle->cpu_data[i].first_ts) + ts = handle->cpu_data[i].first_ts; + first = false; + } + + return ts; +} + +/** + * tracecmd_get_guest_cpumap - get the mapping of guest VCPU to host process + * @handle: input handle for the trace.dat file + * @trace_id: ID of the guest tracing session + * @name: return, name of the guest + * @vcpu_count: return, number of VPUs + * @cpu_pid: return, array with guest VCPU to host process mapping + * + * Returns @name of the guest, number of VPUs (@vcpu_count) + * and array @cpu_pid with size @vcpu_count. 
Array index is VCPU id, array + * content is PID of the host process, running this VCPU. + * + * This information is stored in host trace.dat file + */ +int tracecmd_get_guest_cpumap(struct tracecmd_input *handle, + unsigned long long trace_id, + const char **name, + int *vcpu_count, const int **cpu_pid) +{ + struct guest_trace_info *guest = handle->guest; + + while (guest) { + if (guest->trace_id == trace_id) + break; + guest = guest->next; + } + if (!guest) + return -1; + + if (name) + *name = guest->name; + if (vcpu_count) + *vcpu_count = guest->vcpu_count; + if (cpu_pid) + *cpu_pid = guest->cpu_pid; + return 0; +} + +/** + * tracecmd_enable_tsync - enable / disable the timestamps correction + * @handle: input handle for the trace.dat file + * @enable: enable / disable the timestamps correction + * + * Enables or disables timestamps correction on file load, using the array of + * recorded time offsets. If "enable" is true, but there are no time offsets, + * function fails and -1 is returned. + * + * Returns -1 in case of an error, or 0 otherwise + */ +int tracecmd_enable_tsync(struct tracecmd_input *handle, bool enable) +{ + if (enable && + (!handle->host.ts_offsets || !handle->host.cpu_count)) + return -1; + + handle->host.sync_enable = enable; + + return 0; +} + diff --git a/lib/trace-cmd/trace-msg.c b/lib/trace-cmd/trace-msg.c new file mode 100644 index 00000000..39465ade --- /dev/null +++ b/lib/trace-cmd/trace-msg.c @@ -0,0 +1,1404 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * trace-msg.c : define message protocol for communication between clients and + * a server + * + * Copyright (C) 2013 Hitachi, Ltd. 
+ * Created by Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@hitachi.com> + * + */ + +#include <errno.h> +#include <poll.h> +#include <fcntl.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/types.h> +#include <linux/types.h> + +#include "trace-write-local.h" +#include "trace-cmd-local.h" +#include "trace-local.h" +#include "trace-msg.h" +#include "trace-cmd.h" + +typedef __u32 u32; +typedef __be32 be32; + +#define dprint(fmt, ...) tracecmd_debug(fmt, ##__VA_ARGS__) + +/* Two (4k) pages is the max transfer for now */ +#define MSG_MAX_LEN 8192 + +#define MSG_HDR_LEN sizeof(struct tracecmd_msg_header) + +#define MSG_MAX_DATA_LEN (MSG_MAX_LEN - MSG_HDR_LEN) + +unsigned int page_size; + +struct tracecmd_msg_tinit { + be32 cpus; + be32 page_size; + be32 opt_num; +} __packed; + +struct tracecmd_msg_rinit { + be32 cpus; +} __packed; + +#define TRACE_REQ_PARAM_SIZE (2 * sizeof(int)) +enum trace_req_params { + TRACE_REQUEST_ARGS, + TRACE_REQUEST_TSYNC_PROTOS, +}; + +struct tracecmd_msg_trace_req_param { + int id; + int length; + char *value; +}; + +struct tracecmd_msg_trace_req { + be32 flags; + be32 argc; + u64 trace_id; +} __packed; + +struct tracecmd_msg_trace_resp { + be32 flags; + be32 cpus; + be32 page_size; + u64 trace_id; + char tsync_proto_name[TRACECMD_TSYNC_PNAME_LENGTH]; + be32 tsync_port; +} __packed; + +struct tracecmd_msg_tsync { + char sync_protocol_name[TRACECMD_TSYNC_PNAME_LENGTH]; + be32 sync_msg_id; +} __packed; + +struct tracecmd_msg_header { + be32 size; + be32 cmd; + be32 cmd_size; +} __packed; + +#define MSG_MAP \ + C(CLOSE, 0, 0), \ + C(TINIT, 1, sizeof(struct tracecmd_msg_tinit)), \ + C(RINIT, 2, sizeof(struct tracecmd_msg_rinit)), \ + C(SEND_DATA, 3, 0), \ + C(FIN_DATA, 4, 0), \ + C(NOT_SUPP, 5, 0), \ + C(TRACE_REQ, 6, sizeof(struct tracecmd_msg_trace_req)), \ + C(TRACE_RESP, 7, sizeof(struct 
tracecmd_msg_trace_resp)),\ + C(CLOSE_RESP, 8, 0), \ + C(TIME_SYNC, 9, sizeof(struct tracecmd_msg_tsync)), + +#undef C +#define C(a,b,c) MSG_##a = b + +enum tracecmd_msg_cmd { + MSG_MAP + MSG_NR_COMMANDS +}; + +#undef C +#define C(a,b,c) c + +static be32 msg_cmd_sizes[] = { MSG_MAP }; + +#undef C +#define C(a,b,c) #a + +static const char *msg_names[] = { MSG_MAP }; + +static const char *cmd_to_name(int cmd) +{ + if (cmd < 0 || cmd >= MSG_NR_COMMANDS) + return "Unknown"; + return msg_names[cmd]; +} + +struct tracecmd_msg { + struct tracecmd_msg_header hdr; + union { + struct tracecmd_msg_tinit tinit; + struct tracecmd_msg_rinit rinit; + struct tracecmd_msg_trace_req trace_req; + struct tracecmd_msg_trace_resp trace_resp; + struct tracecmd_msg_tsync tsync; + }; + char *buf; +} __packed; + +static inline int msg_buf_len(struct tracecmd_msg *msg) +{ + return ntohl(msg->hdr.size) - MSG_HDR_LEN - ntohl(msg->hdr.cmd_size); +} + +static int __msg_write(int fd, struct tracecmd_msg *msg, bool network) +{ + int msg_size, data_size; + int ret; + int cmd; + + if (network) { + cmd = ntohl(msg->hdr.cmd); + if (cmd < 0 || cmd >= MSG_NR_COMMANDS) + return -EINVAL; + dprint("msg send: %d (%s) [%d]\n", + cmd, cmd_to_name(cmd), ntohl(msg->hdr.size)); + } + msg_size = MSG_HDR_LEN + ntohl(msg->hdr.cmd_size); + data_size = ntohl(msg->hdr.size) - msg_size; + if (data_size < 0) + return -EINVAL; + + if (network) { + ret = __do_write_check(fd, msg, msg_size); + if (ret < 0) + return ret; + } + if (!data_size) + return 0; + + return __do_write_check(fd, msg->buf, data_size); +} + +__hidden off64_t msg_lseek(struct tracecmd_msg_handle *msg_handle, off64_t offset, int whence) +{ + /* + * lseek works only if the handle is in cache mode, + * cannot seek on a network socket + */ + if (!msg_handle->cache || msg_handle->cfd < 0) + return (off64_t)-1; + return lseek64(msg_handle->cfd, offset, whence); +} + +static int msg_write(struct tracecmd_msg_handle *msg_handle, struct tracecmd_msg *msg) +{ + 
/*
 * tatou - "test a to u": convert a decimal string to an unsigned int
 * @s: the string to convert
 * @res: where the value is stored on success
 *
 * strtoll() is used instead of atol(): it reports overflow through errno,
 * and long long holds every unsigned int value even where long is 32 bits.
 * Like atol(), trailing characters are ignored and a string without digits
 * converts to 0.
 *
 * Returns 0 on success, or -1 if the value is negative or does not fit in
 * an unsigned int, in which case @res is left untouched.
 */
static int tatou(const char *s, unsigned int *res)
{
	long long r;

	errno = 0;
	r = strtoll(s, NULL, 10);
	if (errno == ERANGE || r < 0 || r > UINT_MAX)
		return -1;

	*res = (unsigned int)r;
	return 0;
}

/*
 * write_uints - write numbers as consecutive NUL terminated decimal strings
 * @buf: destination buffer, may be NULL when @buf_len is 0
 * @buf_len: size of @buf in bytes
 * @arr: the numbers to write
 * @arr_len: number of elements in @arr
 *
 * Returns the number of bytes needed to hold all strings with their
 * terminating NUL bytes, even if @buf is too small for them. Calling with
 * a NULL @buf and a @buf_len of 0 gives the size to allocate. A negative
 * value from snprintf() is returned as is.
 */
static int write_uints(char *buf, size_t buf_len,
		       unsigned int *arr, int arr_len)
{
	size_t used;
	int i, ret, tot = 0;

	for (i = 0; i < arr_len; i++) {
		ret = snprintf(buf, buf_len, "%u", arr[i]);
		if (ret < 0)
			return ret;

		/* Count the '\0' byte */
		used = (size_t)ret + 1;
		tot += (int)used;

		/* On truncation stop at the end of the buffer, never beyond it */
		if (used > buf_len)
			used = buf_len;
		if (buf)
			buf += used;
		buf_len -= used;
	}

	return tot;
}
htonl(cmd); + msg->hdr.cmd_size = htonl(msg_cmd_sizes[cmd]); +} + +static void msg_free(struct tracecmd_msg *msg) +{ + free(msg->buf); + memset(msg, 0, sizeof(*msg)); +} + +static int tracecmd_msg_send(struct tracecmd_msg_handle *msg_handle, struct tracecmd_msg *msg) +{ + int ret = 0; + + ret = msg_write(msg_handle, msg); + if (ret < 0) + ret = -ECOMM; + + msg_free(msg); + + return ret; +} + +static int msg_send_nofree(struct tracecmd_msg_handle *msg_handle, struct tracecmd_msg *msg) +{ + int ret = 0; + + ret = msg_write(msg_handle, msg); + if (ret < 0) + ret = -ECOMM; + + return ret; +} + +static int msg_read(int fd, void *buf, u32 size, int *n) +{ + ssize_t r; + + while (size) { + r = read(fd, buf + *n, size); + if (r < 0) { + if (errno == EINTR) + continue; + return -errno; + } else if (!r) + return -ENOTCONN; + size -= r; + *n += r; + } + + return 0; +} + +static char scratch_buf[MSG_MAX_LEN]; + +static int msg_read_extra(int fd, struct tracecmd_msg *msg, + int *n, int size) +{ + int cmd, cmd_size, rsize; + int ret; + + cmd = ntohl(msg->hdr.cmd); + if (cmd < 0 || cmd >= MSG_NR_COMMANDS) + return -EINVAL; + + cmd_size = ntohl(msg->hdr.cmd_size); + if (cmd_size < 0) + return -EINVAL; + + if (cmd_size > 0) { + rsize = cmd_size; + if (rsize > msg_cmd_sizes[cmd]) + rsize = msg_cmd_sizes[cmd]; + + ret = msg_read(fd, msg, rsize, n); + if (ret < 0) + return ret; + + ret = msg_read(fd, scratch_buf, cmd_size - rsize, n); + if (ret < 0) + return ret; + } + + if (size > *n) { + size -= *n; + msg->buf = malloc(size); + if (!msg->buf) + return -ENOMEM; + + *n = 0; + return msg_read(fd, msg->buf, size, n); + } + + return 0; +} + +/* + * Read header information of msg first, then read all data + */ +static int tracecmd_msg_recv(int fd, struct tracecmd_msg *msg) +{ + u32 size = 0; + int n = 0; + int ret; + + ret = msg_read(fd, msg, MSG_HDR_LEN, &n); + if (ret < 0) + return ret; + + dprint("msg received: %d (%s) [%d]\n", + ntohl(msg->hdr.cmd), cmd_to_name(ntohl(msg->hdr.cmd)), + 
ntohl(msg->hdr.size)); + + size = ntohl(msg->hdr.size); + if (size > MSG_MAX_LEN) + /* too big */ + goto error; + else if (size < MSG_HDR_LEN) + /* too small */ + goto error; + else if (size > MSG_HDR_LEN) + return msg_read_extra(fd, msg, &n, size); + + return 0; +error: + tracecmd_plog("Receive an invalid message(size=%d)\n", size); + return -ENOMSG; +} + +#define MSG_WAIT_MSEC 5000 +static int msg_wait_to = MSG_WAIT_MSEC; + +bool tracecmd_msg_done(struct tracecmd_msg_handle *msg_handle) +{ + return (volatile int)msg_handle->done; +} + +void tracecmd_msg_set_done(struct tracecmd_msg_handle *msg_handle) +{ + msg_handle->done = true; +} + +static void error_operation(struct tracecmd_msg *msg) +{ + tracecmd_warning("Message: cmd=%d size=%d", ntohl(msg->hdr.cmd), ntohl(msg->hdr.size)); +} + +/* + * A return value of 0 indicates time-out + */ +static int tracecmd_msg_recv_wait(int fd, struct tracecmd_msg *msg) +{ + struct pollfd pfd; + int ret; + + pfd.fd = fd; + pfd.events = POLLIN; + ret = poll(&pfd, 1, tracecmd_get_debug() ? 
-1 : msg_wait_to); + if (ret < 0) + return -errno; + else if (ret == 0) + return -ETIMEDOUT; + + return tracecmd_msg_recv(fd, msg); +} + +static int tracecmd_msg_wait_for_msg(int fd, struct tracecmd_msg *msg) +{ + u32 cmd; + int ret; + + ret = tracecmd_msg_recv_wait(fd, msg); + if (ret < 0) { + if (ret == -ETIMEDOUT) + tracecmd_warning("Connection timed out"); + return ret; + } + + cmd = ntohl(msg->hdr.cmd); + if (cmd == MSG_CLOSE) + return -ECONNABORTED; + + return 0; +} + +static int tracecmd_msg_send_notsupp(struct tracecmd_msg_handle *msg_handle) +{ + struct tracecmd_msg msg; + + tracecmd_msg_init(MSG_NOT_SUPP, &msg); + return tracecmd_msg_send(msg_handle, &msg); +} + +static int handle_unexpected_msg(struct tracecmd_msg_handle *msg_handle, + struct tracecmd_msg *msg) +{ + /* Don't send MSG_NOT_SUPP back if we just received one */ + if (ntohl(msg->hdr.cmd) == MSG_NOT_SUPP) + return 0; + + return tracecmd_msg_send_notsupp(msg_handle); + +} + +int tracecmd_msg_send_init_data(struct tracecmd_msg_handle *msg_handle, + unsigned int **client_ports) +{ + struct tracecmd_msg msg; + unsigned int *ports; + int i, cpus, ret; + char *p, *buf_end; + ssize_t buf_len; + + *client_ports = NULL; + + tracecmd_msg_init(MSG_TINIT, &msg); + ret = make_tinit(msg_handle, &msg); + if (ret < 0) + goto out; + + ret = tracecmd_msg_send(msg_handle, &msg); + if (ret < 0) + goto out; + + msg_free(&msg); + + ret = tracecmd_msg_wait_for_msg(msg_handle->fd, &msg); + if (ret < 0) + goto out; + + if (ntohl(msg.hdr.cmd) != MSG_RINIT) { + ret = -EOPNOTSUPP; + goto error; + } + + buf_len = msg_buf_len(&msg); + if (buf_len <= 0) { + ret = -EINVAL; + goto error; + } + + if (msg.buf[buf_len-1] != '\0') { + ret = -EINVAL; + goto error; + } + + cpus = ntohl(msg.rinit.cpus); + ports = malloc(sizeof(*ports) * cpus); + if (!ports) { + ret = -ENOMEM; + goto out; + } + + buf_end = msg.buf + buf_len; + for (i = 0, p = msg.buf; i < cpus; i++, p++) { + if (p >= buf_end || tatou(p, &ports[i])) { + free(ports); + 
ret = -EINVAL; + goto error; + } + p = strchr(p, '\0'); + } + + *client_ports = ports; + + msg_free(&msg); + return 0; + +error: + error_operation(&msg); + if (ret == -EOPNOTSUPP) + handle_unexpected_msg(msg_handle, &msg); +out: + msg_free(&msg); + return ret; +} + +static bool process_option(struct tracecmd_msg_handle *msg_handle, + const char *opt) +{ + if (strcmp(opt, "tcp") == 0) { + msg_handle->flags |= TRACECMD_MSG_FL_USE_TCP; + return true; + } + if (strcmp(opt, "vsock") == 0) { + msg_handle->flags |= TRACECMD_MSG_FL_USE_VSOCK; + return true; + } + return false; +} + +struct tracecmd_msg_handle * +tracecmd_msg_handle_alloc(int fd, unsigned long flags) +{ + struct tracecmd_msg_handle *handle; + + handle = calloc(1, sizeof(struct tracecmd_msg_handle)); + if (!handle) + return NULL; + + handle->fd = fd; + handle->flags = flags; + handle->cfd = -1; + handle->cache = false; + return handle; +} + +int tracecmd_msg_handle_cache(struct tracecmd_msg_handle *msg_handle) +{ + if (msg_handle->cfd < 0) { + strcpy(msg_handle->cfile, MSG_CACHE_FILE); + msg_handle->cfd = mkstemp(msg_handle->cfile); + if (msg_handle->cfd < 0) + return -1; + unlink(msg_handle->cfile); + } + msg_handle->cache = true; + return 0; +} + +static int flush_cache(struct tracecmd_msg_handle *msg_handle) +{ + char buf[MSG_MAX_DATA_LEN]; + int ret; + + if (!msg_handle->cache || msg_handle->cfd < 0) + return 0; + msg_handle->cache = false; + if (lseek64(msg_handle->cfd, 0, SEEK_SET) == (off64_t)-1) + return -1; + do { + ret = read(msg_handle->cfd, buf, MSG_MAX_DATA_LEN); + if (ret <= 0) + break; + ret = tracecmd_msg_data_send(msg_handle, buf, ret); + if (ret < 0) + break; + } while (ret >= 0); + + close(msg_handle->cfd); + msg_handle->cfd = -1; + return ret; +} + +void tracecmd_msg_handle_close(struct tracecmd_msg_handle *msg_handle) +{ + if (msg_handle->fd >= 0) + close(msg_handle->fd); + if (msg_handle->cfd >= 0) + close(msg_handle->cfd); + free(msg_handle); +} + +#define MAX_OPTION_SIZE 4096 + +int 
tracecmd_msg_initial_setting(struct tracecmd_msg_handle *msg_handle) +{ + struct tracecmd_msg msg; + char *p, *buf_end; + ssize_t buf_len; + int pagesize; + int options, i; + int cpus; + int ret; + + memset(&msg, 0, sizeof(msg)); + ret = tracecmd_msg_recv_wait(msg_handle->fd, &msg); + if (ret < 0) { + if (ret == -ETIMEDOUT) + tracecmd_warning("Connection timed out"); + return ret; + } + + if (ntohl(msg.hdr.cmd) != MSG_TINIT) { + ret = -EOPNOTSUPP; + goto error; + } + + cpus = ntohl(msg.tinit.cpus); + tracecmd_plog("cpus=%d\n", cpus); + if (cpus < 0) { + ret = -EINVAL; + goto error; + } + + msg_handle->cpu_count = cpus; + + pagesize = ntohl(msg.tinit.page_size); + tracecmd_plog("pagesize=%d\n", pagesize); + if (pagesize <= 0) { + ret = -EINVAL; + goto error; + } + + buf_len = msg_buf_len(&msg); + if (buf_len < 0) { + ret = -EINVAL; + goto error; + } + + if (buf_len == 0) + goto no_options; + + if (msg.buf[buf_len-1] != '\0') { + ret = -EINVAL; + goto error; + } + + buf_end = msg.buf + buf_len; + options = ntohl(msg.tinit.opt_num); + for (i = 0, p = msg.buf; i < options; i++, p++) { + if (p >= buf_end) { + ret = -EINVAL; + goto error; + } + + /* do we understand this option? 
*/ + if (!process_option(msg_handle, p)) + tracecmd_plog("Cannot understand option '%s'\n", p); + + p = strchr(p, '\0'); + } + +no_options: + msg_free(&msg); + return pagesize; + +error: + error_operation(&msg); + if (ret == -EOPNOTSUPP) + handle_unexpected_msg(msg_handle, &msg); + msg_free(&msg); + return ret; +} + +int tracecmd_msg_send_port_array(struct tracecmd_msg_handle *msg_handle, + unsigned int *ports) +{ + struct tracecmd_msg msg; + int ret; + + tracecmd_msg_init(MSG_RINIT, &msg); + ret = make_rinit(&msg, msg_handle->cpu_count, ports); + if (ret < 0) + return ret; + + ret = tracecmd_msg_send(msg_handle, &msg); + if (ret < 0) + return ret; + + return 0; +} + +int tracecmd_msg_send_close_msg(struct tracecmd_msg_handle *msg_handle) +{ + struct tracecmd_msg msg; + + tracecmd_msg_init(MSG_CLOSE, &msg); + return tracecmd_msg_send(msg_handle, &msg); +} + +int tracecmd_msg_send_close_resp_msg(struct tracecmd_msg_handle *msg_handle) +{ + struct tracecmd_msg msg; + + tracecmd_msg_init(MSG_CLOSE_RESP, &msg); + return tracecmd_msg_send(msg_handle, &msg); +} + +int tracecmd_msg_data_send(struct tracecmd_msg_handle *msg_handle, + const char *buf, int size) +{ + struct tracecmd_msg msg; + int n; + int ret; + int count = 0; + + /* Don't bother doing anything if there's nothing to do */ + if (!size) + return 0; + + tracecmd_msg_init(MSG_SEND_DATA, &msg); + + msg.buf = malloc(MSG_MAX_DATA_LEN); + if (!msg.buf) + return -ENOMEM; + + msg.hdr.size = htonl(MSG_MAX_LEN); + + n = size; + while (n) { + if (n > MSG_MAX_DATA_LEN) { + memcpy(msg.buf, buf + count, MSG_MAX_DATA_LEN); + n -= MSG_MAX_DATA_LEN; + count += MSG_MAX_DATA_LEN; + } else { + msg.hdr.size = htonl(MSG_HDR_LEN + n); + memcpy(msg.buf, buf + count, n); + n = 0; + } + ret = msg_write(msg_handle, &msg); + if (ret < 0) + break; + } + + msg_free(&msg); + return ret; +} + +int tracecmd_msg_finish_sending_data(struct tracecmd_msg_handle *msg_handle) +{ + struct tracecmd_msg msg; + int ret; + + flush_cache(msg_handle); + 
tracecmd_msg_init(MSG_FIN_DATA, &msg); + ret = tracecmd_msg_send(msg_handle, &msg); + if (ret < 0) + return ret; + return 0; +} + +int tracecmd_msg_read_data(struct tracecmd_msg_handle *msg_handle, int ofd) +{ + struct tracecmd_msg msg; + int t, n, cmd; + ssize_t s; + int ret; + + while (!tracecmd_msg_done(msg_handle)) { + ret = tracecmd_msg_recv_wait(msg_handle->fd, &msg); + if (ret < 0) { + tracecmd_warning("reading client %d (%s)", ret, strerror(ret)); + return ret; + } + + cmd = ntohl(msg.hdr.cmd); + if (cmd == MSG_FIN_DATA) { + /* Finish receiving data */ + break; + } else if (cmd != MSG_SEND_DATA) { + ret = handle_unexpected_msg(msg_handle, &msg); + if (ret < 0) + goto error; + goto next; + } + + n = msg_buf_len(&msg); + t = n; + s = 0; + while (t > 0) { + s = write(ofd, msg.buf+s, t); + if (s < 0) { + if (errno == EINTR) + continue; + tracecmd_warning("writing to file"); + ret = -errno; + goto error; + } + t -= s; + s = n - t; + } + +next: + msg_free(&msg); + } + + return 0; + +error: + error_operation(&msg); + msg_free(&msg); + return ret; +} + +int tracecmd_msg_collect_data(struct tracecmd_msg_handle *msg_handle, int ofd) +{ + int ret; + + ret = tracecmd_msg_read_data(msg_handle, ofd); + if (ret) + return ret; + + return tracecmd_msg_wait_close(msg_handle); +} + +static int tracecmd_msg_wait_for_cmd(struct tracecmd_msg_handle *msg_handle, enum tracecmd_msg_cmd cmd) +{ + struct tracecmd_msg msg; + int ret = -1; + + memset(&msg, 0, sizeof(msg)); + while (!tracecmd_msg_done(msg_handle)) { + ret = tracecmd_msg_recv(msg_handle->fd, &msg); + if (ret < 0) + goto error; + + if (ntohl(msg.hdr.cmd) == cmd) + return 0; + + error_operation(&msg); + ret = handle_unexpected_msg(msg_handle, &msg); + if (ret < 0) + goto error; + + msg_free(&msg); + } + +error: + msg_free(&msg); + return ret; +} + +int tracecmd_msg_wait_close(struct tracecmd_msg_handle *msg_handle) +{ + return tracecmd_msg_wait_for_cmd(msg_handle, MSG_CLOSE); +} + +int tracecmd_msg_wait_close_resp(struct 
tracecmd_msg_handle *msg_handle) +{ + return tracecmd_msg_wait_for_cmd(msg_handle, MSG_CLOSE_RESP); +} + +static int make_trace_req_protos(char **buf, int *size, + struct tracecmd_tsync_protos *protos) +{ + int protos_size = 1; + size_t buf_size; + char **names; + char *nbuf; + char *p; + + names = protos->names; + while (*names) { + protos_size += strlen(*names) + 1; + names++; + } + + buf_size = TRACE_REQ_PARAM_SIZE + protos_size; + nbuf = realloc(*buf, *size + buf_size); + if (!nbuf) + return -1; + + p = nbuf + *size; + memset(p, 0, buf_size); + + *(unsigned int *)p = htonl(TRACE_REQUEST_TSYNC_PROTOS); + p += sizeof(int); + *(unsigned int *)p = htonl(protos_size); + p += sizeof(int); + + names = protos->names; + while (*names) { + strcpy(p, *names); + p += strlen(*names) + 1; + names++; + } + p = NULL; + + *size += buf_size; + *buf = nbuf; + return 0; +} + +static int make_trace_req_args(char **buf, int *size, int argc, char **argv) +{ + size_t args_size; + size_t buf_size; + char *nbuf; + char *p; + int i; + + args_size = sizeof(int); + for (i = 0; i < argc; i++) + args_size += strlen(argv[i]) + 1; + + buf_size = TRACE_REQ_PARAM_SIZE + args_size; + nbuf = realloc(*buf, *size + buf_size); + if (!nbuf) + return -1; + + p = nbuf + *size; + memset(p, 0, buf_size); + + *(unsigned int *)p = htonl(TRACE_REQUEST_ARGS); + p += sizeof(int); + *(unsigned int *)p = htonl(args_size); + p += sizeof(int); + + *(unsigned int *)p = htonl(argc); + p += sizeof(int); + for (i = 0; i < argc; i++) + p = stpcpy(p, argv[i]) + 1; + + *size += buf_size; + *buf = nbuf; + return 0; +} + +static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv, + bool use_fifos, unsigned long long trace_id, + struct tracecmd_tsync_protos *protos) +{ + int size = 0; + char *buf = NULL; + + msg->trace_req.flags = 0; + if (use_fifos) + msg->trace_req.flags |= MSG_TRACE_USE_FIFOS; + msg->trace_req.flags = htonl(msg->trace_req.flags); + msg->trace_req.trace_id = htonll(trace_id); + + if (argc 
&& argv) + make_trace_req_args(&buf, &size, argc, argv); + if (protos && protos->names) + make_trace_req_protos(&buf, &size, protos); + + msg->buf = buf; + msg->hdr.size = htonl(ntohl(msg->hdr.size) + size); + + return 0; +} + +int tracecmd_msg_send_trace_req(struct tracecmd_msg_handle *msg_handle, + int argc, char **argv, bool use_fifos, + unsigned long long trace_id, + struct tracecmd_tsync_protos *protos) +{ + struct tracecmd_msg msg; + int ret; + + tracecmd_msg_init(MSG_TRACE_REQ, &msg); + ret = make_trace_req(&msg, argc, argv, use_fifos, trace_id, protos); + if (ret < 0) + return ret; + + return tracecmd_msg_send(msg_handle, &msg); +} + +static int get_trace_req_protos(char *buf, int length, + struct tracecmd_tsync_protos **protos) +{ + struct tracecmd_tsync_protos *plist = NULL; + int count = 0; + char *p; + int i, j; + + i = length; + p = buf; + while (i > 0) { + i -= strlen(p) + 1; + count++; + p += strlen(p) + 1; + } + + plist = calloc(1, sizeof(struct tracecmd_tsync_protos)); + if (!plist) + goto error; + plist->names = calloc(count + 1, sizeof(char *)); + if (!plist->names) + goto error; + i = length; + p = buf; + j = 0; + while (i > 0 && j < (count - 1)) { + i -= strlen(p) + 1; + plist->names[j++] = strdup(p); + p += strlen(p) + 1; + } + + *protos = plist; + return 0; +error: + if (plist) { + free(plist->names); + free(plist); + } + return -1; +} + +static int get_trace_req_args(char *buf, int length, int *argc, char ***argv) +{ + unsigned int nr_args; + char *p, *buf_end; + char **args = NULL; + char *vagrs = NULL; + int ret; + int i; + + if (length <= sizeof(int) || buf[length - 1] != '\0') { + ret = -EINVAL; + goto out; + } + + nr_args = ntohl(*(unsigned int *)buf); + buf += sizeof(int); + length -= sizeof(int); + + args = calloc(nr_args, sizeof(*args)); + if (!args) { + ret = -ENOMEM; + goto out; + } + + vagrs = calloc(length, sizeof(char)); + if (!vagrs) { + ret = -ENOMEM; + goto out; + } + + memcpy(vagrs, buf, length); + buf_end = vagrs + length; 
+ for (i = 0, p = vagrs; i < nr_args; i++, p++) { + if (p >= buf_end) { + ret = -EINVAL; + goto out; + } + args[i] = p; + p = strchr(p, '\0'); + } + + *argc = nr_args; + *argv = args; + return 0; + +out: + free(args); + free(vagrs); + return ret; + +} + +/* + * NOTE: On success, the returned `argv` should be freed with: + * free(argv[0]); + * free(argv); + * and `tsync_protos` with free(tsync_protos); + */ +int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle *msg_handle, + int *argc, char ***argv, bool *use_fifos, + unsigned long long *trace_id, + struct tracecmd_tsync_protos **protos) +{ + struct tracecmd_msg msg; + unsigned int param_id; + int param_length; + ssize_t buf_len; + char *p; + int ret; + + ret = tracecmd_msg_recv(msg_handle->fd, &msg); + if (ret < 0) + return ret; + + if (ntohl(msg.hdr.cmd) != MSG_TRACE_REQ) { + ret = -ENOTSUP; + goto out; + } + + buf_len = ntohl(msg.hdr.size) - MSG_HDR_LEN - ntohl(msg.hdr.cmd_size); + if (buf_len < 0) { + ret = -EINVAL; + goto out; + } + + *use_fifos = ntohl(msg.trace_req.flags) & MSG_TRACE_USE_FIFOS; + *trace_id = ntohll(msg.trace_req.trace_id); + p = msg.buf; + while (buf_len > 2 * sizeof(int)) { + param_id = ntohl(*((unsigned int *)p)); + p += sizeof(int); + buf_len -= sizeof(int); + param_length = ntohl(*((unsigned int *)p)); + p += sizeof(int); + buf_len -= sizeof(int); + if (buf_len < param_length) + break; + ret = 0; + switch (param_id) { + case TRACE_REQUEST_ARGS: + ret = get_trace_req_args(p, param_length, argc, argv); + break; + case TRACE_REQUEST_TSYNC_PROTOS: + ret = get_trace_req_protos(p, param_length, protos); + break; + default: + break; + } + if (ret) + break; + buf_len -= param_length; + p += param_length; + } + + msg_free(&msg); + return 0; + +out: + error_operation(&msg); + if (ret == -EOPNOTSUPP) + handle_unexpected_msg(msg_handle, &msg); + msg_free(&msg); + return ret; +} + +/** + * tracecmd_msg_send_time_sync - Send a time sync packet + * @msg_handle: message handle, holding the 
communication context + * @sync_protocol: name of the time synch protocol, string up to + * TRACECMD_TSYNC_PNAME_LENGTH characters length. + * @sync_msg_id: id if the time synch message, protocol dependent + * @payload_size: size of the packet payload, 0 in case of no payload + * @payload: pointer to the packet payload, or NULL in case of no payload + * + * Returns 0 if packet is sent successfully, or negative error otherwise. + */ +int tracecmd_msg_send_time_sync(struct tracecmd_msg_handle *msg_handle, + char *sync_protocol, unsigned int sync_msg_id, + unsigned int payload_size, char *payload) +{ + struct tracecmd_msg msg; + + tracecmd_msg_init(MSG_TIME_SYNC, &msg); + strncpy(msg.tsync.sync_protocol_name, sync_protocol, TRACECMD_TSYNC_PNAME_LENGTH); + msg.tsync.sync_msg_id = htonl(sync_msg_id); + msg.hdr.size = htonl(ntohl(msg.hdr.size) + payload_size); + + msg.buf = payload; + return msg_send_nofree(msg_handle, &msg); +} + +/** + * tracecmd_msg_recv_time_sync - Receive a time sync packet + * @msg_handle: message handle, holding the communication context + * @sync_protocol: return the name of the packet's time synch protocol. + * It must point to a prealocated buffer with size + * TRACECMD_TSYNC_PNAME_LENGTH + * @sync_msg_id: return the id of the packet's time synch message + * @payload_size: size of the packet's payload, can be: + * NULL - the payload is not interested and should be ignored + * pointer to int, with value 0 - update with the size of the payload + * allocate memory and cpy the payload + * into it + * pointer to int, with value greater than 0 - expected size of the + * payload, preallocated + * memory is passed to the API + * with that size + *@payload: pointer to the packet payload, can be: + * NULL - the payload is not interested and should be ignored + * pointer to char *, with value NULL - a new memory is allocated and returned + * here, containing the packet's payload + * the @payload_size is updated with the + * size of the allocated memory. 
It must be + * freed by free() + * pointer to char *, with no-NULL value - A prealocated array is passed, with size + * @payload_size. If payload's size is equal + * or less, it will be copied here. + * + * Returns 0 if packet is received successfully, or negative error otherwise. + */ +int tracecmd_msg_recv_time_sync(struct tracecmd_msg_handle *msg_handle, + char *sync_protocol, + unsigned int *sync_msg_id, + unsigned int *payload_size, char **payload) +{ + struct tracecmd_msg msg; + int ret = -1; + int buf_size; + + memset(&msg, 0, sizeof(msg)); + ret = tracecmd_msg_recv(msg_handle->fd, &msg); + if (ret < 0) + goto out; + + if (ntohl(msg.hdr.cmd) != MSG_TIME_SYNC) { + ret = -EOPNOTSUPP; + goto out; + } + + if (sync_protocol) + strncpy(sync_protocol, msg.tsync.sync_protocol_name, + TRACECMD_TSYNC_PNAME_LENGTH); + if (sync_msg_id) + *sync_msg_id = ntohl(msg.tsync.sync_msg_id); + + buf_size = msg_buf_len(&msg); + if (buf_size < 0) { + ret = -EINVAL; + goto out; + } + + if (buf_size && payload && payload_size) { + if (*payload_size) { + if (*payload_size < buf_size || *payload == NULL) { + ret = -ENOMEM; + goto out; + } + memcpy(*payload, msg.buf, buf_size); + goto out; + } + + *payload = malloc(buf_size); + if (*payload == NULL) { + ret = -ENOMEM; + goto out; + } + *payload_size = buf_size; + memcpy(*payload, msg.buf, buf_size); + } + +out: + msg_free(&msg); + return ret; +} + +static int make_trace_resp(struct tracecmd_msg *msg, int page_size, int nr_cpus, + unsigned int *ports, bool use_fifos, + unsigned long long trace_id, + const char *tsync_proto, + unsigned int tsync_port) +{ + int data_size; + + if (!tsync_proto) + tsync_proto = ""; + + data_size = write_uints(NULL, 0, ports, nr_cpus); + msg->buf = malloc(data_size); + if (!msg->buf) + return -ENOMEM; + write_uints(msg->buf, data_size, ports, nr_cpus); + + msg->hdr.size = htonl(ntohl(msg->hdr.size) + data_size); + msg->trace_resp.flags = use_fifos ? 
MSG_TRACE_USE_FIFOS : 0; + msg->trace_resp.flags = htonl(msg->trace_resp.flags); + strncpy(msg->trace_resp.tsync_proto_name, tsync_proto, TRACECMD_TSYNC_PNAME_LENGTH); + msg->trace_resp.tsync_port = htonl(tsync_port); + + msg->trace_resp.cpus = htonl(nr_cpus); + msg->trace_resp.page_size = htonl(page_size); + msg->trace_resp.trace_id = htonll(trace_id); + + return 0; +} + +int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle, + int nr_cpus, int page_size, + unsigned int *ports, bool use_fifos, + unsigned long long trace_id, + const char *tsync_proto, unsigned int tsync_port) +{ + struct tracecmd_msg msg; + int ret; + + tracecmd_msg_init(MSG_TRACE_RESP, &msg); + ret = make_trace_resp(&msg, page_size, nr_cpus, ports, + use_fifos, trace_id, tsync_proto, tsync_port); + if (ret < 0) + return ret; + + return tracecmd_msg_send(msg_handle, &msg); +} + +int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle, + int *nr_cpus, int *page_size, + unsigned int **ports, bool *use_fifos, + unsigned long long *trace_id, + char **tsync_proto, + unsigned int *tsync_port) +{ + struct tracecmd_msg msg; + char *p, *buf_end; + ssize_t buf_len; + int i, ret; + + ret = tracecmd_msg_recv(msg_handle->fd, &msg); + if (ret < 0) + return ret; + + if (ntohl(msg.hdr.cmd) != MSG_TRACE_RESP) { + ret = -ENOTSUP; + goto out; + } + + buf_len = msg_buf_len(&msg); + if (buf_len <= 0) { + ret = -EINVAL; + goto out; + } + + *use_fifos = ntohl(msg.trace_resp.flags) & MSG_TRACE_USE_FIFOS; + *nr_cpus = ntohl(msg.trace_resp.cpus); + *page_size = ntohl(msg.trace_resp.page_size); + *trace_id = ntohll(msg.trace_resp.trace_id); + *tsync_proto = strdup(msg.trace_resp.tsync_proto_name); + *tsync_port = ntohl(msg.trace_resp.tsync_port); + *ports = calloc(*nr_cpus, sizeof(**ports)); + if (!*ports) { + ret = -ENOMEM; + goto out; + } + + buf_end = msg.buf + buf_len; + for (i = 0, p = msg.buf; i < *nr_cpus; i++, p++) { + if (p >= buf_end || tatou(p, &(*ports)[i])) { + free(*ports); + 
ret = -EINVAL; + goto out; + } + p = strchr(p, '\0'); + } + + msg_free(&msg); + return 0; + +out: + error_operation(&msg); + if (ret == -EOPNOTSUPP) + handle_unexpected_msg(msg_handle, &msg); + msg_free(&msg); + return ret; +} diff --git a/lib/trace-cmd/trace-output.c b/lib/trace-cmd/trace-output.c new file mode 100644 index 00000000..ca7132e1 --- /dev/null +++ b/lib/trace-cmd/trace-output.c @@ -0,0 +1,2819 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#define _LARGEFILE64_SOURCE +#include <dirent.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <stdarg.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <unistd.h> +#include <ctype.h> +#include <errno.h> +#include <glob.h> + +#include "tracefs.h" +#include "trace-cmd.h" +#include "trace-cmd-local.h" +#include "trace-write-local.h" +#include "list.h" +#include "trace-msg.h" + +/* We can't depend on the host size for size_t, all must be 64 bit */ +typedef unsigned long long tsize_t; +typedef long long stsize_t; + +struct tracecmd_option { + unsigned short id; + int size; + void *data; + tsize_t offset; + struct list_head list; +}; + +struct tracecmd_buffer { + int cpus; + void *name; + tsize_t offset; + struct tracecmd_option *option; + struct list_head list; +}; + +enum { + OUTPUT_FL_SEND_META = (1 << 0), +}; + +struct tracecmd_output { + int fd; + int page_size; + int cpus; + struct tep_handle *pevent; + char *tracing_dir; + char *kallsyms; + int nr_options; + bool quiet; + unsigned long file_state; + unsigned long file_version; + + /* size of meta-data strings, not yet stored in the file */ + unsigned long strings_p; + /* current virtual offset of meta-data string */ + unsigned long strings_offs; + + unsigned long long options_start; + bool big_endian; + bool do_compress; + struct tracecmd_compression 
*compress; + + struct list_head options; + struct list_head buffers; + struct tracecmd_msg_handle *msg_handle; + char *trace_clock; + + /* meta-data strings, not yet stored in the file */ + char *strings; +}; + +struct list_event { + struct list_event *next; + char *name; + char *file; +}; + +struct list_event_system { + struct list_event_system *next; + struct list_event *events; + char *name; +}; + +#define HAS_SECTIONS(H) ((H)->file_version >= FILE_VERSION_SECTIONS) + +static int write_options(struct tracecmd_output *handle); +static int save_string_section(struct tracecmd_output *handle, bool compress); + +__hidden long long +do_write_check(struct tracecmd_output *handle, const void *data, long long size) +{ + if (handle->do_compress) + return tracecmd_compress_buffer_write(handle->compress, data, size); + + if (handle->msg_handle) + return tracecmd_msg_data_send(handle->msg_handle, data, size); + + return __do_write_check(handle->fd, data, size); +} + +static inline off64_t do_lseek(struct tracecmd_output *handle, off_t offset, int whence) +{ + if (handle->do_compress) + return tracecmd_compress_lseek(handle->compress, offset, whence); + + if (handle->msg_handle) + return msg_lseek(handle->msg_handle, offset, whence); + + return lseek64(handle->fd, offset, whence); +} + +static inline int do_preed(struct tracecmd_output *handle, void *dst, int len, off_t offset) +{ + if (handle->do_compress) + return tracecmd_compress_pread(handle->compress, dst, len, offset); + + return pread(handle->fd, dst, len, offset); +} + +static short convert_endian_2(struct tracecmd_output *handle, short val) +{ + if (!handle->pevent) + return val; + + return tep_read_number(handle->pevent, &val, 2); +} + +static int convert_endian_4(struct tracecmd_output *handle, int val) +{ + if (!handle->pevent) + return val; + + return tep_read_number(handle->pevent, &val, 4); +} + +static unsigned long long convert_endian_8(struct tracecmd_output *handle, + unsigned long long val) +{ + if 
(!handle->pevent) + return val; + + return tep_read_number(handle->pevent, &val, 8); +} + +__hidden void out_compression_reset(struct tracecmd_output *handle, bool compress) +{ + if (!compress || !handle->compress) + return; + + tracecmd_compress_reset(handle->compress); + handle->do_compress = false; +} + +__hidden int out_uncompress_block(struct tracecmd_output *handle) +{ + int ret = 0; + + if (!handle->compress) + return 0; + + ret = tracecmd_uncompress_block(handle->compress); + if (!ret) + handle->do_compress = true; + + return ret; +} + +__hidden int out_compression_start(struct tracecmd_output *handle, bool compress) +{ + if (!compress || !handle->compress) + return 0; + + tracecmd_compress_reset(handle->compress); + handle->do_compress = true; + + return 0; +} + +__hidden int out_compression_end(struct tracecmd_output *handle, bool compress) +{ + if (!compress || !handle->compress) + return 0; + + handle->do_compress = false; + return tracecmd_compress_block(handle->compress); +} + +static long add_string(struct tracecmd_output *handle, const char *string) +{ + int size = strlen(string) + 1; + int pos = handle->strings_p; + char *strings; + + strings = realloc(handle->strings, pos + size); + if (!strings) + return -1; + handle->strings = strings; + memcpy(handle->strings + pos, string, size); + handle->strings_p += size; + + return handle->strings_offs + pos; +} + +/** + * tracecmd_set_quiet - Set if to print output to the screen + * @quiet: If non zero, print no output to the screen + * + */ +void tracecmd_set_quiet(struct tracecmd_output *handle, bool set_quiet) +{ + if (handle) + handle->quiet = set_quiet; +} + +void tracecmd_set_out_clock(struct tracecmd_output *handle, const char *clock) +{ + if (handle && clock) { + free(handle->trace_clock); + handle->trace_clock = strdup(clock); + } +} + +/** + * tracecmd_get_quiet - Get if to print output to the screen + * Returns non zero, if no output to the screen should be printed + * + */ +bool 
tracecmd_get_quiet(struct tracecmd_output *handle) +{ + if (handle) + return handle->quiet; + return false; +} + +void tracecmd_output_free(struct tracecmd_output *handle) +{ + struct tracecmd_option *option; + struct tracecmd_buffer *buffer; + + if (!handle) + return; + + if (handle->tracing_dir) + free(handle->tracing_dir); + + if (handle->pevent) + tep_unref(handle->pevent); + + while (!list_empty(&handle->buffers)) { + buffer = container_of(handle->buffers.next, + struct tracecmd_buffer, list); + list_del(&buffer->list); + free(buffer->name); + free(buffer); + } + while (!list_empty(&handle->options)) { + option = container_of(handle->options.next, + struct tracecmd_option, list); + list_del(&option->list); + free(option->data); + free(option); + } + + free(handle->strings); + free(handle->trace_clock); + tracecmd_compress_destroy(handle->compress); + free(handle); +} + +void tracecmd_output_close(struct tracecmd_output *handle) +{ + if (!handle) + return; + + if (HAS_SECTIONS(handle)) { + /* write any unsaved options at the end of trace files with sections */ + write_options(handle); + + /* write strings section */ + save_string_section(handle, true); + } + + if (handle->fd >= 0) { + close(handle->fd); + handle->fd = -1; + } + + tracecmd_output_free(handle); +} +static unsigned long get_size_fd(int fd) +{ + unsigned long long size = 0; + char buf[BUFSIZ]; + int r; + + do { + r = read(fd, buf, BUFSIZ); + if (r > 0) + size += r; + } while (r > 0); + + lseek(fd, 0, SEEK_SET); + + return size; +} + +static unsigned long get_size(const char *file) +{ + unsigned long long size = 0; + int fd; + + fd = open(file, O_RDONLY); + if (fd < 0) { + tracecmd_warning("Can't read '%s'", file); + return 0; /* Caller will fail with zero */ + } + size = get_size_fd(fd); + close(fd); + + return size; +} + +static tsize_t copy_file_fd(struct tracecmd_output *handle, int fd, unsigned long long max) +{ + tsize_t rsize = BUFSIZ; + tsize_t size = 0; + char buf[BUFSIZ]; + stsize_t r; + + 
do { + if (max && rsize > max) + rsize = max; + + r = read(fd, buf, rsize); + if (r > 0) { + size += r; + if (do_write_check(handle, buf, r)) + return 0; + if (max) { + max -= r; + if (!max) + break; + } + } + } while (r > 0); + + return size; +} + +static tsize_t copy_file(struct tracecmd_output *handle, + const char *file) +{ + tsize_t size = 0; + int fd; + + fd = open(file, O_RDONLY); + if (fd < 0) { + tracecmd_warning("Can't read '%s'", file); + return 0; + } + size = copy_file_fd(handle, fd, 0); + close(fd); + + return size; +} + +#define PAGES_IN_CHUNK 10 +__hidden unsigned long long out_copy_fd_compress(struct tracecmd_output *handle, + int fd, unsigned long long max, + unsigned long long *write_size, + int page) +{ + unsigned long long rsize = 0; + unsigned long long wsize = 0; + unsigned long long size; + int ret; + + if (handle->compress) { + rsize = max; + ret = tracecmd_compress_copy_from(handle->compress, fd, + PAGES_IN_CHUNK * page, + &rsize, &wsize); + if (ret < 0) + return 0; + + size = rsize; + if (write_size) + *write_size = wsize; + } else { + size = copy_file_fd(handle, fd, max); + if (write_size) + *write_size = size; + } + + return size; +} + +static tsize_t copy_file_compress(struct tracecmd_output *handle, + const char *file, unsigned long long *write_size) +{ + int ret; + int fd; + + fd = open(file, O_RDONLY); + if (fd < 0) { + tracecmd_warning("Can't read '%s'", file); + return 0; + } + + ret = out_copy_fd_compress(handle, fd, 0, write_size, getpagesize()); + if (!ret) + tracecmd_warning("Can't compress '%s'", file); + + close(fd); + return ret; +} + +/* + * Finds the path to the debugfs/tracing + * Allocates the string and stores it. 
+ */ +static const char *find_tracing_dir(struct tracecmd_output *handle) +{ + if (!handle->tracing_dir) { + const char *dir = tracefs_tracing_dir(); + + if (dir) + handle->tracing_dir = strdup(dir); + } + return handle->tracing_dir; +} + +/* Build the path of @name inside the tracing directory of @handle. Returns an allocated string (release it with put_tracing_file()), or NULL if no tracing directory is found or asprintf() fails. */ static char *get_tracing_file(struct tracecmd_output *handle, const char *name) +{ + const char *tracing; + char *file; + int ret; + + tracing = find_tracing_dir(handle); + if (!tracing) + return NULL; + + ret = asprintf(&file, "%s/%s", tracing, name); + if (ret < 0) + return NULL; + + return file; +} + +/* Release a path returned by get_tracing_file(). */ static void put_tracing_file(char *file) +{ + free(file); +} + +/* Write 1 (@set is non-zero) or 0 to /proc/sys/kernel/ftrace_enabled. Returns 0 on success, ENODEV if the file does not exist, EIO if it cannot be opened for writing, or -1 if the write fails. */ int tracecmd_ftrace_enable(int set) +{ + struct stat buf; + char *path = "/proc/sys/kernel/ftrace_enabled"; + int fd; + char *val = set ? "1" : "0"; + int ret = 0; + + /* if ftrace_enabled does not exist there is nothing to toggle: report ENODEV (fd only holds the stat() result here) */ + fd = stat(path, &buf); + if (fd < 0) + return ENODEV; + + fd = open(path, O_WRONLY); + if (fd < 0) { + tracecmd_warning("Can't %s ftrace", set ? "enable" : "disable"); + return EIO; + } + + if (write(fd, val, 1) < 0) + ret = -1; + close(fd); + + return ret; +} + +__hidden unsigned long long +out_write_section_header(struct tracecmd_output *handle, unsigned short header_id, + char *description, int flags, bool option) +{ + tsize_t endian8; + tsize_t offset; + long long size; + short endian2; + int endian4; + int desc; + + if (header_id >= TRACECMD_OPTION_MAX) + return -1; + if (!HAS_SECTIONS(handle)) + return 0; + if (!handle->compress) + flags &= ~TRACECMD_SEC_FL_COMPRESS; + offset = do_lseek(handle, 0, SEEK_CUR); + if (option) { + endian8 = convert_endian_8(handle, offset); + if (!tracecmd_add_option(handle, header_id, 8, &endian8)) + return -1; + } + /* Section ID */ + endian2 = convert_endian_2(handle, header_id); + if (do_write_check(handle, &endian2, 2)) + return (off64_t)-1; + + /* Section flags */ + endian2 = convert_endian_2(handle, flags); + if (do_write_check(handle, &endian2, 2)) + return (off64_t)-1; + + /* Section description */ + if 
(description) + desc = add_string(handle, description); + else + desc = -1; + endian4 = convert_endian_4(handle, desc); + if (do_write_check(handle, &endian4, 4)) + return (off64_t)-1; + + offset = do_lseek(handle, 0, SEEK_CUR); + size = 0; + /* Reserve for section size */ + if (do_write_check(handle, &size, 8)) + return (off64_t)-1; + return offset; +} + +__hidden int out_update_section_header(struct tracecmd_output *handle, tsize_t offset) +{ + tsize_t current; + tsize_t endian8; + tsize_t size; + + if (!HAS_SECTIONS(handle) || offset == 0) + return 0; + + current = do_lseek(handle, 0, SEEK_CUR); + /* The real size is the difference between the saved offset and + * the current offset - 8 bytes, the reserved space for the section size. + */ + size = current - offset; + if (size < 8) + return -1; + size -= 8; + if (do_lseek(handle, offset, SEEK_SET) == (off64_t)-1) + return -1; + + endian8 = convert_endian_8(handle, size); + if (do_write_check(handle, &endian8, 8)) + return -1; + if (do_lseek(handle, current, SEEK_SET) == (off64_t)-1) + return -1; + return 0; +} + +static int save_string_section(struct tracecmd_output *handle, bool compress) +{ + enum tracecmd_section_flags flags = 0; + tsize_t offset; + + if (!handle->strings || !handle->strings_p) + return 0; + + if (!check_out_state(handle, TRACECMD_OPTION_STRINGS)) { + tracecmd_warning("Cannot write strings, unexpected state 0x%X", + handle->file_state); + return -1; + } + + if (compress) + flags |= TRACECMD_SEC_FL_COMPRESS; + offset = out_write_section_header(handle, TRACECMD_OPTION_STRINGS, "strings", flags, false); + if (offset == (off64_t)-1) + return -1; + + out_compression_start(handle, compress); + + if (do_write_check(handle, handle->strings, handle->strings_p)) + goto error; + + if (out_compression_end(handle, compress)) + goto error; + + if (out_update_section_header(handle, offset)) + return -1; + + handle->strings_offs += handle->strings_p; + free(handle->strings); + handle->strings = NULL; + 
handle->strings_p = 0; + handle->file_state = TRACECMD_OPTION_STRINGS; + return 0; + +error: + out_compression_reset(handle, compress); + return -1; +} + +static int read_header_files(struct tracecmd_output *handle, bool compress) +{ + enum tracecmd_section_flags flags = 0; + tsize_t size, check_size, endian8; + struct stat st; + tsize_t offset; + char *path; + int fd = -1; + int ret; + + if (!check_out_state(handle, TRACECMD_FILE_HEADERS)) { + tracecmd_warning("Cannot read header files, unexpected state 0x%X", + handle->file_state); + return -1; + } + + path = get_tracing_file(handle, "events/header_page"); + if (!path) + return -1; + + if (compress) + flags |= TRACECMD_SEC_FL_COMPRESS; + offset = out_write_section_header(handle, TRACECMD_OPTION_HEADER_INFO, + "headers", flags, true); + if (offset == (off64_t)-1) + return -1; + + out_compression_start(handle, compress); + ret = stat(path, &st); + if (ret < 0) { + /* old style did not show this info, just add zero */ + put_tracing_file(path); + if (do_write_check(handle, "header_page", 12)) + goto out_close; + size = 0; + if (do_write_check(handle, &size, 8)) + goto out_close; + if (do_write_check(handle, "header_event", 13)) + goto out_close; + if (do_write_check(handle, &size, 8)) + goto out_close; + if (out_compression_end(handle, compress)) + goto out_close; + if (out_update_section_header(handle, offset)) + goto out_close; + return 0; + } + + fd = open(path, O_RDONLY); + if (fd < 0) { + tracecmd_warning("can't read '%s'", path); + goto out_close; + } + + /* unfortunately, you can not stat debugfs files for size */ + size = get_size_fd(fd); + + if (do_write_check(handle, "header_page", 12)) + goto out_close; + endian8 = convert_endian_8(handle, size); + if (do_write_check(handle, &endian8, 8)) + goto out_close; + check_size = copy_file_fd(handle, fd, 0); + close(fd); + if (size != check_size) { + tracecmd_warning("wrong size for '%s' size=%lld read=%lld", path, size, check_size); + errno = EINVAL; + goto 
out_close; + } + put_tracing_file(path); + + path = get_tracing_file(handle, "events/header_event"); + if (!path) + goto out_close; + + fd = open(path, O_RDONLY); + if (fd < 0) { + tracecmd_warning("can't read '%s'", path); + goto out_close; + } + + size = get_size_fd(fd); + + if (do_write_check(handle, "header_event", 13)) + goto out_close; + endian8 = convert_endian_8(handle, size); + if (do_write_check(handle, &endian8, 8)) + goto out_close; + check_size = copy_file_fd(handle, fd, 0); + close(fd); /* NOTE(review): fd is closed here but keeps its old non-negative value, so every later jump to out_close closes the same descriptor number a second time - confirm and reset fd to -1 after closing */ + if (size != check_size) { + tracecmd_warning("wrong size for '%s'", path); + goto out_close; + } + put_tracing_file(path); + if (out_compression_end(handle, compress)) + goto out_close; + + if (out_update_section_header(handle, offset)) + goto out_close; + handle->file_state = TRACECMD_FILE_HEADERS; + + return 0; + + out_close: /* Error exit: drop any pending compressed data and close the descriptor. NOTE(review): path is not released with put_tracing_file() on the error paths that jump here - looks like a leak, confirm */ + out_compression_reset(handle, compress); + if (fd >= 0) + close(fd); + return -1; +} + +/* Write the events of one system: a 4-byte event count, then for every event whose format file can be stat()ed an 8-byte size followed by the file content. Returns 0 on success, -1 on a write error or when the copied size differs from the measured size. NOTE(review): events whose stat() fails are still included in the count but add no record - confirm readers tolerate this. */ static int copy_event_system(struct tracecmd_output *handle, + struct list_event_system *slist) +{ + struct list_event *elist; + unsigned long long size, check_size, endian8; + struct stat st; + char *format; + int endian4; + int count = 0; + int ret; + + for (elist = slist->events; elist; elist = elist->next) + count++; + + endian4 = convert_endian_4(handle, count); + if (do_write_check(handle, &endian4, 4)) + return -1; + + for (elist = slist->events; elist; elist = elist->next) { + format = elist->file; + ret = stat(format, &st); + + if (ret >= 0) { + /* unfortunately, stat() cannot be used to get the size of debugfs files, so the size is obtained with get_size() */ + size = get_size(format); + endian8 = convert_endian_8(handle, size); + if (do_write_check(handle, &endian8, 8)) + return -1; + check_size = copy_file(handle, format); + if (size != check_size) { + tracecmd_warning("error in size of file '%s'", format); + return -1; + } + } + } + + return 0; +} + +static void add_list_event_system(struct list_event_system **systems, + const char *system, + const char *event, + const char *path) +{ + struct 
list_event_system *slist; + struct list_event *elist; + + for (slist = *systems; slist; slist = slist->next) + if (strcmp(slist->name, system) == 0) + break; + + if (!slist) { + slist = malloc(sizeof(*slist)); + if (!slist) + goto err_mem; + slist->name = strdup(system); + if (!slist->name) { + free(slist); + goto err_mem; + } + slist->next = *systems; + slist->events = NULL; + *systems = slist; + } + + for (elist = slist->events; elist; elist = elist->next) + if (strcmp(elist->name, event) == 0) + break; + + if (!elist) { + elist = malloc(sizeof(*elist)); + if (!elist) + goto err_mem; + elist->name = strdup(event); + elist->file = strdup(path); + if (!elist->name || !elist->file) { + free(elist->name); + free(elist->file); + free(elist); + goto err_mem; + } + elist->next = slist->events; + slist->events = elist; + } + return; + err_mem: + tracecmd_warning("Insufficient memory"); +} + +static void free_list_events(struct list_event_system *list) +{ + struct list_event_system *slist; + struct list_event *elist; + + while (list) { + slist = list; + list = list->next; + while (slist->events) { + elist = slist->events; + slist->events = elist->next; + free(elist->name); + free(elist->file); + free(elist); + } + free(slist->name); + free(slist); + } +} + +static void glob_events(struct tracecmd_output *handle, + struct list_event_system **systems, + const char *str) +{ + glob_t globbuf; + char *events_path; + char *system; + char *event; + char *path; + char *file; + char *ptr; + int do_ftrace = 0; + int events_len; + int ret; + int i; + + if (strncmp(str, "ftrace/", 7) == 0) + do_ftrace = 1; + + events_path = get_tracing_file(handle, "events"); + events_len = strlen(events_path); + + path = malloc(events_len + strlen(str) + + strlen("/format") + 2); + if (!path) + return; + path[0] = '\0'; + strcat(path, events_path); + strcat(path, "/"); + strcat(path, str); + strcat(path, "/format"); + put_tracing_file(events_path); + + globbuf.gl_offs = 0; + ret = glob(path, 0, 
NULL, &globbuf); + free(path); + if (ret < 0) + return; + + for (i = 0; i < globbuf.gl_pathc; i++) { + file = globbuf.gl_pathv[i]; + system = strdup(file + events_len + 1); + system = strtok_r(system, "/", &ptr); + if (!ptr) { + /* ?? should we warn? */ + free(system); + continue; + } + + if (!do_ftrace && strcmp(system, "ftrace") == 0) { + free(system); + continue; + } + + event = strtok_r(NULL, "/", &ptr); + if (!ptr) { + /* ?? should we warn? */ + free(system); + continue; + } + + add_list_event_system(systems, system, event, file); + free(system); + } + globfree(&globbuf); +} + +static void +create_event_list_item(struct tracecmd_output *handle, + struct list_event_system **systems, + struct tracecmd_event_list *list) +{ + char *ptr; + char *str; + + str = strdup(list->glob); + if (!str) + goto err_mem; + + /* system and event names are separated by a ':' */ + ptr = strchr(str, ':'); + if (ptr) + *ptr = '/'; + else + /* system and event may also be separated by a '/' */ + ptr = strchr(str, '/'); + + if (ptr) { + glob_events(handle, systems, str); + free(str); + return; + } + + ptr = str; + str = malloc(strlen(ptr) + 3); + if (!str) + goto err_mem; + str[0] = '\0'; + strcat(str, ptr); + strcat(str, "/*"); + glob_events(handle, systems, str); + + str[0] = '\0'; + strcat(str, "*/"); + strcat(str, ptr); + glob_events(handle, systems, str); + + free(ptr); + free(str); + return; + err_mem: + tracecmd_warning("Insufficient memory"); +} + +static int read_ftrace_files(struct tracecmd_output *handle, bool compress) +{ + enum tracecmd_section_flags flags = 0; + struct list_event_system *systems = NULL; + struct tracecmd_event_list list = { .glob = "ftrace/*" }; + tsize_t offset; + int ret; + + if (!check_out_state(handle, TRACECMD_FILE_FTRACE_EVENTS)) { + tracecmd_warning("Cannot read ftrace files, unexpected state 0x%X", + handle->file_state); + return -1; + } + + if (compress) + flags |= TRACECMD_SEC_FL_COMPRESS; + offset = out_write_section_header(handle, 
TRACECMD_OPTION_FTRACE_EVENTS, + "ftrace events", flags, true); + if (offset == (off64_t)-1) + return -1; + + create_event_list_item(handle, &systems, &list); + out_compression_start(handle, compress); + + ret = copy_event_system(handle, systems); + if (!ret) + ret = out_compression_end(handle, compress); + else + out_compression_reset(handle, compress); + + free_list_events(systems); + if (ret) + return ret; + if (out_update_section_header(handle, offset)) + return -1; + + handle->file_state = TRACECMD_FILE_FTRACE_EVENTS; + + return ret; +} + +static struct list_event_system * +create_event_list(struct tracecmd_output *handle, + struct tracecmd_event_list *event_list) +{ + struct list_event_system *systems = NULL; + struct tracecmd_event_list *list; + + for (list = event_list; list; list = list->next) + create_event_list_item(handle, &systems, list); + + return systems; +} + +static int read_event_files(struct tracecmd_output *handle, + struct tracecmd_event_list *event_list, bool compress) +{ + enum tracecmd_section_flags flags = 0; + struct list_event_system *systems; + struct list_event_system *slist; + struct tracecmd_event_list *list; + struct tracecmd_event_list all_events = { .glob = "*/*" }; + int count = 0; + tsize_t offset; + int endian4; + int ret; + + if (!check_out_state(handle, TRACECMD_FILE_ALL_EVENTS)) { + tracecmd_warning("Cannot read event files, unexpected state 0x%X", + handle->file_state); + return -1; + } + + if (compress) + flags |= TRACECMD_SEC_FL_COMPRESS; + offset = out_write_section_header(handle, TRACECMD_OPTION_EVENT_FORMATS, + "events format", flags, true); + if (offset == (off64_t)-1) + return -1; + /* + * If any of the list is the special keyword "all" then + * just do all files. 
+ */ + for (list = event_list; list; list = list->next) { + if (strcmp(list->glob, "all") == 0) + break; + } + /* all events are listed, use a global glob */ + if (!event_list || list) + event_list = &all_events; + + systems = create_event_list(handle, event_list); + + for (slist = systems; slist; slist = slist->next) + count++; + out_compression_start(handle, compress); + ret = -1; + endian4 = convert_endian_4(handle, count); + if (do_write_check(handle, &endian4, 4)) + goto out_free; + + ret = 0; + for (slist = systems; !ret && slist; slist = slist->next) { + if (do_write_check(handle, slist->name, + strlen(slist->name) + 1)) { + ret = -1; + continue; + } + ret = copy_event_system(handle, slist); + } + if (ret) + goto out_free; + + ret = out_compression_end(handle, compress); + if (ret) + goto out_free; + ret = out_update_section_header(handle, offset); + + out_free: + if (!ret) + handle->file_state = TRACECMD_FILE_ALL_EVENTS; + else + out_compression_reset(handle, compress); + + free_list_events(systems); + + return ret; +} + +#define KPTR_UNINITIALIZED 'X' + +static void set_proc_kptr_restrict(int reset) +{ + char *path = "/proc/sys/kernel/kptr_restrict"; + static char saved = KPTR_UNINITIALIZED; + int fd, ret = -1; + struct stat st; + char buf; + + if ((reset && saved == KPTR_UNINITIALIZED) || + (stat(path, &st) < 0)) + return; + + fd = open(path, O_RDONLY); + if (fd < 0) + goto err; + + if (reset) { + buf = saved; + } else { + if (read(fd, &buf, 1) < 0) + goto err; + saved = buf; + buf = '0'; + } + close(fd); + + fd = open(path, O_WRONLY); + if (fd < 0) + goto err; + if (write(fd, &buf, 1) > 0) + ret = 0; +err: + if (fd > 0) + close(fd); + if (ret) + tracecmd_warning("can't set kptr_restrict"); +} + +static int read_proc_kallsyms(struct tracecmd_output *handle, bool compress) +{ + enum tracecmd_section_flags flags = 0; + unsigned int size, check_size, endian4; + const char *path = "/proc/kallsyms"; + tsize_t offset; + struct stat st; + int ret; + + if 
(!check_out_state(handle, TRACECMD_FILE_KALLSYMS)) { + tracecmd_warning("Cannot read kallsyms, unexpected state 0x%X", + handle->file_state); + return -1; + } + + if (handle->kallsyms) + path = handle->kallsyms; + + if (compress) + flags |= TRACECMD_SEC_FL_COMPRESS; + offset = out_write_section_header(handle, TRACECMD_OPTION_KALLSYMS, + "kallsyms", flags, true); + if (offset == (off64_t)-1) + return -1; + + out_compression_start(handle, compress); + ret = stat(path, &st); + if (ret < 0) { + /* not found */ + size = 0; + endian4 = convert_endian_4(handle, size); + ret = do_write_check(handle, &endian4, 4); + goto out; + } + size = get_size(path); + endian4 = convert_endian_4(handle, size); + ret = do_write_check(handle, &endian4, 4); + if (ret) + goto out; + + set_proc_kptr_restrict(0); + check_size = copy_file(handle, path); + if (size != check_size) { + errno = EINVAL; + tracecmd_warning("error in size of file '%s'", path); + set_proc_kptr_restrict(1); + ret = -1; + goto out; + } + set_proc_kptr_restrict(1); + + ret = out_compression_end(handle, compress); + if (ret) + goto out; + + ret = out_update_section_header(handle, offset); +out: + if (!ret) + handle->file_state = TRACECMD_FILE_KALLSYMS; + else + out_compression_reset(handle, compress); + return ret; +} + +static int read_ftrace_printk(struct tracecmd_output *handle, bool compress) +{ + enum tracecmd_section_flags flags = 0; + unsigned int size, check_size, endian4; + tsize_t offset; + struct stat st; + char *path; + int ret; + + if (!check_out_state(handle, TRACECMD_FILE_PRINTK)) { + tracecmd_warning("Cannot read printk, unexpected state 0x%X", + handle->file_state); + return -1; + } + + path = get_tracing_file(handle, "printk_formats"); + if (!path) + return -1; + + if (compress) + flags |= TRACECMD_SEC_FL_COMPRESS; + offset = out_write_section_header(handle, TRACECMD_OPTION_PRINTK, "printk", flags, true); + if (offset == (off64_t)-1) + return -1; + + out_compression_start(handle, compress); + ret = 
stat(path, &st); + if (ret < 0) { + /* not found */ + size = 0; + endian4 = convert_endian_4(handle, size); + if (do_write_check(handle, &endian4, 4)) + goto fail; + goto out; + } + size = get_size(path); + endian4 = convert_endian_4(handle, size); + if (do_write_check(handle, &endian4, 4)) + goto fail; + check_size = copy_file(handle, path); + if (size != check_size) { + errno = EINVAL; + tracecmd_warning("error in size of file '%s'", path); + goto fail; + } + + out: + put_tracing_file(path); + if (out_compression_end(handle, compress)) + return -1; + + if (out_update_section_header(handle, offset)) + return -1; + handle->file_state = TRACECMD_FILE_PRINTK; + return 0; + fail: + put_tracing_file(path); + out_compression_reset(handle, compress); + return -1; +} + +static int save_tracing_file_data(struct tracecmd_output *handle, + const char *filename) +{ + unsigned long long endian8; + char *file = NULL; + struct stat st; + off64_t check_size; + off64_t size; + int ret = -1; + + file = get_tracing_file(handle, filename); + if (!file) + return -1; + + ret = stat(file, &st); + if (ret >= 0) { + size = get_size(file); + endian8 = convert_endian_8(handle, size); + if (do_write_check(handle, &endian8, 8)) + goto out_free; + check_size = copy_file(handle, file); + if (size != check_size) { + errno = EINVAL; + tracecmd_warning("error in size of file '%s'", file); + goto out_free; + } + } else { + size = 0; + endian8 = convert_endian_8(handle, size); + if (do_write_check(handle, &endian8, 8)) + goto out_free; + } + ret = 0; + +out_free: + put_tracing_file(file); + return ret; +} + +static int write_compression_header(struct tracecmd_output *handle) +{ + const char *name = NULL; + const char *ver = NULL; + int ret; + + ret = tracecmd_compress_proto_get_name(handle->compress, &name, &ver); + if (ret < 0 || !name || !ver) { + name = "none"; + ver = ""; + } + + if (do_write_check(handle, name, strlen(name) + 1)) + return -1; + + if (do_write_check(handle, ver, strlen(ver) + 
1)) + return -1; + + return 0; +} + +static int get_trace_page_size(struct tracecmd_output *handle, const char *name) +{ + struct tracefs_instance *instance; + struct tep_handle *tep = NULL; + int psize, size; + char *buff = NULL; + + /* In case of an error, return user space page size */ + psize = getpagesize(); + + instance = tracefs_instance_alloc(find_tracing_dir(handle), name); + if (!instance) + goto out; + + buff = tracefs_instance_file_read(instance, "events/header_page", &size); + if (!buff) + goto out; + + tep = tep_alloc(); + if (!tep) + goto out; + + if (tep_parse_header_page(tep, buff, size, sizeof(long long))) + goto out; + + psize = tep_get_sub_buffer_size(tep); + +out: + tracefs_instance_free(instance); + tep_free(tep); + free(buff); + + return psize; +} + +/** + * tracecmd_output_create_fd - allocate new output handle to a trace file + * @fd: File descriptor for the handle to write to. + * + * Allocate a tracecmd_output descriptor and perform minimal initialization. + * @fd will be set as the file descriptor for the handle. Nothing is + * written in the file yet, and if @fd is -1, then all writes will be ignored. + * + * Returns a pointer to a newly allocated file descriptor for the use of creating + * a tracecmd data file. In case of an error, NULL is returned. 
The returned + * handle must be freed with tracecmd_output_close() or tracecmd_output_free() + */ +struct tracecmd_output *tracecmd_output_create_fd(int fd) +{ + struct tracecmd_output *handle; + + handle = calloc(1, sizeof(*handle)); + if (!handle) + return NULL; + + handle->fd = fd; + + handle->file_version = FILE_VERSION_DEFAULT; + + handle->page_size = get_trace_page_size(handle, NULL); + handle->big_endian = tracecmd_host_bigendian(); + + list_head_init(&handle->options); + list_head_init(&handle->buffers); + + handle->file_state = TRACECMD_FILE_ALLOCATED; + + return handle; +} + +/** + * tracecmd_output_set_msg - associate an output file handle with a network message handle + * @handle: output handle to a trace file. + * @msg_handle: network handle, allocated by tracecmd_msg_handle_alloc() + * + * Associate an output file handle (@handle) to a network stream (@msg_handle). + * All subsequent calls to @handle will send data over the network using @msg_handle + * instead of writing to a file. + * + * This must be called after the handle file version is set and before calling + * tracecmd_output_write_headers(). + * + * Returns 0 on success, or -1 if the output file handle is not allocated or not + * in the expected state. + */ +int tracecmd_output_set_msg(struct tracecmd_output *handle, struct tracecmd_msg_handle *msg_handle) +{ + if (!handle || handle->file_state != TRACECMD_FILE_ALLOCATED) + return -1; + + handle->msg_handle = msg_handle; + /* Force messages to be cached in a temp file before sending through the socket */ + if (handle->msg_handle && HAS_SECTIONS(handle)) + tracecmd_msg_handle_cache(handle->msg_handle); + + return 0; +} + +/** + * tracecmd_output_set_trace_dir - Set a custom tracing dir, instead of system default + * @handle: output handle to a trace file. 
+ * @tracing_dir: full path to a directory with tracing files, or NULL to + * drop a previously set custom directory + * + * Associate the output file handle (@handle) with a custom tracing directory + * (@tracing_dir), to be used when creating the trace file instead of using the + * system default tracing directory. The path is duplicated; any previously + * stored directory is freed first. + * + * Must be called before tracecmd_output_write_headers(). + * + * Returns 0 on success, or -1 if the output file handle is not allocated, not + * in the expected state, or the path cannot be duplicated. + */ +int tracecmd_output_set_trace_dir(struct tracecmd_output *handle, const char *tracing_dir) +{ + if (!handle || handle->file_state != TRACECMD_FILE_ALLOCATED) + return -1; + + free(handle->tracing_dir); + if (tracing_dir) { + handle->tracing_dir = strdup(tracing_dir); + if (!handle->tracing_dir) + return -1; + } else + handle->tracing_dir = NULL; + + return 0; +} + +/** + * tracecmd_output_set_kallsyms - Set a custom kernel symbols file + * @handle: output handle to a trace file. + * @kallsyms: full path to a file with kernel symbols, or NULL to drop a + * previously set custom file + * + * Have the output file handle (@handle) use a custom kernel symbols file instead + * of the default /proc/kallsyms. The path is duplicated; any previously stored + * path is freed first. + * + * Must be called before tracecmd_output_write_headers(). + * + * Returns 0 on success, or -1 if the output file handle is not allocated, + * not in the expected state, or the path cannot be duplicated. + */ +int tracecmd_output_set_kallsyms(struct tracecmd_output *handle, const char *kallsyms) +{ + if (!handle || handle->file_state != TRACECMD_FILE_ALLOCATED) + return -1; + + free(handle->kallsyms); + if (kallsyms) { + handle->kallsyms = strdup(kallsyms); + if (!handle->kallsyms) + return -1; + } else + handle->kallsyms = NULL; + + return 0; +} + +/** + * tracecmd_output_set_from_input - Inherit parameters from an existing trace file + * @handle: output handle to a trace file. + * @ihandle: input handle to an existing trace file. + * + * Have the output file handle (@handle) inherit the properties of a given + * input file handle (@ihandle). 
+ * + * The parameters that are copied are: + * - tep handle + * - page size + * - file endian + * - file version + * - file compression protocol + * + * Must be called before tracecmd_output_write_headers(). + * + * Returns 0 on success, or -1 if the output file handle is not allocated or + * not in expected state. + */ +int tracecmd_output_set_from_input(struct tracecmd_output *handle, struct tracecmd_input *ihandle) +{ + const char *cname = NULL; + const char *cver = NULL; + + if (!handle || !ihandle || handle->file_state != TRACECMD_FILE_ALLOCATED) + return -1; + + /* get endian, page size, file version and compression */ + /* Use the pevent of the ihandle for later writes */ + handle->pevent = tracecmd_get_tep(ihandle); + tep_ref(handle->pevent); + handle->page_size = tracecmd_page_size(ihandle); + handle->file_version = tracecmd_get_in_file_version(ihandle); + handle->big_endian = tep_is_file_bigendian(handle->pevent); + + if (!tracecmd_get_file_compress_proto(ihandle, &cname, &cver)) { + handle->compress = tracecmd_compress_alloc(cname, cver, handle->fd, + handle->pevent, handle->msg_handle); + if (!handle->compress) + return -1; + + if (handle->file_version < FILE_VERSION_COMPRESSION) + handle->file_version = FILE_VERSION_COMPRESSION; + } + + return 0; +} + +/** + * tracecmd_output_set_version - Set file version of the output handle + * @handle: output handle to a trace file. + * @file_version: desired file version + * + * This API must be called before tracecmd_output_write_headers(). + * + * Returns 0 on success, or -1 if the output file handle is not allocated or not in expected state. 
+ */ +int tracecmd_output_set_version(struct tracecmd_output *handle, int file_version) +{ + if (!handle || handle->file_state != TRACECMD_FILE_ALLOCATED) + return -1; + if (file_version < FILE_VERSION_MIN || file_version > FILE_VERSION_MAX) + return -1; + handle->file_version = file_version; + /* Versions below FILE_VERSION_COMPRESSION cannot carry compressed data, so compression is switched off. NOTE(review): the previous handle->compress pointer is overwritten without tracecmd_compress_destroy() - presumably a leak if compression was already configured; confirm */ + if (handle->file_version < FILE_VERSION_COMPRESSION) + handle->compress = NULL; + return 0; +} + +/** + * tracecmd_output_set_compression - Set file compression algorithm of the output handle + * @handle: output handle to a trace file. + * @compression: name of the desired compression algorithm. Can be one of: + * - NULL or "none" - do not use compression + * - "any" - use the best available compression algorithm + * - or specific name of the desired compression algorithm + * + * This API must be called before tracecmd_output_write_headers(). + * If compression gets enabled and the file version is lower than + * FILE_VERSION_COMPRESSION, the file version is raised to it. + * + * Returns 0 on success, or -1 in case of an error: + * - the output file handle is not allocated or not in expected state. + * - the specified compression algorithm is not available + * With "any", a missing algorithm only triggers a warning and 0 is returned + * with compression left disabled. + */ +int tracecmd_output_set_compression(struct tracecmd_output *handle, const char *compression) +{ + if (!handle || handle->file_state != TRACECMD_FILE_ALLOCATED) + return -1; + + /* NOTE(review): same as in tracecmd_output_set_version(), an earlier compression context is dropped here without being destroyed - confirm ownership */ + handle->compress = NULL; + if (compression && strcmp(compression, "none")) { + if (!strcmp(compression, "any")) { + handle->compress = tracecmd_compress_alloc(NULL, NULL, handle->fd, + handle->pevent, + handle->msg_handle); + if (!handle->compress) + tracecmd_warning("No compression algorithms are supported"); + } else { + handle->compress = tracecmd_compress_alloc(compression, NULL, handle->fd, + handle->pevent, + handle->msg_handle); + if (!handle->compress) { + tracecmd_warning("Compression algorithm %s is not supported", + compression); + return -1; + } + } + } + if (handle->compress && handle->file_version < FILE_VERSION_COMPRESSION) { + handle->file_version = FILE_VERSION_COMPRESSION; + if (handle->msg_handle) + tracecmd_msg_handle_cache(handle->msg_handle); + } + + return 
0; +} + +/** + * output_write_init - Write the initial data into the trace file + * @handle: output handle to a trace file. + * + * Must be called after tracecmd_output_set_*() functions and before writing + * anything else. + * + * The initial information to be written into the file: + * - initial file magic bytes + * - file version + * - data endian + * - long size + * - page size + * - compression header + * + * Returns 0 on success, or -1 if the output file handle is not allocated or + * not in the expected state. + */ +static int output_write_init(struct tracecmd_output *handle) +{ + unsigned long long offset; + char buf[BUFSIZ]; + int endian4; + + if (!handle || handle->file_state != TRACECMD_FILE_ALLOCATED) + return -1; + + buf[0] = 23; + buf[1] = 8; + buf[2] = 68; + memcpy(buf + 3, "tracing", 7); + + if (do_write_check(handle, buf, 10)) + return -1; + + sprintf(buf, "%lu", handle->file_version); + if (do_write_check(handle, buf, strlen(buf) + 1)) + return -1; + + if (handle->big_endian) + buf[0] = 1; + else + buf[0] = 0; + if (do_write_check(handle, buf, 1)) + return -1; + + /* save size of long (this may not be what the kernel is) */ + buf[0] = sizeof(long); + if (do_write_check(handle, buf, 1)) + return -1; + + endian4 = convert_endian_4(handle, handle->page_size); + if (do_write_check(handle, &endian4, 4)) + return -1; + + if (handle->file_version >= FILE_VERSION_COMPRESSION) { + if (write_compression_header(handle)) + return -1; + } + + if (HAS_SECTIONS(handle)) { + /* Write 0 as options offset and save its location */ + offset = 0; + handle->options_start = do_lseek(handle, 0, SEEK_CUR); + if (do_write_check(handle, &offset, 8)) + return -1; + } + + handle->file_state = TRACECMD_FILE_INIT; + return 0; +} + +/** + * tracecmd_output_write_headers - Write the trace file headers + * @handle: output handle to a trace file. + * @list: desired events that will be included in the trace file. 
+ * It can be NULL for all available events + * + * These headers are written in the file: + * - header files from the tracing directory + * - ftrace events from the tracing directory + * - event file from the tracing directory - all or only the one from @list + * - kernel symbols from the tracing directory + * - kernel printk strings from the tracing directory + * + * Returns 0 on success, or -1 in case of an error. + */ +int tracecmd_output_write_headers(struct tracecmd_output *handle, + struct tracecmd_event_list *list) +{ + bool compress = false; + + if (!handle || handle->file_state < TRACECMD_FILE_ALLOCATED) + return -1; + + /* Write init data, if not written yet */ + if (handle->file_state < TRACECMD_FILE_INIT && output_write_init(handle)) + return -1; + /* Sections are compressed only when a compression context is set on the handle */ + if (handle->compress) + compress = true; + if (read_header_files(handle, compress)) + return -1; + if (read_ftrace_files(handle, compress)) + return -1; + if (read_event_files(handle, list, compress)) + return -1; + if (read_proc_kallsyms(handle, compress)) + return -1; + if (read_ftrace_printk(handle, compress)) + return -1; + return 0; +} + +/** + * tracecmd_add_option_v - add options to the file + * @handle: the output file handle + * @id: the id of the option + * @vector: array of vectors, pointing to the data to write in the file + * @count: number of items in the vector array + * + * The option data is the concatenation of all @vector items; it is copied + * into a newly allocated buffer owned by the option. + * + * Returns handle to update option if needed, or NULL on failure. + * Just the content can be updated, with smaller or equal to + * content than the specified size. + */ +struct tracecmd_option * +tracecmd_add_option_v(struct tracecmd_output *handle, + unsigned short id, const struct iovec *vector, int count) + +{ + struct tracecmd_option *option; + char *data = NULL; + int i, size = 0; + + /* + * We can only add options before tracing data were written. + * This may change in the future. 
+ */ + if (!HAS_SECTIONS(handle) && handle->file_state > TRACECMD_FILE_OPTIONS) + return NULL; + + for (i = 0; i < count; i++) + size += vector[i].iov_len; + /* Some IDs (like TRACECMD_OPTION_TRACECLOCK) pass vector with 0 / NULL data */ + if (size) { + data = malloc(size); + if (!data) { + tracecmd_warning("Insufficient memory"); + return NULL; + } + } + option = calloc(1, sizeof(*option)); + if (!option) { + tracecmd_warning("Could not allocate space for option"); + free(data); + return NULL; + } + + handle->nr_options++; + option->data = data; + for (i = 0; i < count; i++) { + if (vector[i].iov_base && vector[i].iov_len) { + memcpy(data, vector[i].iov_base, vector[i].iov_len); + data += vector[i].iov_len; + } + } + + option->size = size; + option->id = id; + + list_add_tail(&option->list, &handle->options); + + return option; +} + +/** + * tracecmd_add_option - add options to the file + * @handle: the output file handle name + * @id: the id of the option + * @size: the size of the option data + * @data: the data to write to the file + * + * Returns handle to update option if needed + * Just the content can be updated, with smaller or equal to + * content than the specified size + */ +struct tracecmd_option * +tracecmd_add_option(struct tracecmd_output *handle, + unsigned short id, int size, const void *data) +{ + struct iovec vect; + + vect.iov_base = (void *) data; + vect.iov_len = size; + return tracecmd_add_option_v(handle, id, &vect, 1); +} + +int tracecmd_write_cpus(struct tracecmd_output *handle, int cpus) +{ + int ret; + + if (!check_out_state(handle, TRACECMD_FILE_CPU_COUNT)) { + tracecmd_warning("Cannot write CPU count into the file, unexpected state 0x%X", + handle->file_state); + return -1; + } + + if (!HAS_SECTIONS(handle)) { + cpus = convert_endian_4(handle, cpus); + ret = do_write_check(handle, &cpus, 4); + if (ret < 0) + return ret; + } else { + tracecmd_add_option(handle, TRACECMD_OPTION_CPUCOUNT, sizeof(int), &cpus); + } + + handle->file_state = 
TRACECMD_FILE_CPU_COUNT; + return 0; +} + +static int write_options_v6(struct tracecmd_output *handle) +{ + struct tracecmd_option *options; + unsigned short option; + unsigned short endian2; + unsigned int endian4; + + /* If already written, ignore */ + if (handle->file_state == TRACECMD_FILE_OPTIONS) + return 0; + if (!check_out_state(handle, TRACECMD_FILE_OPTIONS)) { + tracecmd_warning("Cannot write options into the file, unexpected state 0x%X", + handle->file_state); + return -1; + } + + if (do_write_check(handle, "options ", 10)) + return -1; + handle->options_start = do_lseek(handle, 0, SEEK_CUR); + list_for_each_entry(options, &handle->options, list) { + endian2 = convert_endian_2(handle, options->id); + if (do_write_check(handle, &endian2, 2)) + return -1; + + endian4 = convert_endian_4(handle, options->size); + if (do_write_check(handle, &endian4, 4)) + return -1; + + /* Save the data location in case it needs to be updated */ + options->offset = do_lseek(handle, 0, SEEK_CUR); + + if (do_write_check(handle, options->data, + options->size)) + return -1; + } + + option = TRACECMD_OPTION_DONE; + + if (do_write_check(handle, &option, 2)) + return -1; + + handle->file_state = TRACECMD_FILE_OPTIONS; + return 0; +} + +static int write_options(struct tracecmd_output *handle) +{ + struct tracecmd_option *options; + unsigned long long endian8; + unsigned short endian2; + unsigned int endian4; + bool new = false; + tsize_t offset; + + /* Check if there are unsaved options */ + list_for_each_entry(options, &handle->options, list) { + if (!options->offset) { + new = true; + break; + } + } + if (!new) + return 0; + offset = do_lseek(handle, 0, SEEK_CUR); + + /* Append to the previous options section, if any */ + if (handle->options_start) { + if (do_lseek(handle, handle->options_start, SEEK_SET) == (off64_t)-1) + return -1; + endian8 = convert_endian_8(handle, offset); + if (do_write_check(handle, &endian8, 8)) + return -1; + if (do_lseek(handle, offset, SEEK_SET) == 
(off_t)-1) + return -1; + } + + offset = out_write_section_header(handle, TRACECMD_OPTION_DONE, "options", 0, false); + if (offset == (off_t)-1) + return -1; + + list_for_each_entry(options, &handle->options, list) { + /* Option is already saved, skip it */ + if (options->offset) + continue; + endian2 = convert_endian_2(handle, options->id); + if (do_write_check(handle, &endian2, 2)) + return -1; + endian4 = convert_endian_4(handle, options->size); + if (do_write_check(handle, &endian4, 4)) + return -1; + /* Save the data location */ + options->offset = do_lseek(handle, 0, SEEK_CUR); + if (do_write_check(handle, options->data, options->size)) + return -1; + } + + endian2 = convert_endian_2(handle, TRACECMD_OPTION_DONE); + if (do_write_check(handle, &endian2, 2)) + return -1; + endian4 = convert_endian_4(handle, 8); + if (do_write_check(handle, &endian4, 4)) + return -1; + endian8 = 0; + handle->options_start = do_lseek(handle, 0, SEEK_CUR); + if (do_write_check(handle, &endian8, 8)) + return -1; + if (out_update_section_header(handle, offset)) + return -1; + + return 0; +} + +int tracecmd_write_meta_strings(struct tracecmd_output *handle) +{ + if (!HAS_SECTIONS(handle)) + return 0; + + return save_string_section(handle, true); +} + +int tracecmd_write_options(struct tracecmd_output *handle) +{ + if (!HAS_SECTIONS(handle)) + return write_options_v6(handle); + return write_options(handle); +} + +static int append_options_v6(struct tracecmd_output *handle) +{ + struct tracecmd_option *options; + unsigned short option; + unsigned short endian2; + unsigned int endian4; + off_t offset; + int r; + + /* + * We can append only if options are already written and tracing data + * is not yet written + */ + if (handle->file_state != TRACECMD_FILE_OPTIONS) + return -1; + + if (do_lseek(handle, 0, SEEK_END) == (off_t)-1) + return -1; + offset = do_lseek(handle, -2, SEEK_CUR); + if (offset == (off_t)-1) + return -1; + + r = do_preed(handle, &option, 2, offset); + if (r != 2 || 
option != TRACECMD_OPTION_DONE) + return -1; + + list_for_each_entry(options, &handle->options, list) { + endian2 = convert_endian_2(handle, options->id); + if (do_write_check(handle, &endian2, 2)) + return -1; + + endian4 = convert_endian_4(handle, options->size); + if (do_write_check(handle, &endian4, 4)) + return -1; + + /* Save the data location in case it needs to be updated */ + options->offset = do_lseek(handle, 0, SEEK_CUR); + + if (do_write_check(handle, options->data, + options->size)) + return -1; + } + + option = TRACECMD_OPTION_DONE; + + if (do_write_check(handle, &option, 2)) + return -1; + + return 0; +} + +int tracecmd_append_options(struct tracecmd_output *handle) +{ + if (!HAS_SECTIONS(handle)) + return append_options_v6(handle); + return write_options(handle); +} + +static struct tracecmd_option * +add_buffer_option_v6(struct tracecmd_output *handle, const char *name, int cpus) +{ + struct tracecmd_option *option; + char *buf; + int size = 8 + strlen(name) + 1; + + buf = calloc(1, size); + if (!buf) { + tracecmd_warning("Failed to malloc buffer"); + return NULL; + } + *(tsize_t *)buf = 0; + strcpy(buf + 8, name); + + option = tracecmd_add_option(handle, TRACECMD_OPTION_BUFFER, size, buf); + free(buf); + + /* + * In case a buffer instance has different number of CPUs as the + * local machine. 
+ */ + if (cpus) + tracecmd_add_option(handle, TRACECMD_OPTION_CPUCOUNT, + sizeof(int), &cpus); + + return option; +} + +int tracecmd_add_buffer_info(struct tracecmd_output *handle, const char *name, int cpus) +{ + struct tracecmd_buffer *buf; + + buf = calloc(1, sizeof(struct tracecmd_buffer)); + if (!buf) + return -1; + buf->name = strdup(name); + buf->cpus = cpus; + if (!buf->name) { + free(buf); + return -1; + } + list_add_tail(&buf->list, &handle->buffers); + return 0; +} + +int tracecmd_write_buffer_info(struct tracecmd_output *handle) +{ + struct tracecmd_option *option; + struct tracecmd_buffer *buf; + + if (HAS_SECTIONS(handle)) + return 0; + + list_for_each_entry(buf, &handle->buffers, list) { + option = add_buffer_option_v6(handle, buf->name, buf->cpus); + if (!option) + return -1; + buf->option = option; + } + + return 0; +} + +static tsize_t get_buffer_file_offset(struct tracecmd_output *handle, const char *name) +{ + struct tracecmd_buffer *buf; + + list_for_each_entry(buf, &handle->buffers, list) { + if (!strcmp(name, buf->name)) { + if (!buf->option) + break; + return buf->option->offset; + } + } + return 0; +} + +int tracecmd_write_cmdlines(struct tracecmd_output *handle) +{ + enum tracecmd_section_flags flags = 0; + bool compress = false; + tsize_t offset; + int ret; + + if (!check_out_state(handle, TRACECMD_FILE_CMD_LINES)) { + tracecmd_warning("Cannot write command lines into the file, unexpected state 0x%X", + handle->file_state); + return -1; + } + + if (handle->compress) + compress = true; + + if (compress) + flags |= TRACECMD_SEC_FL_COMPRESS; + offset = out_write_section_header(handle, TRACECMD_OPTION_CMDLINES, + "command lines", flags, true); + if (offset == (off64_t)-1) + return -1; + + out_compression_start(handle, compress); + + ret = save_tracing_file_data(handle, "saved_cmdlines"); + if (ret < 0) { + out_compression_reset(handle, compress); + return ret; + } + + if (out_compression_end(handle, compress)) + return -1; + + if 
(out_update_section_header(handle, offset)) + return -1; + + handle->file_state = TRACECMD_FILE_CMD_LINES; + return 0; +} + +static char *get_clock(struct tracecmd_output *handle) +{ + struct tracefs_instance *inst; + + if (handle->trace_clock) + return handle->trace_clock; + + /* + * If no clock is set on this handle, get the trace clock of + * the top instance in the handle's tracing dir + */ + if (!handle->tracing_dir) { + handle->trace_clock = tracefs_get_clock(NULL); + return handle->trace_clock; + } + + inst = tracefs_instance_alloc(handle->tracing_dir, NULL); + if (!inst) + return NULL; + handle->trace_clock = tracefs_get_clock(inst); + tracefs_instance_free(inst); + return handle->trace_clock; +} + +__hidden struct tracecmd_option * +out_add_buffer_option(struct tracecmd_output *handle, const char *name, + unsigned short id, unsigned long long data_offset, + int cpus, struct data_file_write *cpu_data, int page_size) +{ + struct tracecmd_option *option; + int i, j = 0, k = 0; + int *cpu_ids = NULL; + struct iovec *vect; + char *clock; + + if (!HAS_SECTIONS(handle)) + return NULL; + + clock = get_clock(handle); + if (!clock) { + tracecmd_warning("Could not find clock, set to 'local'"); + clock = "local"; + } + + /* + * Buffer flyrecord option: + * - trace data offset in the file + * - buffer name + * - buffer clock + * - page size + * - CPU count + * - for each CPU: + * - CPU id + * - CPU trace data offset in the file + * - CPU trace data size + */ + + /* + * Buffer latency option: + * - trace data offset in the file + * - buffer name + * - buffer clock + */ + + /* + * 5 : offset, name, clock, page size, count + * 3 : cpu offset, name, clock + */ + vect = calloc(5 + (cpus * 3), sizeof(struct iovec)); + if (!vect) + return NULL; + if (cpus) { + cpu_ids = calloc(cpus, sizeof(int)); + if (!cpu_ids) { + free(vect); + return NULL; + } + } + vect[j].iov_base = (void *) &data_offset; + vect[j++].iov_len = 8; + vect[j].iov_base = (void *) name; + vect[j++].iov_len = 
strlen(name) + 1; + vect[j].iov_base = (void *) clock; + vect[j++].iov_len = strlen(clock) + 1; + if (id == TRACECMD_OPTION_BUFFER) { + vect[j].iov_base = &page_size; + vect[j++].iov_len = 4; + vect[j].iov_base = (void *) &k; + vect[j++].iov_len = 4; + for (i = 0; i < cpus; i++) { + if (!cpu_data[i].file_size) + continue; + cpu_ids[i] = i; + vect[j].iov_base = &cpu_ids[i]; + vect[j++].iov_len = 4; + vect[j].iov_base = &cpu_data[i].data_offset; + vect[j++].iov_len = 8; + vect[j].iov_base = &cpu_data[i].write_size; + vect[j++].iov_len = 8; + k++; + } + } + + option = tracecmd_add_option_v(handle, id, vect, j); + free(vect); + free(cpu_ids); + + return option; +} + +struct tracecmd_output *tracecmd_create_file_latency(const char *output_file, int cpus, + int file_version, const char *compression) +{ + enum tracecmd_section_flags flags = 0; + struct tracecmd_output *handle; + tsize_t offset; + char *path; + + handle = tracecmd_output_create(output_file); + if (!handle) + return NULL; + + if (file_version && tracecmd_output_set_version(handle, file_version)) + goto out_free; + + if (compression) { + if (tracecmd_output_set_compression(handle, compression)) + goto out_free; + } else if (file_version >= FILE_VERSION_COMPRESSION) { + tracecmd_output_set_compression(handle, "any"); + } + + if (tracecmd_output_write_headers(handle, NULL)) + goto out_free; + /* + * Save the command lines; + */ + if (tracecmd_write_cmdlines(handle) < 0) + goto out_free; + + if (tracecmd_write_cpus(handle, cpus) < 0) + goto out_free; + if (tracecmd_write_buffer_info(handle) < 0) + goto out_free; + if (tracecmd_write_options(handle) < 0) + goto out_free; + + if (!check_out_state(handle, TRACECMD_FILE_CPU_LATENCY)) { + tracecmd_warning("Cannot write latency data into the file, unexpected state 0x%X", + handle->file_state); + goto out_free; + } + + if (!HAS_SECTIONS(handle) && do_write_check(handle, "latency ", 10)) + goto out_free; + + path = get_tracing_file(handle, "trace"); + if (!path) + goto 
out_free; + + offset = do_lseek(handle, 0, SEEK_CUR); + if (HAS_SECTIONS(handle) && + !out_add_buffer_option(handle, "", TRACECMD_OPTION_BUFFER_TEXT, + offset, 0, NULL, getpagesize())) + goto out_free; + if (handle->compress) + flags |= TRACECMD_SEC_FL_COMPRESS; + + offset = out_write_section_header(handle, TRACECMD_OPTION_BUFFER_TEXT, + "buffer latency", flags, false); + + copy_file_compress(handle, path, NULL); + if (out_update_section_header(handle, offset)) + goto out_free; + + put_tracing_file(path); + + handle->file_state = TRACECMD_FILE_CPU_LATENCY; + + if (HAS_SECTIONS(handle)) + tracecmd_write_options(handle); + + return handle; + +out_free: + tracecmd_output_close(handle); + return NULL; +} + +static int save_clock(struct tracecmd_output *handle, char *clock) +{ + unsigned long long endian8; + char *str = NULL; + int ret; + + ret = asprintf(&str, "[%s]", clock); + if (ret < 0) + return -1; + + endian8 = convert_endian_8(handle, strlen(str)); + ret = do_write_check(handle, &endian8, 8); + if (ret) + goto out; + ret = do_write_check(handle, str, strlen(str)); + +out: + free(str); + return ret; +} + +static int update_buffer_cpu_offset_v6(struct tracecmd_output *handle, + const char *name, tsize_t offset) +{ + tsize_t b_offset; + tsize_t current; + + if (!name) + name = ""; + + b_offset = get_buffer_file_offset(handle, name); + if (!b_offset) { + tracecmd_warning("Cannot find description for buffer %s", name); + return -1; + } + + current = do_lseek(handle, 0, SEEK_CUR); + + /* Go to the option data, where will write the offest */ + if (do_lseek(handle, b_offset, SEEK_SET) == (off64_t)-1) { + tracecmd_warning("could not seek to %lld", b_offset); + return -1; + } + + if (do_write_check(handle, &offset, 8)) + return -1; + + /* Go back to end of file */ + if (do_lseek(handle, current, SEEK_SET) == (off64_t)-1) { + tracecmd_warning("could not seek to %lld", offset); + return -1; + } + return 0; +} + +__hidden int out_write_emty_cpu_data(struct tracecmd_output 
*handle, int cpus) +{ + unsigned long long zero = 0; + char *clock; + int ret; + int i; + + if (HAS_SECTIONS(handle)) + return 0; + + ret = handle->file_state == TRACECMD_FILE_CPU_FLYRECORD ? 0 : + check_file_state(handle->file_version, + handle->file_state, + TRACECMD_FILE_CPU_FLYRECORD); + if (ret < 0) { + tracecmd_warning("Cannot write trace data into the file, unexpected state 0x%X", + handle->file_state); + return ret; + } + + if (do_write_check(handle, "flyrecord", 10)) + return -1; + + for (i = 0; i < cpus; i++) { + /* Write 0 for trace data offset and size */ + if (do_write_check(handle, &zero, 8)) + return -1; + + if (do_write_check(handle, &zero, 8)) + return -1; + } + clock = get_clock(handle); + if (clock && save_clock(handle, clock)) + return -1; + + handle->file_state = TRACECMD_FILE_CPU_FLYRECORD; + return 0; +} + +__hidden int out_write_cpu_data(struct tracecmd_output *handle, + int cpus, struct cpu_data_source *data, const char *buff_name) +{ + struct data_file_write *data_files = NULL; + enum tracecmd_section_flags flags = 0; + tsize_t data_offs, offset; + unsigned long long endian8; + unsigned long long read_size; + int page_size; + char *clock; + char *str; + int ret; + int i; + + /* This can be called multiple times (when recording instances) */ + ret = handle->file_state == TRACECMD_FILE_CPU_FLYRECORD ? 
0 : + check_file_state(handle->file_version, + handle->file_state, + TRACECMD_FILE_CPU_FLYRECORD); + if (ret < 0) { + tracecmd_warning("Cannot write trace data into the file, unexpected state 0x%X", + handle->file_state); + goto out_free; + } + + if (*buff_name == '\0') + page_size = handle->page_size; + else + page_size = get_trace_page_size(handle, buff_name); + + data_offs = do_lseek(handle, 0, SEEK_CUR); + if (!HAS_SECTIONS(handle) && do_write_check(handle, "flyrecord", 10)) + goto out_free; + + if (handle->compress) + flags |= TRACECMD_SEC_FL_COMPRESS; + if (asprintf(&str, "buffer flyrecord %s", buff_name) < 1) + goto out_free; + offset = out_write_section_header(handle, TRACECMD_OPTION_BUFFER, str, flags, false); + free(str); + if (offset == (off_t)-1) + goto out_free; + + data_files = calloc(cpus, sizeof(*data_files)); + if (!data_files) + goto out_free; + + for (i = 0; i < cpus; i++) { + data_files[i].file_size = data[i].size; + /* + * Place 0 for the data offset and size, and save the offsets to + * updated them with the correct data later. 
+ */ + if (!HAS_SECTIONS(handle)) { + endian8 = 0; + data_files[i].file_data_offset = do_lseek(handle, 0, SEEK_CUR); + if (do_write_check(handle, &endian8, 8)) + goto out_free; + data_files[i].file_write_size = do_lseek(handle, 0, SEEK_CUR); + if (do_write_check(handle, &endian8, 8)) + goto out_free; + } + } + + if (!HAS_SECTIONS(handle)) { + update_buffer_cpu_offset_v6(handle, buff_name, data_offs); + clock = get_clock(handle); + if (clock && save_clock(handle, clock)) + goto out_free; + } + + for (i = 0; i < cpus; i++) { + data_files[i].data_offset = do_lseek(handle, 0, SEEK_CUR); + /* Page align offset */ + data_files[i].data_offset += page_size - 1; + data_files[i].data_offset &= ~(page_size - 1); + + ret = do_lseek(handle, data_files[i].data_offset, SEEK_SET); + if (ret == (off64_t)-1) + goto out_free; + + if (!tracecmd_get_quiet(handle)) + fprintf(stderr, "CPU%d data recorded at offset=0x%llx\n", + i, (unsigned long long)data_files[i].data_offset); + + if (data[i].size) { + if (lseek64(data[i].fd, data[i].offset, SEEK_SET) == (off64_t)-1) + goto out_free; + read_size = out_copy_fd_compress(handle, data[i].fd, + data[i].size, &data_files[i].write_size, + page_size); + + if (read_size != data_files[i].file_size) { + errno = EINVAL; + tracecmd_warning("did not match size of %lld to %lld", + read_size, data_files[i].file_size); + goto out_free; + } + } else { + data_files[i].write_size = 0; + } + + if (!HAS_SECTIONS(handle)) { + /* Write the real CPU data offset in the file */ + if (do_lseek(handle, data_files[i].file_data_offset, SEEK_SET) == (off64_t)-1) + goto out_free; + endian8 = convert_endian_8(handle, data_files[i].data_offset); + if (do_write_check(handle, &endian8, 8)) + goto out_free; + /* Write the real CPU data size in the file */ + if (do_lseek(handle, data_files[i].file_write_size, SEEK_SET) == (off64_t)-1) + goto out_free; + endian8 = convert_endian_8(handle, data_files[i].write_size); + if (do_write_check(handle, &endian8, 8)) + goto out_free; + 
offset = data_files[i].data_offset + data_files[i].write_size; + if (do_lseek(handle, offset, SEEK_SET) == (off64_t)-1) + goto out_free; + } + if (!tracecmd_get_quiet(handle)) { + fprintf(stderr, " %llu bytes in size", + (unsigned long long)data_files[i].write_size); + if (flags & TRACECMD_SEC_FL_COMPRESS) + fprintf(stderr, " (%llu uncompressed)", + (unsigned long long)data_files[i].file_size); + fprintf(stderr, "\n"); + } + } + + if (HAS_SECTIONS(handle) && + !out_add_buffer_option(handle, buff_name, TRACECMD_OPTION_BUFFER, + data_offs, cpus, data_files, page_size)) + goto out_free; + + free(data_files); + if (do_lseek(handle, 0, SEEK_END) == (off64_t)-1) + return -1; + + if (out_update_section_header(handle, offset)) + goto out_free; + + handle->file_state = TRACECMD_FILE_CPU_FLYRECORD; + + if (HAS_SECTIONS(handle)) + tracecmd_write_options(handle); + + return 0; + + out_free: + do_lseek(handle, 0, SEEK_END); + free(data_files); + return -1; +} + +int tracecmd_write_cpu_data(struct tracecmd_output *handle, + int cpus, char * const *cpu_data_files, const char *buff_name) +{ + struct cpu_data_source *data; + struct stat st; + int size = 0; + int ret; + int i; + + if (!buff_name) + buff_name = ""; + + data = calloc(cpus, sizeof(struct cpu_data_source)); + if (!data) + return -1; + + for (i = 0; i < cpus; i++) { + ret = stat(cpu_data_files[i], &st); + if (ret < 0) { + tracecmd_warning("can not stat '%s'", cpu_data_files[i]); + break; + } + data[i].fd = open(cpu_data_files[i], O_RDONLY); + if (data[i].fd < 0) { + tracecmd_warning("Can't read '%s'", data[i].fd); + break; + } + + data[i].size = st.st_size; + data[i].offset = 0; + size += st.st_size; + } + + if (i < cpus) + ret = -1; + else + ret = out_write_cpu_data(handle, cpus, data, buff_name); + + for (i--; i >= 0; i--) + close(data[i].fd); + + free(data); + return ret; +} + +int tracecmd_append_cpu_data(struct tracecmd_output *handle, + int cpus, char * const *cpu_data_files) +{ + int ret; + + ret = 
tracecmd_write_cpus(handle, cpus); + if (ret) + return ret; + ret = tracecmd_write_buffer_info(handle); + if (ret) + return ret; + ret = tracecmd_write_options(handle); + if (ret) + return ret; + + return tracecmd_write_cpu_data(handle, cpus, cpu_data_files, NULL); +} + +int tracecmd_append_buffer_cpu_data(struct tracecmd_output *handle, + const char *name, int cpus, char * const *cpu_data_files) +{ + return tracecmd_write_cpu_data(handle, cpus, cpu_data_files, name); +} + +struct tracecmd_output *tracecmd_get_output_handle_fd(int fd) +{ + struct tracecmd_output *handle = NULL; + struct tracecmd_input *ihandle; + const char *cname = NULL; + const char *cver = NULL; + int fd2; + + /* Move the file descriptor to the beginning */ + if (lseek(fd, 0, SEEK_SET) == (off_t)-1) + return NULL; + + /* dup fd to be used by the ihandle bellow */ + fd2 = dup(fd); + if (fd2 < 0) + return NULL; + + /* get a input handle from this */ + ihandle = tracecmd_alloc_fd(fd2, TRACECMD_FL_LOAD_NO_PLUGINS); + if (!ihandle) + return NULL; + tracecmd_read_headers(ihandle, 0); + + /* move the file descriptor to the end */ + if (lseek(fd, 0, SEEK_END) == (off_t)-1) + goto out_free; + + /* create a partial output handle */ + handle = calloc(1, sizeof(*handle)); + if (!handle) + goto out_free; + + handle->fd = fd; + + /* get tep, state, endian and page size */ + handle->file_state = tracecmd_get_file_state(ihandle); + /* Use the tep of the ihandle for later writes */ + handle->pevent = tracecmd_get_tep(ihandle); + tep_ref(handle->pevent); + handle->page_size = tracecmd_page_size(ihandle); + handle->file_version = tracecmd_get_in_file_version(ihandle); + handle->options_start = get_last_option_offset(ihandle); + handle->strings_offs = get_meta_strings_size(ihandle); + list_head_init(&handle->options); + list_head_init(&handle->buffers); + + if (!tracecmd_get_file_compress_proto(ihandle, &cname, &cver)) { + handle->compress = tracecmd_compress_alloc(cname, cver, handle->fd, + handle->pevent, 
handle->msg_handle); + if (!handle->compress) + goto out_free; + } + tracecmd_close(ihandle); + + return handle; + + out_free: + tracecmd_close(ihandle); + free(handle); + return NULL; +} + +/** + * tracecmd_output_create - Create new output handle to a trace file with given name + * @output_file: Name of the trace file that will be created. + * + * The @output_file parameter can be NULL. In this case the output handle is created + * and initialized, but is not associated with a file. + * + * Returns pointer to created outpuy handle, or NULL in case of an error. + */ +struct tracecmd_output *tracecmd_output_create(const char *output_file) +{ + struct tracecmd_output *out; + int fd = -1; + + if (output_file) { + fd = open(output_file, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0644); + if (fd < 0) + return NULL; + } + out = tracecmd_output_create_fd(fd); + if (!out && fd >= 0) { + close(fd); + unlink(output_file); + } + + return out; +} + +/** + * tracecmd_copy - copy the headers of one trace.dat file for another + * @ihandle: input handle of the trace.dat file to copy + * @file: the trace.dat file to create + * @state: what data will be copied from the source handle + * @file_version: version of the output file + * @compression: compression of the output file, can be one of: + * NULL - inherit compression from the input file + * "any" - compress the output file with the best available algorithm + * "none" - do not compress the output file + * algorithm_name - compress the output file with specified algorithm + * + * Reads the header information and creates a new trace data file + * with the same characteristics (events and all) and returns + * tracecmd_output handle to this new file. 
+ */ +struct tracecmd_output *tracecmd_copy(struct tracecmd_input *ihandle, const char *file, + enum tracecmd_file_states state, int file_version, + const char *compression) +{ + enum tracecmd_file_states fstate; + struct tracecmd_output *handle; + + handle = tracecmd_output_create(file); + if (!handle) + return NULL; + + if (tracecmd_output_set_from_input(handle, ihandle)) + goto out_free; + + if (file_version >= FILE_VERSION_MIN) + tracecmd_output_set_version(handle, file_version); + + if (compression && tracecmd_output_set_compression(handle, compression)) + goto out_free; + + output_write_init(handle); + fstate = state > TRACECMD_FILE_CPU_COUNT ? TRACECMD_FILE_CPU_COUNT : state; + if (tracecmd_copy_headers(ihandle, handle, 0, fstate) < 0) + goto out_free; + + if (tracecmd_copy_buffer_descr(ihandle, handle) < 0) + goto out_free; + + if (state >= TRACECMD_FILE_OPTIONS && + tracecmd_copy_options(ihandle, handle) < 0) + goto out_free; + + if (state >= TRACECMD_FILE_CPU_LATENCY && + tracecmd_copy_trace_data(ihandle, handle) < 0) + goto out_free; + + if (HAS_SECTIONS(handle)) + tracecmd_write_options(handle); + + /* The file is all ready to have cpu data attached */ + return handle; + +out_free: + if (handle) + tracecmd_output_close(handle); + + unlink(file); + return NULL; +} + +__hidden void out_set_file_state(struct tracecmd_output *handle, int new_state) +{ + handle->file_state = new_state; +} + +__hidden bool check_out_state(struct tracecmd_output *handle, int new_state) +{ + return check_file_state(handle->file_version, handle->file_state, new_state); +} + +__hidden bool out_check_compression(struct tracecmd_output *handle) +{ + return (handle->compress != NULL); +} + +__hidden int out_save_options_offset(struct tracecmd_output *handle, unsigned long long start) +{ + unsigned long long new, en8; + + if (HAS_SECTIONS(handle)) { + /* Append to the previous options section, if any */ + if (!handle->options_start) + return -1; + + new = do_lseek(handle, 0, 
SEEK_CUR); + if (do_lseek(handle, handle->options_start, SEEK_SET) == (off64_t)-1) + return -1; + + en8 = convert_endian_8(handle, start); + if (do_write_check(handle, &en8, 8)) + return -1; + + handle->options_start = new; + if (do_lseek(handle, new, SEEK_SET) == (off64_t)-1) + return -1; + } else { + handle->options_start = start; + } + + return 0; +} + +/** + * tracecmd_get_out_file_version - return the trace.dat file version + * @handle: output handle for the trace.dat file + */ +unsigned long tracecmd_get_out_file_version(struct tracecmd_output *handle) +{ + return handle->file_version; +} + +unsigned long long tracecmd_get_out_file_offset(struct tracecmd_output *handle) +{ + return do_lseek(handle, 0, SEEK_CUR); +} diff --git a/lib/trace-cmd/trace-perf.c b/lib/trace-cmd/trace-perf.c new file mode 100644 index 00000000..a10da55d --- /dev/null +++ b/lib/trace-cmd/trace-perf.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2021, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com> + * + */ +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> + +#include "trace-cmd-private.h" + +static void default_perf_init_pe(struct perf_event_attr *pe) +{ + pe->type = PERF_TYPE_SOFTWARE; + pe->sample_type = PERF_SAMPLE_CPU; + pe->size = sizeof(struct perf_event_attr); + pe->config = PERF_COUNT_HW_CPU_CYCLES; + pe->disabled = 1; + pe->exclude_kernel = 1; + pe->freq = 1; + pe->sample_freq = 1000; + pe->inherit = 1; + pe->mmap = 1; + pe->comm = 1; + pe->task = 1; + pe->precise_ip = 1; + pe->sample_id_all = 1; + pe->read_format = PERF_FORMAT_ID | + PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; +} + +/** + * trace_perf_init - Initialize perf context + * + * @perf: structure, representing perf context, that will be initialized. + * @pages: Number of perf memory mapped pages. + * @cpu: CPU number, associated with this perf context. + * @pid: PID, associated with this perf context. 
+ * + * The perf context in initialized with default values. The caller can set + * custom perf parameters in perf->pe, before calling trace_perf_open() API. + * + * Returns 0 on success, or -1 in case of an error. + * + */ +int __hidden trace_perf_init(struct trace_perf *perf, int pages, int cpu, int pid) +{ + if (!perf) + return -1; + + memset(perf, 0, sizeof(struct trace_perf)); + default_perf_init_pe(&perf->pe); + perf->cpu = cpu; + perf->pages = pages; + perf->pid = pid; + perf->fd = -1; + + return 0; +} + +/** + * trace_perf_close - Close perf session + * + * @perf: structure, representing context of a running perf session, opened + * with trace_perf_open() + * + */ +void __hidden trace_perf_close(struct trace_perf *perf) +{ + if (perf->fd >= 0) + close(perf->fd); + perf->fd = -1; + if (perf->mmap && perf->mmap != MAP_FAILED) + munmap(perf->mmap, (perf->pages + 1) * getpagesize()); + perf->mmap = NULL; +} + +/** + * trace_perf_open - Open perf session + * + * @perf: structure, representing perf context that will be opened. It must be + * initialized with trace_perf_init(). + * + * Returns 0 on success, or -1 in case of an error. 
In case of success, the + * session must be closed with trace_perf_close() + */ +int __hidden trace_perf_open(struct trace_perf *perf) +{ + perf->fd = syscall(__NR_perf_event_open, &perf->pe, perf->pid, perf->cpu, -1, 0); + if (perf->fd < 0) + return -1; + fcntl(perf->fd, F_SETFL, O_NONBLOCK); + + perf->mmap = mmap(NULL, (perf->pages + 1) * getpagesize(), + PROT_READ | PROT_WRITE, MAP_SHARED, perf->fd, 0); + if (perf->mmap == MAP_FAILED) + goto error; + + return 0; + +error: + trace_perf_close(perf); + return -1; +} diff --git a/lib/trace-cmd/trace-plugin.c b/lib/trace-cmd/trace-plugin.c new file mode 100644 index 00000000..127771ea --- /dev/null +++ b/lib/trace-cmd/trace-plugin.c @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#include <stdlib.h> +#include <unistd.h> +#include <dlfcn.h> +#include <sys/stat.h> +#include <libgen.h> +#include "trace-cmd.h" +#include "trace-local.h" +#include "trace-cmd-local.h" + +#define LOCAL_PLUGIN_DIR ".local/lib/trace-cmd/plugins/" + +struct trace_plugin_list { + struct trace_plugin_list *next; + char *name; + void *handle; +}; + +struct trace_plugin_context { + enum tracecmd_context context; + enum tracecmd_plugin_flag flags; + union { + void *data; + struct tracecmd_input *trace_input; + struct tracecmd_output *trace_output; + }; +}; + +/** + * tracecmd_plugin_context_create - Create and initialize tracecmd plugins context. + * @context: Context of the trace-cmd command. + * @data: Pointer to the context specific data, which will be passed to plugins. + * + * Returns a pointer to created tracecmd plugins context, or NULL in case memory + * allocation fails. The returned pointer should be freed by free (). 
+ */ +struct trace_plugin_context * +tracecmd_plugin_context_create(enum tracecmd_context context, void *data) +{ + struct trace_plugin_context *trace; + + trace = calloc(1, sizeof(struct trace_plugin_context)); + if (!trace) + return NULL; + trace->context = context; + trace->data = data; + return trace; +} + +/** + * tracecmd_plugin_set_flag - Set a flag to tracecmd plugins context. + * @context: Context of the trace-cmd command. + * @flag: Flag, whil will be set. + * + */ +void tracecmd_plugin_set_flag(struct trace_plugin_context *context, + enum tracecmd_plugin_flag flag) +{ + if (context) + context->flags |= flag; +} + +/** + * tracecmd_plugin_context_input - Get a tracecmd_input plugin context. + * @context: Context of the trace-cmd command. + * + * Returns pointer to tracecmd_input, if such context is available or + * NULL otherwise. + */ +struct tracecmd_input * +tracecmd_plugin_context_input(struct trace_plugin_context *context) +{ + if (!context || context->context != TRACECMD_INPUT) + return NULL; + return context->trace_input; +} + +/** + * tracecmd_plugin_context_output - Get a tracecmd_output plugin context + * @context: Context of the trace-cmd command. + * + * Returns pointer to tracecmd_output, if such context is available or + * NULL otherwise. 
+ */ +struct tracecmd_output * +tracecmd_plugin_context_output(struct trace_plugin_context *context) +{ + if (!context || context->context != TRACECMD_OUTPUT) + return NULL; + return context->trace_output; +} + +/* + * load_plugin - dlopen() @path/@file, put it in front of the list that @data + * points to and call its loader function with @trace. + * Failures are only reported with tracecmd_warning(). + */ +static void +load_plugin(struct trace_plugin_context *trace, const char *path, + const char *file, void *data) +{ + struct trace_plugin_list **plugin_list = data; + tracecmd_plugin_load_func func; + struct trace_plugin_list *list; + const char *alias; + char *plugin; + void *handle; + int ret; + + ret = asprintf(&plugin, "%s/%s", path, file); + if (ret < 0) { + tracecmd_warning("could not allocate plugin memory"); + return; + } + + handle = dlopen(plugin, RTLD_NOW | RTLD_GLOBAL); + if (!handle) { + tracecmd_warning("could not load plugin '%s'\n%s", plugin, dlerror()); + goto out_free; + } + + alias = dlsym(handle, TRACECMD_PLUGIN_ALIAS_NAME); + if (!alias) + alias = file; + + func = dlsym(handle, TRACECMD_PLUGIN_LOADER_NAME); + if (!func) { + tracecmd_warning("could not find func '%s' in plugin '%s'\n%s", + TRACECMD_PLUGIN_LOADER_NAME, plugin, dlerror()); + goto out_close; + } + + list = malloc(sizeof(*list)); + if (!list) { + tracecmd_warning("could not allocate plugin memory"); + goto out_close; + } + + list->next = *plugin_list; + list->handle = handle; + list->name = plugin; + *plugin_list = list; + + tracecmd_info("registering plugin: %s", plugin); + func(trace); + return; + + out_close: + /* The plugin is not added to the list, so nobody else would dlclose() it */ + dlclose(handle); + out_free: + free(plugin); +} + +static void +load_plugins_dir(struct trace_plugin_context *trace, const char *suffix, + const char *path, + void (*load_plugin)(struct trace_plugin_context *trace, + const char *path, + const char *name, + void *data), + void *data) +{ + struct dirent *dent; + struct stat st; + DIR *dir; + int ret; + + ret = stat(path, &st); + if (ret < 0) + return; + + if (!S_ISDIR(st.st_mode)) + return; + + dir = opendir(path); + if (!dir) + return; + + while ((dent = readdir(dir))) { + const char *name = dent->d_name; + + if (strcmp(name, ".") == 0 || +
strcmp(name, "..") == 0) + continue; + + /* + * Only load plugins that end in suffix. A name shorter than + * the suffix can not match, and must not be indexed backwards. + */ + if (strlen(name) < strlen(suffix) || + strcmp(name + (strlen(name) - strlen(suffix)), suffix) != 0) + continue; + + load_plugin(trace, path, name, data); + } + + closedir(dir); +} + +static char *get_source_plugins_dir(void) +{ + char *p, path[PATH_MAX+1]; + int ret; + + ret = readlink("/proc/self/exe", path, PATH_MAX); + if (ret > PATH_MAX || ret < 0) + return NULL; + + path[ret] = 0; + dirname(path); + p = strrchr(path, '/'); + if (!p) + return NULL; + /* Check if we are in the source tree */ + if (strcmp(p, "/tracecmd") != 0) + return NULL; + + /* The replacement must fit in path[], including the trailing '\0' */ + if ((size_t)(p - path) + sizeof("/lib/trace-cmd/plugins") > sizeof(path)) + return NULL; + + strcpy(p, "/lib/trace-cmd/plugins"); + return strdup(path); +} + +static void +load_plugins_hook(struct trace_plugin_context *trace, const char *suffix, + void (*load_plugin)(struct trace_plugin_context *trace, + const char *path, + const char *name, + void *data), + void *data) +{ + char *home; + char *path; + char *envdir; + int ret; + + if (trace && trace->flags & TRACECMD_DISABLE_PLUGINS) + return; + + /* + * If a system plugin directory was defined, + * check that first. + */ +#ifdef PLUGIN_TRACECMD_DIR + if (!trace || !(trace->flags & TRACECMD_DISABLE_SYS_PLUGINS)) + load_plugins_dir(trace, suffix, PLUGIN_TRACECMD_DIR, + load_plugin, data); +#endif + + /* + * Next let the environment-set plugin directory + * override the system defaults. + */ + envdir = getenv("TRACECMD_PLUGIN_DIR"); + if (envdir) + load_plugins_dir(trace, suffix, envdir, load_plugin, data); + + /* + * Now let the home directory override the environment + * or system defaults.
+ */ + home = getenv("HOME"); + if (!home) + return; + + ret = asprintf(&path, "%s/%s", home, LOCAL_PLUGIN_DIR); + if (ret < 0) { + tracecmd_warning("could not allocate plugin memory"); + return; + } + + load_plugins_dir(trace, suffix, path, load_plugin, data); + + free(path); + + path = get_source_plugins_dir(); + if (path) { + load_plugins_dir(trace, suffix, path, load_plugin, data); + free(path); + } +} + +/** + * tracecmd_load_plugins - Load trace-cmd specific plugins. + * @context: Context of the trace-cmd command, will be passed to the plugins + * at load time. + * + * Returns a list of loaded plugins + */ +struct trace_plugin_list* +tracecmd_load_plugins(struct trace_plugin_context *trace) +{ + struct trace_plugin_list *list = NULL; + + load_plugins_hook(trace, ".so", load_plugin, &list); + return list; +} + +/** + * tracecmd_unload_plugins - Unload trace-cmd specific plugins. + * @plugin_list - List of plugins, previously loaded with tracecmd_load_plugins. + * @context: Context of the trace-cmd command, will be passed to the plugins + * at unload time. 
+ * + */ +void +tracecmd_unload_plugins(struct trace_plugin_list *plugin_list, + struct trace_plugin_context *trace) +{ + tracecmd_plugin_unload_func func; + struct trace_plugin_list *list; + + while (plugin_list) { + list = plugin_list; + plugin_list = list->next; + func = dlsym(list->handle, TRACECMD_PLUGIN_UNLOADER_NAME); + if (func) + func(trace); + dlclose(list->handle); + free(list->name); + free(list); + } +} diff --git a/lib/trace-cmd/trace-recorder.c b/lib/trace-cmd/trace-recorder.c new file mode 100644 index 00000000..c8333789 --- /dev/null +++ b/lib/trace-cmd/trace-recorder.c @@ -0,0 +1,601 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#define _LARGEFILE64_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <time.h> +#include <poll.h> +#include <unistd.h> +#include <errno.h> + +#include "tracefs.h" +#include "trace-cmd-private.h" +#include "trace-cmd-local.h" +#include "event-utils.h" + +/* F_GETPIPE_SZ was introduced in 2.6.35, older systems don't have it */ +#ifndef F_GETPIPE_SZ +# define F_GETPIPE_SZ 1032 /* The Linux number for the option */ +#endif + +#ifndef SPLICE_F_MOVE +# define SPLICE_F_MOVE 1 +# define SPLICE_F_NONBLOCK 2 +# define SPLICE_F_MORE 4 +# define SPLICE_F_GIFT 8 +#endif + +#define POLL_TIMEOUT_MS 1000 + +struct tracecmd_recorder { + int fd; + int fd1; + int fd2; + int trace_fd; + int brass[2]; + int pipe_size; + int page_size; + int cpu; + int stop; + int max; + int pages; + int count; + unsigned fd_flags; + unsigned trace_fd_flags; + unsigned flags; +}; + +static int append_file(int size, int dst, int src) +{ + char buf[size]; + int r; + + lseek64(src, 0, SEEK_SET); + + /* If there's an error, then we are pretty much screwed :-p */ + do { + r = read(src, buf, size); + if (r < 0) + return r; + r = write(dst, buf, r); + if (r < 0) + return r; + } while (r); + return 0; +} + +void tracecmd_free_recorder(struct 
tracecmd_recorder *recorder) +{ + if (!recorder) + return; + + if (recorder->max) { + /* Need to put everything into fd1 */ + if (recorder->fd == recorder->fd1) { + int ret; + /* + * Crap, the older data is in fd2, and we need + * to append fd1 onto it, and then copy over to fd1 + */ + ret = append_file(recorder->page_size, + recorder->fd2, recorder->fd1); + /* Error on copying, then just keep fd1 */ + if (ret) { + lseek64(recorder->fd1, 0, SEEK_END); + goto close; + } + lseek64(recorder->fd1, 0, SEEK_SET); + ftruncate(recorder->fd1, 0); + } + append_file(recorder->page_size, recorder->fd1, recorder->fd2); + } + close: + if (recorder->brass[0] >= 0) + close(recorder->brass[0]); + + if (recorder->brass[1] >= 0) + close(recorder->brass[1]); + + if (recorder->trace_fd >= 0) + close(recorder->trace_fd); + + if (recorder->fd1 >= 0) + close(recorder->fd1); + + if (recorder->fd2 >= 0) + close(recorder->fd2); + + free(recorder); +} + +static void set_nonblock(struct tracecmd_recorder *recorder) +{ + long flags; + + /* Do not block on reads */ + flags = fcntl(recorder->trace_fd, F_GETFL); + fcntl(recorder->trace_fd, F_SETFL, flags | O_NONBLOCK); + + /* Do not block on streams */ + recorder->fd_flags |= SPLICE_F_NONBLOCK; +} + +struct tracecmd_recorder * +tracecmd_create_buffer_recorder_fd2(int fd, int fd2, int cpu, unsigned flags, + const char *buffer, int maxkb) +{ + struct tracecmd_recorder *recorder; + char *path = NULL; + int pipe_size = 0; + int ret; + + recorder = malloc(sizeof(*recorder)); + if (!recorder) + return NULL; + + recorder->cpu = cpu; + recorder->flags = flags; + + recorder->fd_flags = SPLICE_F_MOVE; + + if (!(recorder->flags & TRACECMD_RECORD_BLOCK_SPLICE)) + recorder->fd_flags |= SPLICE_F_NONBLOCK; + + recorder->trace_fd_flags = SPLICE_F_MOVE; + + /* Init to know what to free and release */ + recorder->trace_fd = -1; + recorder->brass[0] = -1; + recorder->brass[1] = -1; + + recorder->page_size = getpagesize(); + if (maxkb) { + int kb_per_page = 
recorder->page_size >> 10; + + if (!kb_per_page) + kb_per_page = 1; + recorder->max = maxkb / kb_per_page; + /* keep max half */ + recorder->max >>= 1; + if (!recorder->max) + recorder->max = 1; + } else + recorder->max = 0; + + recorder->count = 0; + recorder->pages = 0; + + /* fd always points to what to write to */ + recorder->fd = fd; + recorder->fd1 = fd; + recorder->fd2 = fd2; + + if (buffer) { + if (flags & TRACECMD_RECORD_SNAPSHOT) + ret = asprintf(&path, "%s/per_cpu/cpu%d/snapshot_raw", + buffer, cpu); + else + ret = asprintf(&path, "%s/per_cpu/cpu%d/trace_pipe_raw", + buffer, cpu); + if (ret < 0) + goto out_free; + + recorder->trace_fd = open(path, O_RDONLY); + free(path); + + if (recorder->trace_fd < 0) + goto out_free; + } + + if (!(recorder->flags & (TRACECMD_RECORD_NOSPLICE | + TRACECMD_RECORD_NOBRASS))) { + ret = pipe(recorder->brass); + if (ret < 0) + goto out_free; + + ret = fcntl(recorder->brass[0], F_GETPIPE_SZ, &pipe_size); + /* + * F_GETPIPE_SZ was introduced in 2.6.35, ftrace was introduced + * in 2.6.31. If we are running on an older kernel, just fall + * back to using page_size for splice(). It could also return + * success, but not modify pipe_size. 
+ */ + if (ret < 0 || !pipe_size) + pipe_size = recorder->page_size; + + recorder->pipe_size = pipe_size; + } + + if (recorder->flags & TRACECMD_RECORD_POLL) + set_nonblock(recorder); + + return recorder; + + out_free: + tracecmd_free_recorder(recorder); + return NULL; +} + +struct tracecmd_recorder * +tracecmd_create_buffer_recorder_fd(int fd, int cpu, unsigned flags, const char *buffer) +{ + return tracecmd_create_buffer_recorder_fd2(fd, -1, cpu, flags, buffer, 0); +} + +static struct tracecmd_recorder * +__tracecmd_create_buffer_recorder(const char *file, int cpu, unsigned flags, + const char *buffer) +{ + struct tracecmd_recorder *recorder; + int fd; + + fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, 0644); + if (fd < 0) + return NULL; + + recorder = tracecmd_create_buffer_recorder_fd(fd, cpu, flags, buffer); + if (!recorder) { + close(fd); + unlink(file); + } + + return recorder; +} + +struct tracecmd_recorder * +tracecmd_create_buffer_recorder_maxkb(const char *file, int cpu, unsigned flags, + const char *buffer, int maxkb) +{ + struct tracecmd_recorder *recorder = NULL; + char *file2; + int len; + int fd; + int fd2; + + if (!maxkb) + return tracecmd_create_buffer_recorder(file, cpu, flags, buffer); + + len = strlen(file); + file2 = malloc(len + 3); + if (!file2) + return NULL; + + sprintf(file2, "%s.1", file); + + fd = open(file, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0644); + if (fd < 0) + goto out; + + fd2 = open(file2, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0644); + if (fd2 < 0) + goto err; + + recorder = tracecmd_create_buffer_recorder_fd2(fd, fd2, cpu, flags, buffer, maxkb); + if (!recorder) + goto err2; + out: + /* Unlink file2, we need to add everything to file at the end */ + unlink(file2); + free(file2); + + return recorder; + err2: + close(fd2); + err: + close(fd); + unlink(file); + goto out; +} + +struct tracecmd_recorder * +tracecmd_create_buffer_recorder(const char *file, int cpu, unsigned flags, + const char *buffer) +{ + 
return __tracecmd_create_buffer_recorder(file, cpu, flags, buffer); +} + +/** + * tracecmd_create_recorder_virt - Create a recorder reading tracing data + * from the trace_fd file descriptor instead of from the local tracefs + * @file: output filename where tracing data will be written + * @cpu: which CPU is being traced + * @flags: flags configuring the recorder (see TRACECMD_RECORDER_* enums) + * @trace_fd: file descriptor from where tracing data will be read + */ +struct tracecmd_recorder * +tracecmd_create_recorder_virt(const char *file, int cpu, unsigned flags, + int trace_fd) +{ + struct tracecmd_recorder *recorder; + + recorder = __tracecmd_create_buffer_recorder(file, cpu, flags, NULL); + if (recorder) + recorder->trace_fd = trace_fd; + + return recorder; +} + +struct tracecmd_recorder *tracecmd_create_recorder_fd(int fd, int cpu, unsigned flags) +{ + const char *tracing; + + tracing = tracefs_tracing_dir(); + if (!tracing) { + errno = ENODEV; + return NULL; + } + + return tracecmd_create_buffer_recorder_fd(fd, cpu, flags, tracing); +} + +struct tracecmd_recorder *tracecmd_create_recorder(const char *file, int cpu, unsigned flags) +{ + const char *tracing; + + tracing = tracefs_tracing_dir(); + if (!tracing) { + errno = ENODEV; + return NULL; + } + + return tracecmd_create_buffer_recorder(file, cpu, flags, tracing); +} + +struct tracecmd_recorder * +tracecmd_create_recorder_maxkb(const char *file, int cpu, unsigned flags, int maxkb) +{ + const char *tracing; + + tracing = tracefs_tracing_dir(); + if (!tracing) { + errno = ENODEV; + return NULL; + } + + return tracecmd_create_buffer_recorder_maxkb(file, cpu, flags, tracing, maxkb); +} + +static inline void update_fd(struct tracecmd_recorder *recorder, int size) +{ + int fd; + + if (!recorder->max) + return; + + recorder->count += size; + + if (recorder->count >= recorder->page_size) { + recorder->count = 0; + recorder->pages++; + } + + if (recorder->pages < recorder->max) + return; + + recorder->pages = 0; + 
+ + fd = recorder->fd; + + /* Swap fd to next file. */ + if (fd == recorder->fd1) + fd = recorder->fd2; + else + fd = recorder->fd1; + + /* Zero out the new file we are writing to */ + lseek64(fd, 0, SEEK_SET); + ftruncate(fd, 0); + + recorder->fd = fd; +} + +/* + * Move data from the trace file descriptor to the output file descriptor, + * through the "brass" pipe. + * + * Returns -1 on error. + * or bytes of data read. + */ +static long splice_data(struct tracecmd_recorder *recorder) +{ + long total_read = 0; + long read; + long ret; + + read = splice(recorder->trace_fd, NULL, recorder->brass[1], NULL, + recorder->pipe_size, recorder->trace_fd_flags); + if (read < 0) { + if (errno == EAGAIN || errno == EINTR || errno == ENOTCONN) + return 0; + + tracecmd_warning("recorder error in splice input"); + return -1; + } else if (read == 0) + return 0; + + again: + ret = splice(recorder->brass[0], NULL, recorder->fd, NULL, + read, recorder->fd_flags); + if (ret < 0) { + if (errno != EAGAIN && errno != EINTR) { + tracecmd_warning("recorder error in splice output"); + return -1; + } + return total_read; + } else + update_fd(recorder, ret); + /* Account for every chunk moved out of the pipe, not only the last one */ + total_read += ret; + read -= ret; + if (read) + goto again; + + return total_read; +} + +/* + * Returns -1 on error. + * or bytes of data read. + */ +static long direct_splice_data(struct tracecmd_recorder *recorder) +{ + struct pollfd pfd = { + .fd = recorder->trace_fd, + .events = POLLIN, + }; + long read; + int ret; + + /* + * splice(2) in Linux used to not check O_NONBLOCK flag of pipe file + * descriptors before [1]. To avoid getting blocked in the splice(2) + * call below after the user had requested to stop tracing, we poll(2) + * here. This poll() is not necessary on newer kernels.
+ * + * [1] https://github.com/torvalds/linux/commit/ee5e001196d1345b8fee25925ff5f1d67936081e + */ + ret = poll(&pfd, 1, POLL_TIMEOUT_MS); + if (ret < 0) + return -1; + + /* Timed out or nothing readable: do not enter a possibly blocking splice() */ + if (!(pfd.revents & POLLIN)) + return 0; + + read = splice(recorder->trace_fd, NULL, recorder->fd, NULL, + recorder->pipe_size, recorder->fd_flags); + if (read < 0) { + if (errno == EAGAIN || errno == EINTR || errno == ENOTCONN) + return 0; + + tracecmd_warning("recorder error in splice input"); + return -1; + } + + return read; +} + +/* + * Returns -1 on error. + * or bytes of data read. + */ +static long read_data(struct tracecmd_recorder *recorder) +{ + char buf[recorder->page_size]; + long left; + long r, w; + + r = read(recorder->trace_fd, buf, recorder->page_size); + if (r < 0) { + if (errno == EAGAIN || errno == EINTR || errno == ENOTCONN) + return 0; + + tracecmd_warning("recorder error in read input"); + return -1; + } + + left = r; + do { + w = write(recorder->fd, buf + (r - left), left); + if (w > 0) { + left -= w; + update_fd(recorder, w); + } + } while (w >= 0 && left); + + if (w < 0) + r = w; + + return r; +} + +static long move_data(struct tracecmd_recorder *recorder) +{ + if (recorder->flags & TRACECMD_RECORD_NOSPLICE) + return read_data(recorder); + + if (recorder->flags & TRACECMD_RECORD_NOBRASS) + return direct_splice_data(recorder); + + return splice_data(recorder); +} + +long tracecmd_flush_recording(struct tracecmd_recorder *recorder) +{ + char buf[recorder->page_size]; + long total = 0; + long wrote = 0; + long ret; + + set_nonblock(recorder); + + do { + ret = move_data(recorder); + if (ret < 0) + return ret; + total += ret; + } while (ret); + + /* splice only reads full pages */ + do { + ret = read(recorder->trace_fd, buf, recorder->page_size); + if (ret > 0) { + write(recorder->fd, buf, ret); + wrote += ret; + } + + } while (ret > 0); + + /* Make sure we finish off with a page size boundary */ + wrote &= recorder->page_size - 1; + if (wrote) { + memset(buf, 0,
recorder->page_size); + write(recorder->fd, buf, recorder->page_size - wrote); + total += recorder->page_size; + } + + return total; +} + +int tracecmd_start_recording(struct tracecmd_recorder *recorder, unsigned long sleep) +{ + struct timespec req = { + .tv_sec = sleep / 1000000, + .tv_nsec = (sleep % 1000000) * 1000, + }; + long read = 1; + long ret; + + recorder->stop = 0; + + do { + /* Only sleep if we did not read anything last time */ + if (!read && sleep) + nanosleep(&req, NULL); + + read = 0; + do { + ret = move_data(recorder); + if (ret < 0) + return ret; + read += ret; + } while (ret); + } while (!recorder->stop); + + /* Flush out the rest */ + ret = tracecmd_flush_recording(recorder); + + if (ret < 0) + return ret; + + return 0; +} + +void tracecmd_stop_recording(struct tracecmd_recorder *recorder) +{ + if (!recorder) + return; + + set_nonblock(recorder); + + recorder->stop = 1; +} diff --git a/lib/trace-cmd/trace-timesync-kvm.c b/lib/trace-cmd/trace-timesync-kvm.c new file mode 100644 index 00000000..12a22d4c --- /dev/null +++ b/lib/trace-cmd/trace-timesync-kvm.c @@ -0,0 +1,559 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2020, VMware, Tzvetomir Stoyanov tz.stoyanov@gmail.com> + * + */ + +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/stat.h> +#include <dirent.h> +#include <ctype.h> + +#include "trace-cmd.h" +#include "trace-cmd-private.h" +#include "tracefs.h" +#include "trace-tsync-local.h" + +#define KVM_DEBUG_FS "/sys/kernel/debug/kvm" +#define KVM_DEBUG_OFFSET_FILE "tsc-offset" +#define KVM_DEBUG_SCALING_FILE "tsc-scaling-ratio" +#define KVM_DEBUG_FRACTION_FILE "tsc-scaling-ratio-frac-bits" +#define KVM_DEBUG_VCPU_DIR "vcpu" + +/* default KVM scaling values, taken from the Linux kernel */ +#define KVM_SCALING_AMD_DEFAULT (1ULL<<32) +#define KVM_SCALING_INTEL_DEFAULT (1ULL<<48) + +#define KVM_SYNC_PKT_REQUEST 1 +#define KVM_SYNC_PKT_RESPONSE 2 + +typedef __s64 s64; + +#define KVM_ACCURACY 0 +#define 
KVM_NAME "kvm" + +struct kvm_clock_sync { + int vcpu_count; + char **vcpu_offsets; + char **vcpu_scalings; + char **vcpu_frac; + int marker_fd; + struct tep_handle *tep; + int raw_id; + unsigned long long ts; +}; + +struct kvm_clock_offset_msg { + s64 ts; + s64 offset; + s64 scaling; + s64 frac; +}; + +/* + * Read the beginning of @file and parse it with strtoll() (base is auto + * detected) into @res. + * + * Returns 0 on success, or -1 if @file is NULL, can not be opened or read. + */ +static int read_ll_from_file(char *file, long long *res) +{ + char buf[32]; + int ret; + int fd; + + if (!file) + return -1; + fd = open(file, O_RDONLY | O_NONBLOCK); + if (fd < 0) + return -1; + /* Leave room for the terminating '\0' */ + ret = read(fd, buf, sizeof(buf) - 1); + close(fd); + if (ret <= 0) + return -1; + + /* read() does not terminate the data, strtoll() needs a string */ + buf[ret] = '\0'; + *res = strtoll(buf, NULL, 0); + + return 0; +} + +static bool kvm_scaling_check_vm_cpu(char *vname, char *cpu) +{ + long long scaling, frac; + bool has_scaling = false; + bool has_frac = false; + char *path; + int ret; + + if (asprintf(&path, "%s/%s/%s", vname, cpu, KVM_DEBUG_SCALING_FILE) < 0) + return false; + ret = read_ll_from_file(path, &scaling); + free(path); + if (!ret) + has_scaling = true; + + if (asprintf(&path, "%s/%s/%s", vname, cpu, KVM_DEBUG_FRACTION_FILE) < 0) + return false; + ret = read_ll_from_file(path, &frac); + free(path); + if (!ret) + has_frac = true; + + if (has_scaling != has_frac) + return false; + + return true; +} + +static bool kvm_scaling_check_vm(char *name) +{ + struct dirent *entry; + char *vdir; + DIR *dir; + + if (asprintf(&vdir, "%s/%s", KVM_DEBUG_FS, name) < 0) + return true; + + dir = opendir(vdir); + if (!dir) { + free(vdir); + return true; + } + while ((entry = readdir(dir))) { + if (entry->d_type == DT_DIR && !strncmp(entry->d_name, "vcpu", 4) && + !kvm_scaling_check_vm_cpu(vdir, entry->d_name)) + break; + } + + closedir(dir); + free(vdir); + return entry == NULL; +} +static bool kvm_scaling_check(void) +{ + struct dirent *entry; + DIR *dir; + + dir = opendir(KVM_DEBUG_FS); + if (!dir) + return true; + + while ((entry = readdir(dir))) { + if (entry->d_type == DT_DIR && isdigit(entry->d_name[0]) && + !kvm_scaling_check_vm(entry->d_name)) + break; + } +
closedir(dir); + return entry == NULL; +} + +static bool kvm_support_check(bool guest) +{ + struct stat st; + int ret; + + if (guest) + return true; + + ret = stat(KVM_DEBUG_FS, &st); + if (ret < 0) + return false; + + if (!S_ISDIR(st.st_mode)) + return false; + + return kvm_scaling_check(); +} + +static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int cpu, char *dir_str) +{ + struct dirent *entry; + char path[PATH_MAX]; + DIR *dir; + + dir = opendir(dir_str); + if (!dir) + goto error; + while ((entry = readdir(dir))) { + if (entry->d_type != DT_DIR) { + if (!strcmp(entry->d_name, KVM_DEBUG_OFFSET_FILE)) { + snprintf(path, sizeof(path), "%s/%s", + dir_str, entry->d_name); + kvm->vcpu_offsets[cpu] = strdup(path); + } + if (!strcmp(entry->d_name, KVM_DEBUG_SCALING_FILE)) { + snprintf(path, sizeof(path), "%s/%s", + dir_str, entry->d_name); + kvm->vcpu_scalings[cpu] = strdup(path); + } + if (!strcmp(entry->d_name, KVM_DEBUG_FRACTION_FILE)) { + snprintf(path, sizeof(path), "%s/%s", + dir_str, entry->d_name); + kvm->vcpu_frac[cpu] = strdup(path); + } + } + } + if (!kvm->vcpu_offsets[cpu]) + goto error; + closedir(dir); + return 0; + +error: + if (dir) + closedir(dir); + free(kvm->vcpu_offsets[cpu]); + kvm->vcpu_offsets[cpu] = NULL; + free(kvm->vcpu_scalings[cpu]); + kvm->vcpu_scalings[cpu] = NULL; + free(kvm->vcpu_frac[cpu]); + kvm->vcpu_frac[cpu] = NULL; + return -1; +} + +static int kvm_open_debug_files(struct kvm_clock_sync *kvm, int pid) +{ + char *vm_dir_str = NULL; + struct dirent *entry; + char *pid_str = NULL; + char path[PATH_MAX]; + long vcpu; + DIR *dir; + int i; + + dir = opendir(KVM_DEBUG_FS); + if (!dir) + goto error; + if (asprintf(&pid_str, "%d-", pid) <= 0) + goto error; + while ((entry = readdir(dir))) { + if (!(entry->d_type == DT_DIR && + !strncmp(entry->d_name, pid_str, strlen(pid_str)))) + continue; + asprintf(&vm_dir_str, "%s/%s", KVM_DEBUG_FS, entry->d_name); + break; + } + closedir(dir); + dir = NULL; + if (!vm_dir_str) + goto error; + dir = 
opendir(vm_dir_str); + if (!dir) + goto error; + while ((entry = readdir(dir))) { + if (!(entry->d_type == DT_DIR && + !strncmp(entry->d_name, KVM_DEBUG_VCPU_DIR, strlen(KVM_DEBUG_VCPU_DIR)))) + continue; + vcpu = strtol(entry->d_name + strlen(KVM_DEBUG_VCPU_DIR), NULL, 10); + if (vcpu < 0 || vcpu >= kvm->vcpu_count) + continue; + snprintf(path, sizeof(path), "%s/%s", vm_dir_str, entry->d_name); + if (kvm_open_vcpu_dir(kvm, vcpu, path) < 0) + goto error; + } + for (i = 0; i < kvm->vcpu_count; i++) { + if (!kvm->vcpu_offsets[i]) + goto error; + } + closedir(dir); + free(pid_str); + free(vm_dir_str); + return 0; +error: + free(pid_str); + free(vm_dir_str); + if (dir) + closedir(dir); + return -1; +} + +static int kvm_clock_sync_init_host(struct tracecmd_time_sync *tsync, + struct kvm_clock_sync *kvm) +{ + kvm->vcpu_count = tsync->vcpu_count; + kvm->vcpu_offsets = calloc(kvm->vcpu_count, sizeof(char *)); + kvm->vcpu_scalings = calloc(kvm->vcpu_count, sizeof(char *)); + kvm->vcpu_frac = calloc(kvm->vcpu_count, sizeof(char *)); + if (!kvm->vcpu_offsets || !kvm->vcpu_scalings || !kvm->vcpu_frac) + goto error; + if (kvm_open_debug_files(kvm, tsync->guest_pid) < 0) + goto error; + return 0; + +error: + free(kvm->vcpu_offsets); + free(kvm->vcpu_scalings); + free(kvm->vcpu_frac); + return -1; +} + +static int kvm_clock_sync_init_guest(struct tracecmd_time_sync *tsync, + struct kvm_clock_sync *kvm) +{ + const char *systems[] = {"ftrace", NULL}; + struct clock_sync_context *clock_context; + struct tep_event *raw; + char *path; + + clock_context = (struct clock_sync_context *)tsync->context; + path = tracefs_instance_get_dir(clock_context->instance); + if (!path) + goto error; + kvm->tep = tracefs_local_events_system(path, systems); + tracefs_put_tracing_file(path); + if (!kvm->tep) + goto error; + raw = tep_find_event_by_name(kvm->tep, "ftrace", "raw_data"); + if (!raw) + goto error; + + kvm->raw_id = raw->id; + tep_set_file_bigendian(kvm->tep, tracecmd_host_bigendian()); + 
tep_set_local_bigendian(kvm->tep, tracecmd_host_bigendian()); + + path = tracefs_instance_get_file(clock_context->instance, "trace_marker_raw"); + if (!path) + goto error; + kvm->marker_fd = open(path, O_WRONLY); + tracefs_put_tracing_file(path); + + return 0; + +error: + if (kvm->tep) + tep_free(kvm->tep); + if (kvm->marker_fd >= 0) + close(kvm->marker_fd); + + return -1; +} + +static int kvm_clock_sync_init(struct tracecmd_time_sync *tsync) +{ + struct clock_sync_context *clock_context; + struct kvm_clock_sync *kvm; + int ret; + + if (!tsync || !tsync->context) + return -1; + clock_context = (struct clock_sync_context *)tsync->context; + + if (!kvm_support_check(clock_context->is_guest)) + return -1; + kvm = calloc(1, sizeof(struct kvm_clock_sync)); + if (!kvm) + return -1; + kvm->marker_fd = -1; + if (clock_context->is_guest) + ret = kvm_clock_sync_init_guest(tsync, kvm); + else + ret = kvm_clock_sync_init_host(tsync, kvm); + if (ret < 0) + goto error; + + clock_context->proto_data = kvm; + return 0; + +error: + free(kvm); + return -1; +} + +static int kvm_clock_sync_free(struct tracecmd_time_sync *tsync) +{ + struct clock_sync_context *clock_context; + struct kvm_clock_sync *kvm = NULL; + int i; + + clock_context = (struct clock_sync_context *)tsync->context; + if (clock_context) + kvm = (struct kvm_clock_sync *)clock_context->proto_data; + if (kvm) { + for (i = 0; i < kvm->vcpu_count; i++) { + free(kvm->vcpu_offsets[i]); + kvm->vcpu_offsets[i] = NULL; + free(kvm->vcpu_scalings[i]); + kvm->vcpu_scalings[i] = NULL; + free(kvm->vcpu_frac[i]); + kvm->vcpu_frac[i] = NULL; + } + if (kvm->tep) + tep_free(kvm->tep); + if (kvm->marker_fd >= 0) + close(kvm->marker_fd); + free(kvm); + } + return -1; +} + +static int kvm_clock_host(struct tracecmd_time_sync *tsync, + long long *offset, long long *scaling, long long *frac, + long long *timestamp, unsigned int cpu) +{ + char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH]; + struct clock_sync_context *clock_context; + struct 
kvm_clock_offset_msg packet; + struct kvm_clock_sync *kvm = NULL; + long long kvm_scaling = 1; + unsigned int sync_msg; + long long kvm_offset; + long long kvm_frac = 0; + unsigned int size; + char *msg; + int ret; + + clock_context = (struct clock_sync_context *)tsync->context; + if (clock_context) + kvm = (struct kvm_clock_sync *)clock_context->proto_data; + if (!kvm || !kvm->vcpu_offsets || !kvm->vcpu_offsets[0]) + return -1; + if (cpu >= kvm->vcpu_count) + return -1; + ret = read_ll_from_file(kvm->vcpu_offsets[cpu], &kvm_offset); + if (ret < 0) + return -1; + + if (kvm->vcpu_scalings && kvm->vcpu_scalings[cpu]) { + read_ll_from_file(kvm->vcpu_scalings[cpu], &kvm_scaling); + if (kvm_scaling == KVM_SCALING_AMD_DEFAULT || + kvm_scaling == KVM_SCALING_INTEL_DEFAULT) + kvm_scaling = 1; + } + + if (kvm->vcpu_frac && kvm->vcpu_frac[cpu] && kvm_scaling != 1) + ret = read_ll_from_file(kvm->vcpu_frac[cpu], &kvm_frac); + msg = (char *)&packet; + size = sizeof(packet); + ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, + sync_proto, &sync_msg, + &size, &msg); + if (ret || strncmp(sync_proto, KVM_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || + sync_msg != KVM_SYNC_PKT_REQUEST) + return -1; + + packet.offset = -kvm_offset; + packet.scaling = kvm_scaling; + packet.frac = kvm_frac; + ret = tracecmd_msg_send_time_sync(tsync->msg_handle, KVM_NAME, + KVM_SYNC_PKT_RESPONSE, sizeof(packet), + (char *)&packet); + if (ret) + return -1; + + *scaling = packet.scaling; + *offset = packet.offset; + *frac = kvm_frac; + *timestamp = packet.ts; + + return 0; +} + +#define KVM_EVENT_MARKER "kvm sync event" +static int kvm_marker_find(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + struct kvm_clock_sync *kvm = (struct kvm_clock_sync *)context; + struct tep_format_field *field; + struct tep_format_field *id; + char *marker; + + /* Make sure this is our event */ + if (event->id != kvm->raw_id) + return 0; + id = tep_find_field(event, "id"); + field = 
tep_find_field(event, "buf"); + if (field && id && + record->size >= (id->offset + strlen(KVM_EVENT_MARKER) + 1)) { + marker = (char *)(record->data + id->offset); + if (!strcmp(marker, KVM_EVENT_MARKER)) { + kvm->ts = record->ts; + return 1; + } + } + + return 0; +} + +static int kvm_clock_guest(struct tracecmd_time_sync *tsync, + long long *offset, + long long *scaling, + long long *frac, + long long *timestamp) +{ + char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH]; + struct clock_sync_context *clock_context; + struct kvm_clock_offset_msg packet; + struct kvm_clock_sync *kvm = NULL; + unsigned int sync_msg; + unsigned int size; + char *msg; + int ret; + + clock_context = (struct clock_sync_context *)tsync->context; + if (clock_context) + kvm = (struct kvm_clock_sync *)clock_context->proto_data; + if (!kvm) + return -1; + kvm->ts = 0; + memset(&packet, 0, sizeof(packet)); + tracefs_instance_file_write(clock_context->instance, "trace", "\0"); + write(kvm->marker_fd, KVM_EVENT_MARKER, strlen(KVM_EVENT_MARKER) + 1); + kvm->ts = 0; + tracefs_iterate_raw_events(kvm->tep, clock_context->instance, + NULL, 0, kvm_marker_find, kvm); + packet.ts = kvm->ts; + ret = tracecmd_msg_send_time_sync(tsync->msg_handle, KVM_NAME, + KVM_SYNC_PKT_REQUEST, sizeof(packet), + (char *)&packet); + if (ret) + return -1; + msg = (char *)&packet; + size = sizeof(packet); + ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, + sync_proto, &sync_msg, + &size, &msg); + if (ret || strncmp(sync_proto, KVM_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || + sync_msg != KVM_SYNC_PKT_RESPONSE) + return -1; + + *scaling = packet.scaling; + *offset = packet.offset; + *frac = packet.frac; + *timestamp = packet.ts; + return 0; +} + +static int kvm_clock_sync_calc(struct tracecmd_time_sync *tsync, + long long *offset, long long *scaling, long long *frac, + long long *timestamp, unsigned int cpu) +{ + struct clock_sync_context *clock_context; + int ret; + + if (!tsync || !tsync->context) + return -1; + + clock_context = 
(struct clock_sync_context *)tsync->context; + + if (clock_context->is_guest) + ret = kvm_clock_guest(tsync, offset, scaling, frac, timestamp); + else + ret = kvm_clock_host(tsync, offset, scaling, frac, timestamp, cpu); + return ret; +} + +int kvm_clock_sync_register(void) +{ + int role = TRACECMD_TIME_SYNC_ROLE_GUEST; + int clock = 0; + + if (kvm_support_check(false)) { + role |= TRACECMD_TIME_SYNC_ROLE_HOST; + clock = TRACECMD_CLOCK_X86_TSC; + } + return tracecmd_tsync_proto_register(KVM_NAME, KVM_ACCURACY, + role, clock, 0, + kvm_clock_sync_init, + kvm_clock_sync_free, + kvm_clock_sync_calc); +} + +int kvm_clock_sync_unregister(void) +{ + return tracecmd_tsync_proto_unregister(KVM_NAME); +} diff --git a/lib/trace-cmd/trace-timesync-ptp.c b/lib/trace-cmd/trace-timesync-ptp.c new file mode 100644 index 00000000..20e6e6f1 --- /dev/null +++ b/lib/trace-cmd/trace-timesync-ptp.c @@ -0,0 +1,718 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2019, VMware, Tzvetomir Stoyanov tz.stoyanov@gmail.com> + * + */ + +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/types.h> +#include <linux/types.h> +#include <time.h> +#include <sched.h> +#include <limits.h> + +#include "trace-cmd.h" +#include "trace-cmd-private.h" +#include "tracefs.h" +#include "trace-tsync-local.h" +#include "trace-msg.h" +#include "trace-cmd-local.h" + +typedef __be32 be32; +typedef __u64 u64; +typedef __s64 s64; + +#define PTP_SYNC_LOOP 339 + +#define PTP_SYNC_PKT_START 1 +#define PTP_SYNC_PKT_PROBE 2 +#define PTP_SYNC_PKT_PROBES 3 +#define PTP_SYNC_PKT_OFFSET 4 +#define PTP_SYNC_PKT_END 5 + +/* print time sync debug messages */ +/* #define TSYNC_DEBUG */ + +struct ptp_clock_sync { + struct tep_handle *tep; + struct tep_format_field *id; + int raw_id; + int marker_fd; + int series_id; + int flags; + int debug_fd; +}; + +enum { +/* + * Consider only the probe with fastest response time, + * otherwise make a histogram from all probes. 
+ */ + PTP_FLAG_FASTEST_RESPONSE = (1 << 0), +/* + * Use trace marker to get the clock, + * otherwise use the system clock directly. + */ + PTP_FLAG_USE_MARKER = (1 << 1), +}; +static int ptp_flags = PTP_FLAG_FASTEST_RESPONSE | PTP_FLAG_USE_MARKER; + +/* + * Calculated using formula [CPU rate]*[calculated offset deviation] + * tested on 3GHz CPU, with x86-tsc trace clock and compare the calculated + * offset with /sys/kernel/debug/kvm/<VM ID>/vcpu0/tsc-offset + * measured 2000ns deviation + * using PTP flags PTP_FLAG_FASTEST_RESPONSE | PTP_FLAG_USE_MARKER + */ +#define PTP_ACCURACY 6000 +#define PTP_NAME "ptp" + +struct ptp_clock_start_msg { + be32 series_id; + be32 flags; +} __packed; + +struct ptp_clock_sample { + s64 ts; + be32 id; +} __packed; + +struct ptp_clock_result_msg { + be32 series_id; + be32 count; + struct ptp_clock_sample samples[2*PTP_SYNC_LOOP]; +} __packed; + +struct ptp_clock_offset_msg { + s64 ts; + s64 offset; +}; + +struct ptp_markers_context { + struct clock_sync_context *clock; + struct ptp_clock_sync *ptp; + struct ptp_clock_result_msg msg; + int size; +}; + +struct ptp_marker_buf { + int local_id; + int remote_id; + int count; + int packet_id; +} __packed; + +struct ptp_marker { + int series_id; + struct ptp_marker_buf data; +} __packed; + +static int ptp_clock_sync_init(struct tracecmd_time_sync *tsync) +{ + const char *systems[] = {"ftrace", NULL}; + struct clock_sync_context *clock_context; + struct ptp_clock_sync *ptp; + struct tep_event *raw; + char *path; + + if (!tsync || !tsync->context) + return -1; + clock_context = (struct clock_sync_context *)tsync->context; + if (clock_context->proto_data) + return 0; + + ptp = calloc(1, sizeof(struct ptp_clock_sync)); + if (!ptp) + return -1; + + ptp->marker_fd = -1; + ptp->debug_fd = -1; + + path = tracefs_instance_get_dir(clock_context->instance); + if (!path) + goto error; + ptp->tep = tracefs_local_events_system(path, systems); + tracefs_put_tracing_file(path); + if (!ptp->tep) + goto 
error; + raw = tep_find_event_by_name(ptp->tep, "ftrace", "raw_data"); + if (!raw) + goto error; + ptp->id = tep_find_field(raw, "id"); + if (!ptp->id) + goto error; + ptp->raw_id = raw->id; + + tep_set_file_bigendian(ptp->tep, tracecmd_host_bigendian()); + tep_set_local_bigendian(ptp->tep, tracecmd_host_bigendian()); + + path = tracefs_instance_get_file(clock_context->instance, "trace_marker_raw"); + if (!path) + goto error; + ptp->marker_fd = open(path, O_WRONLY); + tracefs_put_tracing_file(path); + + clock_context->proto_data = ptp; + +#ifdef TSYNC_DEBUG + if (clock_context->is_server) { + char buff[256]; + int res_fd; + + sprintf(buff, "res-id%d.txt", clock_context->remote_id); + + res_fd = open(buff, O_CREAT|O_WRONLY|O_TRUNC, 0644); + if (res_fd > 0) + close(res_fd); + } +#endif + + return 0; + +error: + if (ptp) { + tep_free(ptp->tep); + if (ptp->marker_fd >= 0) + close(ptp->marker_fd); + } + free(ptp); + return -1; +} + +static int ptp_clock_sync_free(struct tracecmd_time_sync *tsync) +{ + struct clock_sync_context *clock_context; + struct ptp_clock_sync *ptp; + + if (!tsync || !tsync->context) + return -1; + clock_context = (struct clock_sync_context *)tsync->context; + + if (clock_context && clock_context->proto_data) { + ptp = (struct ptp_clock_sync *)clock_context->proto_data; + tep_free(ptp->tep); + if (ptp->marker_fd >= 0) + close(ptp->marker_fd); + if (ptp->debug_fd >= 0) + close(ptp->debug_fd); + free(clock_context->proto_data); + clock_context->proto_data = NULL; + } + return 0; +} + +/* Save the timestamps of sent ('s') and returned ('r') probes in the + * ctx->msg.samples[] array. Depending of the context (server or client), there + * may be only returned probes, or both sent and returned probes. The returned + * probes are saved first in the array, after them are the sent probes. + * Depending of the context, the array can be with size: + * [0 .. max data.count] - holds only returned probes + * [0 .. 
2 * max data.count] - holds both returned and sent probes + */ +static void ptp_probe_store(struct ptp_markers_context *ctx, + struct ptp_marker *marker, + unsigned long long ts) +{ + int index = -1; + + if (marker->data.packet_id == 'r' && + marker->data.count <= ctx->size) { + index = marker->data.count - 1; + } else if (marker->data.packet_id == 's' && + marker->data.count * 2 <= ctx->size){ + index = ctx->size / 2 + marker->data.count - 1; + } + + if (index >= 0) { + ctx->msg.samples[index].id = marker->data.count; + ctx->msg.samples[index].ts = ts; + ctx->msg.count++; + } +} + +static int ptp_marker_find(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + struct ptp_markers_context *ctx; + struct ptp_marker *marker; + + ctx = (struct ptp_markers_context *)context; + + /* Make sure this is our event */ + if (event->id != ctx->ptp->raw_id || !ctx->ptp->id) + return 0; + if (record->size >= (ctx->ptp->id->offset + sizeof(struct ptp_marker))) { + marker = (struct ptp_marker *)(record->data + ctx->ptp->id->offset); + if (marker->data.local_id == ctx->clock->local_id && + marker->data.remote_id == ctx->clock->remote_id && + marker->series_id == ctx->ptp->series_id && + marker->data.count) + ptp_probe_store(ctx, marker, record->ts); + } + + return 0; +} + +static inline bool good_probe(struct ptp_clock_sample *server_sample, + struct ptp_clock_sample *send_sample, + struct ptp_clock_sample *client_sample, + int *bad_probes) +{ + if (server_sample->ts && send_sample->ts && client_sample->ts && + server_sample->id == send_sample->id && + server_sample->id == client_sample->id) + return true; + (*bad_probes)++; + return false; +} + +static int ptp_calc_offset_fastest(struct clock_sync_context *clock, + struct ptp_clock_result_msg *server, + struct ptp_clock_result_msg *client, + long long *offset_ret, long long *ts_ret, + int *bad_probes) +{ + struct ptp_clock_sample *sample_send; + long long delta_min = LLONG_MAX; + long long offset = 0; 
+ long long delta = 0; + long long ts = 0; + int max_i; + int i; + + *bad_probes = 0; + sample_send = server->samples + (server->count / 2); + max_i = server->count / 2 < client->count ? + server->count / 2 : client->count; + for (i = 0; i < max_i; i++) { + if (!good_probe(&server->samples[i], &sample_send[i], + &client->samples[i], bad_probes)) + continue; + ts = (sample_send[i].ts + server->samples[i].ts) / 2; + offset = client->samples[i].ts - ts; + + delta = server->samples[i].ts - sample_send[i].ts; + if (delta_min > delta) { + delta_min = delta; + *offset_ret = offset; + *ts_ret = ts; + } +#ifdef TSYNC_DEBUG + { + struct ptp_clock_sync *ptp; + + ptp = (struct ptp_clock_sync *)clock->proto_data; + if (ptp && ptp->debug_fd > 0) { + char buff[256]; + + sprintf(buff, "%lld %lld %lld\n", + ts, client->samples[i].ts, offset); + write(ptp->debug_fd, buff, strlen(buff)); + } + } +#endif + } + + return 0; +} + +static int ptp_calc_offset_hist(struct clock_sync_context *clock, + struct ptp_clock_result_msg *server, + struct ptp_clock_result_msg *client, + long long *offset_ret, long long *ts_ret, + int *bad_probes) +{ + struct ptp_clock_sample *sample_send; + long long timestamps[PTP_SYNC_LOOP]; + long long offsets[PTP_SYNC_LOOP]; + long long offset_min = LLONG_MAX; + long long offset_max = 0; + int hist[PTP_SYNC_LOOP]; + int ind, max = 0; + long long bin; + int i, k = 0; + + *bad_probes = 0; + memset(hist, 0, sizeof(int) * PTP_SYNC_LOOP); + sample_send = server->samples + (server->count / 2); + for (i = 0; i * 2 < server->count && i < client->count; i++) { + if (!good_probe(&server->samples[i], &sample_send[i], + &client->samples[i], bad_probes)) + continue; + timestamps[k] = (sample_send[i].ts + server->samples[i].ts) / 2; + offsets[k] = client->samples[i].ts - timestamps[k]; + if (offset_max < llabs(offsets[k])) + offset_max = llabs(offsets[k]); + if (offset_min > llabs(offsets[k])) + offset_min = llabs(offsets[k]); +#ifdef TSYNC_DEBUG + { + struct ptp_clock_sync 
*ptp; + + ptp = (struct ptp_clock_sync *)clock->proto_data; + + if (ptp && ptp->debug_fd > 0) { + char buff[256]; + + sprintf(buff, "%lld %lld %lld\n", + timestamps[k], + client->samples[i].ts, offsets[k]); + write(ptp->debug_fd, buff, strlen(buff)); + } + } +#endif + k++; + } + + bin = (offset_max - offset_min) / PTP_SYNC_LOOP; + for (i = 0; i < k; i++) { + ind = (llabs(offsets[i]) - offset_min) / bin; + if (ind < PTP_SYNC_LOOP) { + hist[ind]++; + if (max < hist[ind]) { + max = hist[ind]; + *offset_ret = offsets[i]; + *ts_ret = timestamps[i]; + } + } + } + + return 0; +} + +static void ntoh_ptp_results(struct ptp_clock_result_msg *msg) +{ + int i; + + msg->count = ntohl(msg->count); + for (i = 0; i < msg->count; i++) { + msg->samples[i].id = ntohl(msg->samples[i].id); + msg->samples[i].ts = ntohll(msg->samples[i].ts); + } + msg->series_id = ntohl(msg->series_id); +} + + +static void hton_ptp_results(struct ptp_clock_result_msg *msg) +{ + int i; + + for (i = 0; i < msg->count; i++) { + msg->samples[i].id = htonl(msg->samples[i].id); + msg->samples[i].ts = htonll(msg->samples[i].ts); + } + msg->series_id = htonl(msg->series_id); + msg->count = htonl(msg->count); +} + +static inline void ptp_track_clock(struct ptp_markers_context *ctx, + struct ptp_marker *marker) +{ + if (ctx->ptp->flags & PTP_FLAG_USE_MARKER) { + write(ctx->ptp->marker_fd, marker, sizeof(struct ptp_marker)); + } else { + struct timespec clock; + unsigned long long ts; + + clock_gettime(CLOCK_MONOTONIC_RAW, &clock); + ts = clock.tv_sec * 1000000000LL; + ts += clock.tv_nsec; + ptp_probe_store(ctx, marker, ts); + } +} + +static int ptp_clock_client(struct tracecmd_time_sync *tsync, + long long *offset, long long *timestamp) +{ + char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH]; + struct clock_sync_context *clock_context; + struct ptp_clock_offset_msg res_offset; + struct ptp_clock_start_msg start; + struct ptp_markers_context ctx; + struct ptp_clock_sync *ptp; + struct ptp_marker marker; + unsigned int 
sync_msg; + unsigned int size; + char *msg; + int count; + int ret; + + if (!tsync || !tsync->context || !tsync->msg_handle) + return -1; + + clock_context = (struct clock_sync_context *)tsync->context; + if (clock_context->proto_data == NULL) + return -1; + + ptp = (struct ptp_clock_sync *)clock_context->proto_data; + size = sizeof(start); + msg = (char *)&start; + ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, + sync_proto, &sync_msg, + &size, &msg); + if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || + sync_msg != PTP_SYNC_PKT_START) + return -1; + ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME, + PTP_SYNC_PKT_START, sizeof(start), + (char *)&start); + marker.data.local_id = clock_context->local_id; + marker.data.remote_id = clock_context->remote_id; + marker.series_id = ntohl(start.series_id); + marker.data.packet_id = 'r'; + ptp->series_id = marker.series_id; + ptp->flags = ntohl(start.flags); + msg = (char *)&count; + size = sizeof(count); + ctx.msg.count = 0; + ctx.size = PTP_SYNC_LOOP; + ctx.ptp = ptp; + ctx.clock = clock_context; + ctx.msg.series_id = ptp->series_id; + while (true) { + count = 0; + ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, + sync_proto, &sync_msg, + &size, &msg); + if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || + sync_msg != PTP_SYNC_PKT_PROBE || !ntohl(count)) + break; + marker.data.count = ntohl(count); + ptp_track_clock(&ctx, &marker); + ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME, + PTP_SYNC_PKT_PROBE, + sizeof(count), (char *)&count); + if (ret) + break; + } + + if (strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || + sync_msg != PTP_SYNC_PKT_END) + return -1; + + if (ptp->flags & PTP_FLAG_USE_MARKER) + tracefs_iterate_raw_events(ptp->tep, clock_context->instance, + NULL, 0, ptp_marker_find, &ctx); + + hton_ptp_results(&ctx.msg); + ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME, + PTP_SYNC_PKT_PROBES, + 
sizeof(ctx.msg), (char *)&ctx.msg); + + msg = (char *)&res_offset; + size = sizeof(res_offset); + ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, + sync_proto, &sync_msg, + &size, (char **)&msg); + if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || + sync_msg != PTP_SYNC_PKT_OFFSET) + return -1; + + *offset = ntohll(res_offset.offset); + *timestamp = ntohll(res_offset.ts); + + return 0; +} + + +static int ptp_clock_server(struct tracecmd_time_sync *tsync, + long long *offset, long long *timestamp) +{ + char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH]; + struct ptp_clock_result_msg *results = NULL; + struct clock_sync_context *clock_context; + struct ptp_clock_offset_msg res_offset; + struct ptp_clock_start_msg start; + struct ptp_markers_context ctx; + int sync_loop = PTP_SYNC_LOOP; + struct ptp_clock_sync *ptp; + struct ptp_marker marker; + unsigned int sync_msg; + unsigned int size; + int bad_probes; + int count = 1; + int msg_count; + int msg_ret; + char *msg; + int ret; + + if (!tsync || !tsync->context || !tsync->msg_handle) + return -1; + + clock_context = (struct clock_sync_context *)tsync->context; + if (clock_context->proto_data == NULL) + return -1; + + ptp = (struct ptp_clock_sync *)clock_context->proto_data; + ptp->flags = ptp_flags; + memset(&start, 0, sizeof(start)); + start.series_id = htonl(ptp->series_id + 1); + start.flags = htonl(ptp->flags); + ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME, + PTP_SYNC_PKT_START, sizeof(start), + (char *)&start); + if (!ret) + ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, + sync_proto, &sync_msg, + NULL, NULL); + if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || + sync_msg != PTP_SYNC_PKT_START) + return -1; + + tracefs_instance_file_write(clock_context->instance, "trace", "\0"); + + ptp->series_id++; + marker.data.local_id = clock_context->local_id; + marker.data.remote_id = clock_context->remote_id; + marker.series_id = ptp->series_id; + msg = 
(char *)&msg_ret; + size = sizeof(msg_ret); + ctx.size = 2*PTP_SYNC_LOOP; + ctx.ptp = ptp; + ctx.clock = clock_context; + ctx.msg.count = 0; + ctx.msg.series_id = ptp->series_id; + do { + marker.data.count = count++; + marker.data.packet_id = 's'; + msg_count = htonl(marker.data.count); + ptp_track_clock(&ctx, &marker); + ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME, + PTP_SYNC_PKT_PROBE, + sizeof(msg_count), + (char *)&msg_count); + if (!ret) + ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, + sync_proto, &sync_msg, + &size, &msg); + + marker.data.packet_id = 'r'; + ptp_track_clock(&ctx, &marker); + if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || + sync_msg != PTP_SYNC_PKT_PROBE || + ntohl(msg_ret) != marker.data.count) + break; + } while (--sync_loop); + + if (sync_loop) + return -1; + + ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME, + PTP_SYNC_PKT_END, 0, NULL); + + size = 0; + ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, + sync_proto, &sync_msg, + &size, (char **)&results); + if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || + sync_msg != PTP_SYNC_PKT_PROBES || size == 0 || results == NULL) + return -1; + + ntoh_ptp_results(results); + if (ptp->flags & PTP_FLAG_USE_MARKER) + tracefs_iterate_raw_events(ptp->tep, clock_context->instance, + NULL, 0, ptp_marker_find, &ctx); + if (ptp->flags & PTP_FLAG_FASTEST_RESPONSE) + ptp_calc_offset_fastest(clock_context, &ctx.msg, results, offset, + timestamp, &bad_probes); + else + ptp_calc_offset_hist(clock_context, &ctx.msg, results, offset, + timestamp, &bad_probes); +#ifdef TSYNC_DEBUG + { + char buff[256]; + int res_fd; + + sprintf(buff, "res-id%d.txt", clock_context->remote_id); + + res_fd = open(buff, O_WRONLY|O_APPEND, 0644); + if (res_fd > 0) { + if (*offset && *timestamp) { + sprintf(buff, "%d %lld %lld\n", + ptp->series_id, *offset, *timestamp); + write(res_fd, buff, strlen(buff)); + } + close(res_fd); + } + + printf("\n 
calculated offset %d: %lld, %d probes, filtered out %d, PTP flags 0x%X\n\r", + ptp->series_id, *offset, results->count, bad_probes, ptp->flags); + if (ptp && ptp->debug_fd > 0) { + sprintf(buff, "%lld %lld 0\n", *offset, *timestamp); + write(ptp->debug_fd, buff, strlen(buff)); + close(ptp->debug_fd); + ptp->debug_fd = -1; + } + + } +#endif + + res_offset.offset = htonll(*offset); + res_offset.ts = htonll(*timestamp); + ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME, + PTP_SYNC_PKT_OFFSET, + sizeof(res_offset), + (char *)&res_offset); + + free(results); + return 0; +} + +static int ptp_clock_sync_calc(struct tracecmd_time_sync *tsync, + long long *offset, long long *scaling, long long *frac, + long long *timestamp, unsigned int cpu) +{ + struct clock_sync_context *clock_context; + int ret; + + if (!tsync || !tsync->context) + return -1; + clock_context = (struct clock_sync_context *)tsync->context; + +#ifdef TSYNC_DEBUG + if (clock_context->is_server) { + struct ptp_clock_sync *ptp; + char buff[256]; + + ptp = (struct ptp_clock_sync *)clock_context->proto_data; + if (ptp->debug_fd > 0) + close(ptp->debug_fd); + sprintf(buff, "s-id%d_%d.txt", + clock_context->remote_id, ptp->series_id+1); + ptp->debug_fd = open(buff, O_CREAT|O_WRONLY|O_TRUNC, 0644); + } +#endif + + if (scaling) + *scaling = 1; + if (frac) + *frac = 0; + if (clock_context->is_server) + ret = ptp_clock_server(tsync, offset, timestamp); + else + ret = ptp_clock_client(tsync, offset, timestamp); + + return ret; +} + +int ptp_clock_sync_register(void) +{ + return tracecmd_tsync_proto_register(PTP_NAME, PTP_ACCURACY, + TRACECMD_TIME_SYNC_ROLE_GUEST | + TRACECMD_TIME_SYNC_ROLE_HOST | + TRACECMD_TIME_SYNC_ROLE_CLIENT | + TRACECMD_TIME_SYNC_ROLE_SERVER, + 0, TRACECMD_TSYNC_FLAG_INTERPOLATE, + ptp_clock_sync_init, + ptp_clock_sync_free, + ptp_clock_sync_calc); + +} + +int ptp_clock_sync_unregister(void) +{ + return tracecmd_tsync_proto_unregister(PTP_NAME); +} diff --git 
#ifdef __ANDROID__
#include <sys/syscall.h>

/*
 * Local implementations of the pthread affinity calls for Android builds,
 * done with the raw scheduler syscalls.
 *
 * The syscall numbers come from <sys/syscall.h> because they differ between
 * architectures. The kernel expects a thread id, so the pthread handle is
 * converted with pthread_gettid_np(). Like the pthread calls they stand in
 * for, both return 0 on success and an errno value on failure.
 */
static int pthread_setaffinity_np(pthread_t thread, size_t cpusetsize, const cpu_set_t *cpuset)
{
	if (syscall(SYS_sched_setaffinity, pthread_gettid_np(thread),
		    cpusetsize, cpuset) < 0)
		return errno;

	return 0;
}

static int pthread_getaffinity_np(pthread_t thread, size_t cpusetsize, cpu_set_t *cpuset)
{
	long ret = syscall(SYS_sched_getaffinity, pthread_gettid_np(thread),
			   cpusetsize, cpuset);

	if (ret < 0)
		return errno;
	/*
	 * On success the raw syscall returns the number of bytes it wrote.
	 * Clear the part of the caller's mask that it did not fill.
	 */
	if ((size_t)ret < cpusetsize)
		memset((char *)cpuset + ret, 0, cpusetsize - ret);

	return 0;
}
#endif /* __ANDROID__ */
struct tsync_proto *proto; + + if (!proto_name) + return NULL; + for (proto = tsync_proto_list; proto; proto = proto->next) { + if (strlen(proto->proto_name) == strlen(proto_name) && + !strncmp(proto->proto_name, proto_name, TRACECMD_TSYNC_PNAME_LENGTH)) + return proto; + } + return NULL; +} + +/** + * tracecmd_tsync_init - Initialize the global, per task, time sync data. + */ +void tracecmd_tsync_init(void) +{ + ptp_clock_sync_register(); + kvm_clock_sync_register(); +} + +int tracecmd_tsync_proto_register(const char *proto_name, int accuracy, int roles, + int supported_clocks, unsigned int flags, + int (*init)(struct tracecmd_time_sync *), + int (*free)(struct tracecmd_time_sync *), + int (*calc)(struct tracecmd_time_sync *, + long long *, long long *, long long *, + long long *, unsigned int)) +{ + struct tsync_proto *proto = NULL; + + if (tsync_proto_find(proto_name)) + return -1; + proto = calloc(1, sizeof(struct tsync_proto)); + if (!proto) + return -1; + strncpy(proto->proto_name, proto_name, TRACECMD_TSYNC_PNAME_LENGTH); + proto->accuracy = accuracy; + proto->roles = roles; + proto->flags = flags; + proto->supported_clocks = supported_clocks; + proto->clock_sync_init = init; + proto->clock_sync_free = free; + proto->clock_sync_calc = calc; + + proto->next = tsync_proto_list; + tsync_proto_list = proto; + return 0; +} + +int tracecmd_tsync_proto_unregister(char *proto_name) +{ + struct tsync_proto **last = &tsync_proto_list; + + if (!proto_name) + return -1; + + for (; *last; last = &(*last)->next) { + if (strlen((*last)->proto_name) == strlen(proto_name) && + !strncmp((*last)->proto_name, proto_name, TRACECMD_TSYNC_PNAME_LENGTH)) { + struct tsync_proto *proto = *last; + + *last = proto->next; + free(proto); + return 0; + } + } + + return -1; +} + +bool __hidden tsync_proto_is_supported(const char *proto_name) +{ + if (tsync_proto_find(proto_name)) + return true; + return false; +} + +/** + * tracecmd_tsync_get_offsets - Return the calculated time offsets + 
* + * @tsync: Pointer to time sync context + * @cpu: CPU for which to get the calculated offsets + * @count: Returns the number of calculated time offsets + * @ts: Array of size @count containing timestamps of callculated offsets + * @offsets: array of size @count, containing offsets for each timestamp + * @scalings: array of size @count, containing scaling ratios for each timestamp + * @frac: array of size @count, containing fraction bits for each timestamp + * + * Retuns -1 in case of an error, or 0 otherwise + */ +int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu, + int *count, long long **ts, + long long **offsets, long long **scalings, long long **frac) +{ + struct clock_sync_context *tsync_context; + + if (!tsync || !tsync->context) + return -1; + tsync_context = (struct clock_sync_context *)tsync->context; + if (cpu >= tsync_context->cpu_count || !tsync_context->offsets) + return -1; + if (count) + *count = tsync_context->offsets[cpu].sync_count; + if (ts) + *ts = tsync_context->offsets[cpu].sync_ts; + if (offsets) + *offsets = tsync_context->offsets[cpu].sync_offsets; + if (scalings) + *scalings = tsync_context->offsets[cpu].sync_scalings; + if (frac) + *frac = tsync_context->offsets[cpu].sync_frac; + + return 0; +} + +/** + * tsync_get_proto_flags - Get protocol flags + * + * @tsync: Pointer to time sync context + * @flags: Returns the protocol flags, a combination of TRACECMD_TSYNC_FLAG_... 
+ * + * Retuns -1 in case of an error, or 0 otherwise + */ +static int tsync_get_proto_flags(struct tracecmd_time_sync *tsync, + unsigned int *flags) +{ + struct tsync_proto *protocol; + + if (!tsync) + return -1; + protocol = tsync_proto_find(tsync->proto_name); + if (!protocol) + return -1; + + if (flags) + *flags = protocol->flags; + + return 0; +} + + +#define PROTO_MASK_SIZE (sizeof(char)) +#define PROTO_MASK_BITS (PROTO_MASK_SIZE * 8) +/** + * tsync_proto_select - Select time sync protocol, to be used for + * timestamp synchronization with a peer + * + * @protos: list of tsync protocol names + * @clock : trace clock + * @role : local time sync role + * + * Retuns pointer to a protocol name, that can be used with the peer, or NULL + * in case there is no match with supported protocols. + * The returned string MUST NOT be freed by the caller + */ +static const char * +tsync_proto_select(const struct tracecmd_tsync_protos *protos, + const char *clock, enum tracecmd_time_sync_role role) +{ + struct tsync_proto *selected = NULL; + struct tsync_proto *proto; + char **pname; + int clock_id = 0; + + if (!protos) + return NULL; + + clock_id = tracecmd_clock_str2id(clock); + pname = protos->names; + while (*pname) { + for (proto = tsync_proto_list; proto; proto = proto->next) { + if (!(proto->roles & role)) + continue; + if (proto->supported_clocks && clock_id && + !(proto->supported_clocks & clock_id)) + continue; + if (strncmp(proto->proto_name, *pname, TRACECMD_TSYNC_PNAME_LENGTH)) + continue; + if (selected) { + if (selected->accuracy > proto->accuracy) + selected = proto; + } else + selected = proto; + } + pname++; + } + + if (selected) + return selected->proto_name; + + return NULL; +} + +/** + * tracecmd_tsync_proto_getall - Returns list of all supported + * time sync protocols + * @protos: return, allocated list of time sync protocol names, + * supported by the peer. 
Must be freed by free() + * @clock: selected trace clock + * @role: supported protocol role + * + * If completed successfully 0 is returned and allocated list of strings in @protos. + * The last list entry is NULL. In case of an error, -1 is returned. + * @protos must be freed with free() + */ +int tracecmd_tsync_proto_getall(struct tracecmd_tsync_protos **protos, const char *clock, int role) +{ + struct tracecmd_tsync_protos *plist = NULL; + struct tsync_proto *proto; + int clock_id = 0; + int count = 1; + int i; + + if (clock) + clock_id = tracecmd_clock_str2id(clock); + for (proto = tsync_proto_list; proto; proto = proto->next) { + if (!(proto->roles & role)) + continue; + if (proto->supported_clocks && clock_id && + !(proto->supported_clocks & clock_id)) + continue; + count++; + } + plist = calloc(1, sizeof(struct tracecmd_tsync_protos)); + if (!plist) + goto error; + plist->names = calloc(count, sizeof(char *)); + if (!plist->names) + return -1; + + for (i = 0, proto = tsync_proto_list; proto && i < (count - 1); proto = proto->next) { + if (!(proto->roles & role)) + continue; + if (proto->supported_clocks && clock_id && + !(proto->supported_clocks & clock_id)) + continue; + plist->names[i++] = proto->proto_name; + } + + *protos = plist; + return 0; + +error: + if (plist) { + free(plist->names); + free(plist); + } + return -1; +} + +static int get_first_cpu(cpu_set_t **pin_mask, size_t *m_size) +{ + int cpus = tracecmd_count_cpus(); + cpu_set_t *cpu_mask; + int mask_size; + int i; + + cpu_mask = CPU_ALLOC(cpus); + *pin_mask = CPU_ALLOC(cpus); + if (!cpu_mask || !*pin_mask || 1) + goto error; + + mask_size = CPU_ALLOC_SIZE(cpus); + CPU_ZERO_S(mask_size, cpu_mask); + CPU_ZERO_S(mask_size, *pin_mask); + + if (sched_getaffinity(0, mask_size, cpu_mask) == -1) + goto error; + + for (i = 0; i < cpus; i++) { + if (CPU_ISSET_S(i, mask_size, cpu_mask)) { + CPU_SET_S(i, mask_size, *pin_mask); + break; + } + } + + if (CPU_COUNT_S(mask_size, *pin_mask) < 1) + goto error; + 
+ CPU_FREE(cpu_mask); + *m_size = mask_size; + return 0; + +error: + if (cpu_mask) + CPU_FREE(cpu_mask); + if (*pin_mask) + CPU_FREE(*pin_mask); + *pin_mask = NULL; + *m_size = 0; + return -1; +} + +static struct tracefs_instance * +clock_synch_create_instance(const char *clock, unsigned int cid) +{ + struct tracefs_instance *instance; + char inst_name[256]; + + snprintf(inst_name, 256, "clock_synch-%d", cid); + + instance = tracefs_instance_create(inst_name); + if (!instance) + return NULL; + + tracefs_instance_file_write(instance, "trace", "\0"); + if (clock) + tracefs_instance_file_write(instance, "trace_clock", clock); + return instance; +} + +static void +clock_synch_delete_instance(struct tracefs_instance *inst) +{ + if (!inst) + return; + tracefs_instance_destroy(inst); + tracefs_instance_free(inst); +} + +static int clock_context_init(struct tracecmd_time_sync *tsync, + struct tsync_proto **proto, bool guest) +{ + struct clock_sync_context *clock = NULL; + struct tsync_proto *protocol; + + if (tsync->context) + return 0; + + protocol = tsync_proto_find(tsync->proto_name); + if (!protocol || !protocol->clock_sync_calc) + return -1; + + clock = calloc(1, sizeof(struct clock_sync_context)); + if (!clock) + return -1; + clock->is_guest = guest; + clock->is_server = clock->is_guest; + + clock->instance = clock_synch_create_instance(tsync->clock_str, + tsync->remote_id); + if (!clock->instance) + goto error; + + clock->cpu_count = tsync->vcpu_count; + if (clock->cpu_count) { + clock->offsets = calloc(clock->cpu_count, sizeof(struct clock_sync_offsets)); + if (!clock->offsets) + goto error; + } + + tsync->context = clock; + if (protocol->clock_sync_init && protocol->clock_sync_init(tsync) < 0) + goto error; + + *proto = protocol; + + return 0; +error: + tsync->context = NULL; + if (clock->instance) + clock_synch_delete_instance(clock->instance); + free(clock->offsets); + free(clock); + return -1; +} + +/** + * tracecmd_tsync_free - Free time sync context, 
allocated by + * tracecmd_tsync_with_host() or tracecmd_tsync_with_guest() APIs + * + * @tsync: Pointer to time sync context + * + */ +void tracecmd_tsync_free(struct tracecmd_time_sync *tsync) +{ + struct clock_sync_context *tsync_context; + struct tsync_proto *proto; + int i; + + if (!tsync) + return; + + tsync_context = (struct clock_sync_context *)tsync->context; + + proto = tsync_proto_find(tsync->proto_name); + if (proto && proto->clock_sync_free) + proto->clock_sync_free(tsync); + + + if (tsync_context) { + clock_synch_delete_instance(tsync_context->instance); + tsync_context->instance = NULL; + + if (tsync_context->cpu_count && tsync_context->offsets) { + for (i = 0; i < tsync_context->cpu_count; i++) { + free(tsync_context->offsets[i].sync_ts); + free(tsync_context->offsets[i].sync_offsets); + free(tsync_context->offsets[i].sync_scalings); + free(tsync_context->offsets[i].sync_frac); + tsync_context->offsets[i].sync_ts = NULL; + tsync_context->offsets[i].sync_offsets = NULL; + tsync_context->offsets[i].sync_scalings = NULL; + tsync_context->offsets[i].sync_frac = NULL; + tsync_context->offsets[i].sync_count = 0; + tsync_context->offsets[i].sync_size = 0; + } + free(tsync_context->offsets); + tsync_context->offsets = NULL; + } + } + + if (tsync->msg_handle) + tracecmd_msg_handle_close(tsync->msg_handle); + + /* These are only created from the host */ + if (tsync->guest_pid) { + pthread_mutex_destroy(&tsync->lock); + pthread_cond_destroy(&tsync->cond); + pthread_barrier_destroy(&tsync->first_sync); + } + + free(tsync->clock_str); + free(tsync->proto_name); + free(tsync); +} + +static cpu_set_t *pin_to_cpu(int cpu) +{ + static size_t size; + static int cpus; + cpu_set_t *mask = NULL; + cpu_set_t *old = NULL; + + if (!cpus) { + cpus = tracecmd_count_cpus(); + size = CPU_ALLOC_SIZE(cpus); + } + if (cpu >= cpus) + goto error; + + mask = CPU_ALLOC(cpus); + if (!mask) + goto error; + old = CPU_ALLOC(cpus); + if (!old) + goto error; + + CPU_ZERO_S(size, mask); + 
CPU_SET_S(cpu, size, mask); + if (pthread_getaffinity_np(pthread_self(), size, old)) + goto error; + if (pthread_setaffinity_np(pthread_self(), size, mask)) + goto error; + + CPU_FREE(mask); + return old; + +error: + if (mask) + CPU_FREE(mask); + if (old) + CPU_FREE(old); + return NULL; +} + +static void restore_pin_to_cpu(cpu_set_t *mask) +{ + static size_t size; + + if (!size) + size = CPU_ALLOC_SIZE(tracecmd_count_cpus()); + + pthread_setaffinity_np(pthread_self(), size, mask); + CPU_FREE(mask); +} + +static int tsync_send(struct tracecmd_time_sync *tsync, + struct tsync_proto *proto, unsigned int cpu) +{ + cpu_set_t *old_set = NULL; + long long timestamp = 0; + long long scaling = 0; + long long offset = 0; + long long frac = 0; + int ret; + + old_set = pin_to_cpu(cpu); + ret = proto->clock_sync_calc(tsync, &offset, &scaling, &frac, ×tamp, cpu); + if (old_set) + restore_pin_to_cpu(old_set); + + return ret; +} + +static void tsync_with_host(struct tracecmd_time_sync *tsync) +{ + char protocol[TRACECMD_TSYNC_PNAME_LENGTH]; + struct tsync_probe_request_msg probe; + struct tsync_proto *proto; + unsigned int command; + unsigned int size; + char *msg; + int ret; + + clock_context_init(tsync, &proto, true); + if (!tsync->context) + return; + + msg = (char *)&probe; + size = sizeof(probe); + while (true) { + memset(&probe, 0, size); + ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, + protocol, &command, + &size, &msg); + + if (ret || strncmp(protocol, TRACECMD_TSYNC_PROTO_NONE, TRACECMD_TSYNC_PNAME_LENGTH) || + command != TRACECMD_TIME_SYNC_CMD_PROBE) + break; + ret = tsync_send(tsync, proto, probe.cpu); + if (ret) + break; + } +} + +static int record_sync_sample(struct clock_sync_offsets *offsets, int array_step, + long long offset, long long scaling, long long frac, long long ts) +{ + long long *sync_scalings = NULL; + long long *sync_offsets = NULL; + long long *sync_frac = NULL; + long long *sync_ts = NULL; + + if (offsets->sync_count >= offsets->sync_size) { 
+ sync_ts = realloc(offsets->sync_ts, + (offsets->sync_size + array_step) * sizeof(long long)); + sync_offsets = realloc(offsets->sync_offsets, + (offsets->sync_size + array_step) * sizeof(long long)); + sync_scalings = realloc(offsets->sync_scalings, + (offsets->sync_size + array_step) * sizeof(long long)); + sync_frac = realloc(offsets->sync_frac, + (offsets->sync_size + array_step) * sizeof(long long)); + + if (!sync_ts || !sync_offsets || !sync_scalings || !sync_frac) { + free(sync_ts); + free(sync_offsets); + free(sync_scalings); + free(sync_frac); + return -1; + } + offsets->sync_size += array_step; + offsets->sync_ts = sync_ts; + offsets->sync_offsets = sync_offsets; + offsets->sync_scalings = sync_scalings; + offsets->sync_frac = sync_frac; + } + + offsets->sync_ts[offsets->sync_count] = ts; + offsets->sync_offsets[offsets->sync_count] = offset; + offsets->sync_scalings[offsets->sync_count] = scaling; + offsets->sync_frac[offsets->sync_count] = frac; + offsets->sync_count++; + + return 0; +} + +static int tsync_get_sample(struct tracecmd_time_sync *tsync, unsigned int cpu, + struct tsync_proto *proto, int array_step) +{ + struct clock_sync_context *clock; + long long timestamp = 0; + long long scaling = 0; + long long offset = 0; + long long frac = 0; + int ret; + + ret = proto->clock_sync_calc(tsync, &offset, &scaling, &frac, ×tamp, cpu); + if (ret) { + tracecmd_warning("Failed to synchronize timestamps with guest"); + return -1; + } + if (!offset || !timestamp || !scaling) + return 0; + clock = tsync->context; + if (!clock || cpu >= clock->cpu_count || !clock->offsets) + return -1; + return record_sync_sample(&clock->offsets[cpu], array_step, + offset, scaling, frac, timestamp); +} + +#define TIMER_SEC_NANO 1000000000LL +static inline void get_ts_loop_delay(struct timespec *timeout, int delay_ms) +{ + memset(timeout, 0, sizeof(struct timespec)); + clock_gettime(CLOCK_REALTIME, timeout); + + timeout->tv_nsec += ((unsigned long long)delay_ms * 1000000LL); + 
+ if (timeout->tv_nsec >= TIMER_SEC_NANO) { + timeout->tv_sec += timeout->tv_nsec / TIMER_SEC_NANO; + timeout->tv_nsec %= TIMER_SEC_NANO; + } +} + +#define CLOCK_TS_ARRAY 5 +static int tsync_with_guest(struct tracecmd_time_sync *tsync) +{ + struct tsync_probe_request_msg probe; + int ts_array_size = CLOCK_TS_ARRAY; + struct tsync_proto *proto; + struct timespec timeout; + bool first = true; + bool end = false; + int ret; + int i; + + clock_context_init(tsync, &proto, false); + if (!tsync->context) { + pthread_barrier_wait(&tsync->first_sync); + return -1; + } + + if (tsync->loop_interval > 0 && + tsync->loop_interval < (CLOCK_TS_ARRAY * 1000)) + ts_array_size = (CLOCK_TS_ARRAY * 1000) / tsync->loop_interval; + + while (true) { + pthread_mutex_lock(&tsync->lock); + for (i = 0; i < tsync->vcpu_count; i++) { + probe.cpu = i; + ret = tracecmd_msg_send_time_sync(tsync->msg_handle, + TRACECMD_TSYNC_PROTO_NONE, + TRACECMD_TIME_SYNC_CMD_PROBE, + sizeof(probe), (char *)&probe); + ret = tsync_get_sample(tsync, i, proto, ts_array_size); + if (ret) + break; + } + if (first) { + first = false; + pthread_barrier_wait(&tsync->first_sync); + } + if (end || i < tsync->vcpu_count) { + pthread_mutex_unlock(&tsync->lock); + break; + } + if (tsync->loop_interval > 0) { + get_ts_loop_delay(&timeout, tsync->loop_interval); + ret = pthread_cond_timedwait(&tsync->cond, &tsync->lock, &timeout); + pthread_mutex_unlock(&tsync->lock); + if (ret && ret != ETIMEDOUT) + break; + else if (!ret) + end = true; + } else { + pthread_cond_wait(&tsync->cond, &tsync->lock); + end = true; + pthread_mutex_unlock(&tsync->lock); + } + }; + + tracecmd_msg_send_time_sync(tsync->msg_handle, + TRACECMD_TSYNC_PROTO_NONE, + TRACECMD_TIME_SYNC_CMD_STOP, + 0, NULL); + return 0; +} + +static void *tsync_host_thread(void *data) +{ + struct tracecmd_time_sync *tsync = data; + + tsync_with_guest(tsync); + pthread_exit(0); +} + +/** + * tracecmd_tsync_with_guest - Synchronize timestamps with guest + * + * @trace_id: 
Local ID for the current trace session + * @fd: file descriptor of guest + * @guest_pid: PID of the host OS process, running the guest + * @guest_cpus: Number of the guest VCPUs + * @proto_name: Name of the negotiated time synchronization protocol + * @clock: Trace clock, used for that session + * + * On success, a pointer to time sync context is returned, or NULL in + * case of an error. The context must be freed with tracecmd_tsync_free() + * + * This API spawns a pthread, which performs time stamps synchronization + * until tracecmd_tsync_with_guest_stop() is called. + */ +struct tracecmd_time_sync * +tracecmd_tsync_with_guest(unsigned long long trace_id, int loop_interval, + unsigned int fd, int guest_pid, + int guest_cpus, const char *proto_name, const char *clock) +{ + struct tracecmd_time_sync *tsync; + cpu_set_t *pin_mask = NULL; + pthread_attr_t attrib; + size_t mask_size = 0; + int ret; + + if (!proto_name) + return NULL; + + tsync = calloc(1, sizeof(*tsync)); + if (!tsync) + return NULL; + + tsync->trace_id = trace_id; + tsync->loop_interval = loop_interval; + tsync->proto_name = strdup(proto_name); + + tsync->msg_handle = tracecmd_msg_handle_alloc(fd, 0); + if (!tsync->msg_handle) { + ret = -1; + goto error; + } + tsync->guest_pid = guest_pid; + tsync->vcpu_count = guest_cpus; + + if (clock) + tsync->clock_str = strdup(clock); + pthread_mutex_init(&tsync->lock, NULL); + pthread_cond_init(&tsync->cond, NULL); + pthread_barrier_init(&tsync->first_sync, NULL, 2); + pthread_attr_init(&attrib); + pthread_attr_setdetachstate(&attrib, PTHREAD_CREATE_JOINABLE); + + ret = pthread_create(&tsync->thread, &attrib, tsync_host_thread, tsync); + if (ret) + goto error; + tsync->thread_running = true; + + if (!get_first_cpu(&pin_mask, &mask_size)) + pthread_setaffinity_np(tsync->thread, mask_size, pin_mask); + pthread_barrier_wait(&tsync->first_sync); + + if (pin_mask) + CPU_FREE(pin_mask); + pthread_attr_destroy(&attrib); + + return tsync; + +error: + if 
(tsync->msg_handle) + tracecmd_msg_handle_close(tsync->msg_handle); + else if (fd >= 0) + close(fd); + free(tsync); + + return NULL; +} + +/** + * tracecmd_write_guest_time_shift - Write collected timestamp corrections in a file + * + * @handle: Handle to a trace file, where timestamp corrections will be saved + * @tsync: Time sync context with collected timestamp corrections + * + * Returns 0 on success, or -1 in case of an error. + * + * This API writes collected timestamp corrections in the metadata of the + * trace file, as TRACECMD_OPTION_TIME_SHIFT option. + */ +int tracecmd_write_guest_time_shift(struct tracecmd_output *handle, + struct tracecmd_time_sync *tsync) +{ + struct iovec *vector = NULL; + unsigned int flags; + long long *scalings = NULL; + long long *offsets = NULL; + long long *frac = NULL; + long long *ts = NULL; + int vcount; + int count; + int i, j; + int ret = -1; + + if (!tsync->vcpu_count) + return -1; + vcount = 3 + (5 * tsync->vcpu_count); + vector = calloc(vcount, sizeof(struct iovec)); + if (!vector) + return -1; + ret = tsync_get_proto_flags(tsync, &flags); + if (ret < 0) + goto out; + + j = 0; + vector[j].iov_len = 8; + vector[j++].iov_base = &tsync->trace_id; + vector[j].iov_len = 4; + vector[j++].iov_base = &flags; + vector[j].iov_len = 4; + vector[j++].iov_base = &tsync->vcpu_count; + for (i = 0; i < tsync->vcpu_count; i++) { + if (j >= vcount) + break; + ret = tracecmd_tsync_get_offsets(tsync, i, &count, + &ts, &offsets, &scalings, NULL); + if (ret < 0 || !count || !ts || !offsets || !scalings) + break; + vector[j].iov_len = 4; + vector[j++].iov_base = &count; + vector[j].iov_len = 8 * count; + vector[j++].iov_base = ts; + vector[j].iov_len = 8 * count; + vector[j++].iov_base = offsets; + vector[j].iov_len = 8 * count; + vector[j++].iov_base = scalings; + } + if (i < tsync->vcpu_count) { + ret = -1; + goto out; + } + /* + * Writing fraction bits into the option is implemented in a separate loop for + * backward compatibility. 
In the trace-cmd 2.9 release, this option has only offset + * and scaling. That legacy code must work with the new extended option. + * + */ + for (i = 0; i < tsync->vcpu_count; i++) { + if (j >= vcount) + break; + ret = tracecmd_tsync_get_offsets(tsync, i, NULL, + NULL, NULL, NULL, &frac); + if (ret < 0) + break; + vector[j].iov_len = 8 * count; + vector[j++].iov_base = frac; + } + if (i < tsync->vcpu_count) { + ret = -1; + goto out; + } + + tracecmd_add_option_v(handle, TRACECMD_OPTION_TIME_SHIFT, vector, vcount); +#ifdef TSYNC_DEBUG + if (count > 1) + printf("Got %d timestamp synch samples in %lld ns trace\n\r", + count, ts[count - 1] - ts[0]); +#endif + ret = 0; +out: + free(vector); + return ret; +} + +/** + * tracecmd_tsync_with_guest_stop - Stop the time sync session with a guest + * + * @tsync: Time sync context, representing a running time sync session + * + * Returns 0 on success, or -1 in case of an error. + * + */ +int tracecmd_tsync_with_guest_stop(struct tracecmd_time_sync *tsync) +{ + if (!tsync || !tsync->thread_running) + return -1; + + /* Signal the time synchronization thread to complete and wait for it */ + pthread_mutex_lock(&tsync->lock); + pthread_cond_signal(&tsync->cond); + pthread_mutex_unlock(&tsync->lock); + pthread_join(tsync->thread, NULL); + return 0; +} + +static void *tsync_agent_thread(void *data) +{ + struct tracecmd_time_sync *tsync = data; + long ret = 0; + int sd; + + while (true) { + tracecmd_debug("Listening on fd:%d\n", tsync->msg_handle->fd); + sd = accept(tsync->msg_handle->fd, NULL, NULL); + tracecmd_debug("Accepted fd:%d\n", sd); + if (sd < 0) { + if (errno == EINTR) + continue; + ret = -1; + goto out; + } + break; + } + close(tsync->msg_handle->fd); + tsync->msg_handle->fd = sd; + + tsync_with_host(tsync); + +out: + pthread_exit((void *)ret); +} + +/** + * tracecmd_tsync_with_host - Synchronize timestamps with host + * @fd: File descriptor connecting with the host + * @tsync_protos: List of tsync protocols, supported by 
the host + * @clock: Trace clock, used for that session + * @port: returned, VSOCKET port, on which the guest listens for tsync requests + * @remote_id: Identifier to uniquely identify the remote host + * @local_id: Identifier to uniquely identify the local machine + * + * On success, a pointer to time sync context is returned, or NULL in + * case of an error. The context must be freed with tracecmd_tsync_free() + * + * This API spawns a pthread, which performs time stamps synchronization + * until tracecmd_tsync_with_host_stop() is called. + */ +struct tracecmd_time_sync * +tracecmd_tsync_with_host(int fd, + const struct tracecmd_tsync_protos *tsync_protos, + const char *clock, int remote_id, int local_id) +{ + struct tracecmd_time_sync *tsync; + cpu_set_t *pin_mask = NULL; + pthread_attr_t attrib; + size_t mask_size = 0; + const char *proto; + int ret; + + tsync = calloc(1, sizeof(struct tracecmd_time_sync)); + if (!tsync) + return NULL; + + proto = tsync_proto_select(tsync_protos, clock, + TRACECMD_TIME_SYNC_ROLE_GUEST); + if (!proto) + goto error; + tsync->proto_name = strdup(proto); + tsync->msg_handle = tracecmd_msg_handle_alloc(fd, 0); + if (clock) + tsync->clock_str = strdup(clock); + + tsync->remote_id = remote_id; + tsync->local_id = local_id; + + pthread_attr_init(&attrib); + tsync->vcpu_count = tracecmd_count_cpus(); + pthread_attr_setdetachstate(&attrib, PTHREAD_CREATE_JOINABLE); + + ret = pthread_create(&tsync->thread, &attrib, tsync_agent_thread, tsync); + if (ret) { + pthread_attr_destroy(&attrib); + goto error; + } + tsync->thread_running = true; + if (!get_first_cpu(&pin_mask, &mask_size)) + pthread_setaffinity_np(tsync->thread, mask_size, pin_mask); + + if (pin_mask) + CPU_FREE(pin_mask); + pthread_attr_destroy(&attrib); + return tsync; + +error: + if (tsync) { + if (tsync->msg_handle) { + /* Do not close the fd that was passed it */ + tsync->msg_handle->fd = -1; + tracecmd_msg_handle_close(tsync->msg_handle); + } + free(tsync->clock_str); + 
free(tsync); + } + + return NULL; + +} + +/** + * tracecmd_tsync_with_host_stop - Stop the time sync session with a host + * + * @tsync: Time sync context, representing a running time sync session + * + * Returns 0 on success, or error number in case of an error. + * + */ +int tracecmd_tsync_with_host_stop(struct tracecmd_time_sync *tsync) +{ + return pthread_join(tsync->thread, NULL); +} + +/** + * tracecmd_tsync_get_selected_proto - Return the seleceted time sync protocol + * @tsync: Time sync context, representing a running time sync session + * @selected_proto: return, name of the selected time sync protocol for this session + * + * Returns 0 on success, or -1 in case of an error. + * + */ +int tracecmd_tsync_get_selected_proto(struct tracecmd_time_sync *tsync, + char **selected_proto) +{ + if (!tsync) + return -1; + + if (selected_proto) { + if (!tsync->proto_name) + return -1; + (*selected_proto) = strdup(tsync->proto_name); + } + return 0; +} diff --git a/lib/trace-cmd/trace-util.c b/lib/trace-cmd/trace-util.c new file mode 100644 index 00000000..9564c81a --- /dev/null +++ b/lib/trace-cmd/trace-util.c @@ -0,0 +1,692 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <dirent.h> +#include <ctype.h> +#include <errno.h> +#include <dlfcn.h> +#include <fcntl.h> +#include <unistd.h> +#include <ctype.h> +#include <limits.h> +#include <libgen.h> +#include <sys/mount.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/sysinfo.h> +#include <time.h> +#include <event-parse.h> +#include <event-utils.h> + +#include "trace-cmd-private.h" +#include "trace-cmd-local.h" + +#define LOCAL_PLUGIN_DIR ".trace-cmd/plugins" +#define PROC_STACK_FILE "/proc/sys/kernel/stack_tracer_enabled" + +static bool debug; +static int log_level = TEP_LOG_INFO; +static FILE *logfp; + +const static struct { + const char *clock_str; + 
enum tracecmd_clocks clock_id; +} trace_clocks[] = { + {"local", TRACECMD_CLOCK_LOCAL}, + {"global", TRACECMD_CLOCK_GLOBAL}, + {"counter", TRACECMD_CLOCK_COUNTER}, + {"uptime", TRACECMD_CLOCK_UPTIME}, + {"perf", TRACECMD_CLOCK_PERF}, + {"mono", TRACECMD_CLOCK_MONO}, + {"mono_raw", TRACECMD_CLOCK_MONO_RAW}, + {"boot", TRACECMD_CLOCK_BOOT}, + {"x86-tsc", TRACECMD_CLOCK_X86_TSC}, + {NULL, -1} +}; + +/** + * tracecmd_clock_str2id - Convert ftrace clock name to clock ID + * @clock: Ftrace clock name + * Returns ID of the ftrace clock + */ +enum tracecmd_clocks tracecmd_clock_str2id(const char *clock) +{ + int i; + + if (!clock) + return TRACECMD_CLOCK_UNKNOWN; + + for (i = 0; trace_clocks[i].clock_str; i++) { + if (!strncmp(clock, trace_clocks[i].clock_str, + strlen(trace_clocks[i].clock_str))) + return trace_clocks[i].clock_id; + } + return TRACECMD_CLOCK_UNKNOWN; +} + +/** + * tracecmd_clock_id2str - Convert clock ID to ftare clock name + * @clock: Clock ID + * Returns name of a ftrace clock + */ +const char *tracecmd_clock_id2str(enum tracecmd_clocks clock) +{ + int i; + + for (i = 0; trace_clocks[i].clock_str; i++) { + if (trace_clocks[i].clock_id == clock) + return trace_clocks[i].clock_str; + } + return NULL; +} + +/** + * tracecmd_set_debug - Set debug mode of the tracecmd library + * @set_debug: The new "debug" mode. If true, the tracecmd library is + * in "debug" mode + */ +void tracecmd_set_debug(bool set_debug) +{ + debug = set_debug; + + if (set_debug) + tracecmd_set_loglevel(TEP_LOG_DEBUG); + else + tracecmd_set_loglevel(TEP_LOG_CRITICAL); +} + +/** + * tracecmd_get_debug - Get debug mode of tracecmd library + * Returns true, if the tracecmd library is in debug mode. 
+ * + */ +bool tracecmd_get_debug(void) +{ + return debug; +} + +void tracecmd_parse_cmdlines(struct tep_handle *pevent, + char *file, int size __maybe_unused) +{ + char *comm; + char *line; + char *next = NULL; + int pid; + + line = strtok_r(file, "\n", &next); + while (line) { + sscanf(line, "%d %m[^\n]s", &pid, &comm); + tep_register_comm(pevent, comm, pid); + free(comm); + line = strtok_r(NULL, "\n", &next); + } +} + +void tracecmd_parse_proc_kallsyms(struct tep_handle *pevent, + char *file, unsigned int size __maybe_unused) +{ + unsigned long long addr; + int sav_errno; + char *func; + char *line; + char *next = NULL; + char *mod; + char ch; + + line = strtok_r(file, "\n", &next); + while (line) { + int func_start, func_end = 0; + int mod_start, mod_end = 0; + int n; + + mod = NULL; + sav_errno = errno; + errno = 0; + n = sscanf(line, "%16llx %c %n%*s%n%*1[\t][%n%*s%n", + &addr, &ch, &func_start, &func_end, &mod_start, &mod_end); + if (errno) + return; + errno = sav_errno; + + if (n != 2 || !func_end) + return; + + func = line + func_start; + /* + * Hacks for + * - arm arch that adds a lot of bogus '$a' functions + * - x86-64 that reports per-cpu variable offsets as absolute + */ + if (func[0] != '$' && ch != 'A' && ch != 'a') { + line[func_end] = 0; + if (mod_end) { + mod = line + mod_start; + /* truncate the extra ']' */ + line[mod_end - 1] = 0; + } + tep_register_function(pevent, func, addr, mod); + } + + line = strtok_r(NULL, "\n", &next); + } +} + +void tracecmd_parse_ftrace_printk(struct tep_handle *pevent, + char *file, unsigned int size __maybe_unused) +{ + unsigned long long addr; + char *printk; + char *line; + char *next = NULL; + char *addr_str; + char *fmt; + + line = strtok_r(file, "\n", &next); + while (line) { + addr_str = strtok_r(line, ":", &fmt); + if (!addr_str) { + tracecmd_warning("printk format with empty entry"); + break; + } + addr = strtoull(addr_str, NULL, 16); + /* fmt still has a space, skip it */ + printk = strdup(fmt+1); + line = 
strtok_r(NULL, "\n", &next); + tep_register_print_string(pevent, printk, addr); + free(printk); + } +} + +/** + * tracecmd_add_id - add an int to the event id list + * @list: list to add the id to + * @id: id to add + * @len: current length of list of ids. + * + * The typical usage is: + * + * events = tracecmd_add_id(events, id, len++); + * + * Returns the new allocated list with the id included. + * the list will contain a '-1' at the end. + * + * The returned list should be freed with free(). + */ +int *tracecmd_add_id(int *list, int id, int len) +{ + if (!list) + list = malloc(sizeof(*list) * 2); + else + list = realloc(list, sizeof(*list) * (len + 2)); + if (!list) + return NULL; + + list[len++] = id; + list[len] = -1; + + return list; +} + +struct add_plugin_data { + int ret; + int index; + char **files; +}; + +static void add_plugin_file(struct tep_handle *pevent, const char *path, + const char *name, void *data) +{ + struct add_plugin_data *pdata = data; + char **ptr; + int size; + int i; + + if (pdata->ret) + return; + + size = pdata->index + 2; + ptr = realloc(pdata->files, sizeof(char *) * size); + if (!ptr) + goto out_free; + + ptr[pdata->index] = strdup(name); + if (!ptr[pdata->index]) + goto out_free; + + pdata->files = ptr; + pdata->index++; + pdata->files[pdata->index] = NULL; + return; + + out_free: + for (i = 0; i < pdata->index; i++) + free(pdata->files[i]); + free(pdata->files); + pdata->files = NULL; + pdata->ret = errno; +} + +/** + * trace_util_find_plugin_files - find list of possible plugin files + * @suffix: The suffix of the plugin files to find + * + * Searches the plugin directory for files that end in @suffix, and + * will return an allocated array of file names, or NULL if none is + * found. + * + * Must check against TRACECMD_ISERR(ret) as if an error happens + * the errno will be returned with the TRACECMD_ERR_MSK to denote + * such an error occurred. + * + * Use trace_util_free_plugin_files() to free the result. 
+ */ +__hidden char **trace_util_find_plugin_files(const char *suffix) +{ + struct add_plugin_data pdata; + + memset(&pdata, 0, sizeof(pdata)); + + tep_load_plugins_hook(NULL, suffix, add_plugin_file, &pdata); + + if (pdata.ret) + return TRACECMD_ERROR(pdata.ret); + + return pdata.files; +} + +/** + * trace_util_free_plugin_files - free the result of trace_util_find_plugin_files() + * @files: The result from trace_util_find_plugin_files() + * + * Frees the contents that were allocated by trace_util_find_plugin_files(). + */ +void __hidden trace_util_free_plugin_files(char **files) +{ + int i; + + if (!files || TRACECMD_ISERR(files)) + return; + + for (i = 0; files[i]; i++) { + free(files[i]); + } + free(files); +} + +static char *get_source_plugins_dir(void) +{ + char *p, path[PATH_MAX+1]; + int ret; + + ret = readlink("/proc/self/exe", path, PATH_MAX); + if (ret > PATH_MAX || ret < 0) + return NULL; + + path[ret] = 0; + dirname(path); + p = strrchr(path, '/'); + if (!p) + return NULL; + /* Check if we are in the the source tree */ + if (strcmp(p, "/tracecmd") != 0) + return NULL; + + strcpy(p, "/lib/traceevent/plugins"); + return strdup(path); +} + +__hidden struct tep_plugin_list * +trace_load_plugins(struct tep_handle *tep, int flags) +{ + struct tep_plugin_list *list; + char *path; + + if (flags & TRACECMD_FL_LOAD_NO_PLUGINS) + tep_set_flag(tep, TEP_DISABLE_PLUGINS); + if (flags & TRACECMD_FL_LOAD_NO_SYSTEM_PLUGINS) + tep_set_flag(tep, TEP_DISABLE_SYS_PLUGINS); + + path = get_source_plugins_dir(); + if (path) + tep_add_plugin_path(tep, path, TEP_PLUGIN_LAST); + free(path); + + list = tep_load_plugins(tep); + + return list; +} + +/** + * tracecmd_set_loglevel - set log level of the library + * @level: desired level of the library messages + */ +void tracecmd_set_loglevel(enum tep_loglevel level) +{ + log_level = level; + tracefs_set_loglevel(level); + tep_set_loglevel(level); +} + +void __weak tracecmd_warning(const char *fmt, ...) 
+{ + va_list ap; + + if (log_level < TEP_LOG_WARNING) + return; + + va_start(ap, fmt); + tep_vprint("libtracecmd", TEP_LOG_WARNING, true, fmt, ap); + va_end(ap); +} + +void __weak tracecmd_info(const char *fmt, ...) +{ + va_list ap; + + if (log_level < TEP_LOG_INFO) + return; + + va_start(ap, fmt); + tep_vprint("libtracecmd", TEP_LOG_INFO, false, fmt, ap); + va_end(ap); +} + +void __weak tracecmd_critical(const char *fmt, ...) +{ + int ret; + va_list ap; + + if (log_level < TEP_LOG_CRITICAL) + return; + + va_start(ap, fmt); + ret = tep_vprint("libtracecmd", TEP_LOG_CRITICAL, true, fmt, ap); + va_end(ap); + + if (debug) { + if (!ret) + ret = -1; + exit(ret); + } +} + +void __weak tracecmd_debug(const char *fmt, ...) +{ + va_list ap; + + if (!tracecmd_get_debug()) + return; + + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); +} + +#define LOG_BUF_SIZE 1024 +static void __plog(const char *prefix, const char *fmt, va_list ap, FILE *fp) +{ + static int newline = 1; + char buf[LOG_BUF_SIZE]; + int r; + + r = vsnprintf(buf, LOG_BUF_SIZE, fmt, ap); + + if (r > LOG_BUF_SIZE) + r = LOG_BUF_SIZE; + + if (logfp) { + if (newline) + fprintf(logfp, "[%d]%s%.*s", getpid(), prefix, r, buf); + else + fprintf(logfp, "[%d]%s%.*s", getpid(), prefix, r, buf); + newline = buf[r - 1] == '\n'; + fflush(logfp); + return; + } + + fprintf(fp, "%.*s", r, buf); +} + +void tracecmd_plog(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + __plog("", fmt, ap, stdout); + va_end(ap); + /* Make sure it gets to the screen, in case we crash afterward */ + fflush(stdout); +} + +void tracecmd_plog_error(const char *fmt, ...) 
+{ + va_list ap; + char *str = ""; + + va_start(ap, fmt); + __plog("Error: ", fmt, ap, stderr); + va_end(ap); + if (errno) + str = strerror(errno); + if (logfp) + fprintf(logfp, "\n%s\n", str); + else + fprintf(stderr, "\n%s\n", str); +} + +/** + * tracecmd_set_logfile - Set file for logging + * @logfile: Name of the log file + * + * Returns 0 on successful completion or -1 in case of error + */ +int tracecmd_set_logfile(char *logfile) +{ + if (logfp) + fclose(logfp); + logfp = fopen(logfile, "w"); + if (!logfp) + return -1; + return 0; +} + +/** + * tracecmd_stack_tracer_status - Check stack trace status + * @status: Returned stack trace status: + * 0 - not configured, disabled + * non 0 - enabled + * + * Returns -1 in case of an error, 0 if file does not exist + * (stack tracer not configured in kernel) or 1 on successful completion. + */ +int tracecmd_stack_tracer_status(int *status) +{ + struct stat stat_buf; + char buf[64]; + long num; + int fd; + int n; + + if (stat(PROC_STACK_FILE, &stat_buf) < 0) { + /* stack tracer not configured on running kernel */ + *status = 0; /* not configured means disabled */ + return 0; + } + + fd = open(PROC_STACK_FILE, O_RDONLY); + + if (fd < 0) + return -1; + + n = read(fd, buf, sizeof(buf)); + close(fd); + + if (n <= 0) + return -1; + + if (n >= sizeof(buf)) + return -1; + + buf[n] = 0; + + num = strtol(buf, NULL, 10); + + /* Check for various possible errors */ + if (num > INT_MAX || num < INT_MIN || (!num && errno)) + return -1; + + *status = num; + return 1; /* full success */ +} + +/** + * tracecmd_count_cpus - Get the number of CPUs in the system + * + * Returns the number of CPUs in the system, or 0 in case of an error + */ +int tracecmd_count_cpus(void) +{ + static int once; + char buf[1024]; + int cpus = 0; + char *pbuf; + size_t *pn; + FILE *fp; + size_t n; + int r; + + cpus = sysconf(_SC_NPROCESSORS_CONF); + if (cpus > 0) + return cpus; + + if (!once) { + once++; + tracecmd_warning("sysconf could not determine number 
of CPUS"); + } + + /* Do the hack to figure out # of CPUS */ + n = 1024; + pn = &n; + pbuf = buf; + + fp = fopen("/proc/cpuinfo", "r"); + if (!fp) { + tracecmd_critical("Can not read cpuinfo"); + return 0; + } + + while ((r = getline(&pbuf, pn, fp)) >= 0) { + char *p; + + if (strncmp(buf, "processor", 9) != 0) + continue; + for (p = buf+9; isspace(*p); p++) + ; + if (*p == ':') + cpus++; + } + fclose(fp); + + return cpus; +} + +#define FNV_64_PRIME 0x100000001b3ULL +/* + * tracecmd_generate_traceid - Generate a unique ID, used to identify + * the current tracing session + * + * Returns unique ID + */ +unsigned long long tracecmd_generate_traceid(void) +{ + unsigned long long hash = 0; + unsigned char *ustr; + struct sysinfo sinfo; + struct timespec ts; + char *str = NULL; + + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + sysinfo(&sinfo); + asprintf(&str, "%ld %ld %ld %ld %ld %ld %ld %ld %d", + ts.tv_sec, ts.tv_nsec, + sinfo.loads[0], sinfo.loads[1], sinfo.loads[2], + sinfo.freeram, sinfo.sharedram, sinfo.freeswap, + sinfo.procs); + if (!str) + return 0; + ustr = (unsigned char *)str; + hash = 0; + while (*ustr) { + hash ^= (unsigned long long)*ustr++; + hash *= FNV_64_PRIME; + } + + free(str); + return hash; +} + +/* + * tracecmd_default_file_version - Get default trace file version of the library + * + * Returns the default trace file version + */ +int tracecmd_default_file_version(void) +{ + return FILE_VERSION_DEFAULT; +} + +bool tracecmd_is_version_supported(unsigned int version) +{ + if (version <= FILE_VERSION_MAX) + return true; + return false; +} + +static void __attribute__ ((constructor)) tracecmd_lib_init(void) +{ + tracecmd_compress_init(); +} + +static void __attribute__((destructor)) tracecmd_lib_free(void) +{ + tracecmd_compress_free(); +} + +__hidden bool check_file_state(unsigned long file_version, int current_state, int new_state) +{ + if (file_version >= FILE_VERSION_SECTIONS) { + if (current_state < TRACECMD_FILE_INIT) + return false; + + return 
true; + } + + switch (new_state) { + case TRACECMD_FILE_HEADERS: + case TRACECMD_FILE_FTRACE_EVENTS: + case TRACECMD_FILE_ALL_EVENTS: + case TRACECMD_FILE_KALLSYMS: + case TRACECMD_FILE_PRINTK: + case TRACECMD_FILE_CMD_LINES: + case TRACECMD_FILE_CPU_COUNT: + if (current_state == (new_state - 1)) + return true; + break; + case TRACECMD_FILE_OPTIONS: + if (file_version < FILE_VERSION_SECTIONS && current_state == TRACECMD_FILE_CPU_COUNT) + return true; + break; + case TRACECMD_FILE_CPU_LATENCY: + case TRACECMD_FILE_CPU_FLYRECORD: + if (current_state == TRACECMD_FILE_OPTIONS) + return true; + break; + } + + return false; +} diff --git a/libtracecmd.pc.template b/libtracecmd.pc.template new file mode 100644 index 00000000..bcf4e39d --- /dev/null +++ b/libtracecmd.pc.template @@ -0,0 +1,11 @@ +prefix=INSTALL_PREFIX +libdir=LIB_DIR +includedir=HEADER_DIR + +Name: libtracecmd +URL: https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ +Description: Library for creating and reading trace-cmd data files +Version: LIB_VERSION +Requires: libtracefs >= LIBTRACEFS_MIN_VERSION +Cflags: -I${includedir} +Libs: -L${libdir} -ltracecmd diff --git a/make-trace-cmd.sh b/make-trace-cmd.sh new file mode 100755 index 00000000..31f32594 --- /dev/null +++ b/make-trace-cmd.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +if [ -z "$INSTALL_PATH" ]; then + echo + echo 'Error: No $INSTALL_PATH defined' + echo + echo " usage: [PREFIX=prefix][BUILD_PATH=/path/to/build] INSTALL_PATH=/path/to/install make-trace-cmd.sh install|install_libs|clean|uninstall" + echo + echo " Used to create a self contained directory to copy to other machines." + echo + echo " Please read PACKAGING for more information." + echo + exit +fi + +if [ ! -d $INSTALL_PATH ]; then + mkdir $INSTALL_PATH +fi + +if [ ! -z "$BUILD_PATH" ]; then + if [ ! 
-d $BUILD_PATH ]; then + mkdir $BUILD_PATH + fi + O_PATH="O=$BUILD_PATH" +fi + +if [ -z "$PREFIX" ]; then + PREFIX="/usr" +fi + +PKG_PATH=`pkg-config --variable pc_path pkg-config | tr ":" " " | cut -d' ' -f1` + +WITH_PATH="" +# If pkg-config supports --with-path, use that as well +if pkg-config --with-path=/tmp --variable pc_path pkg-config &> /dev/null ; then + WITH_PATH="--with-path=$INSTALL_PATH$PKG_PATH" +fi + +PKG_CONFIG_PATH="$INSTALL_PATH/$PKG_PATH" PKG_CONFIG="pkg-config $WITH_PATH --define-variable=prefix=$INSTALL_PATH/$PREFIX" CFLAGS="-g -Wall -I$INSTALL_PATH/$PREFIX/include" make DESTDIR=$INSTALL_PATH $O_PATH prefix=$PREFIX $@ diff --git a/python/Makefile b/python/Makefile new file mode 100644 index 00000000..63f5736d --- /dev/null +++ b/python/Makefile @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 + +include $(src)/scripts/utils.mk + +ifdef BUILD_PYTHON_WORKS +PYTHON_SO_INSTALL := ctracecmd.install +PYTHON_PY_PROGS := event-viewer.install +PYTHON_PY_LIBS := tracecmd.install +endif + +ctracecmd.so: ctracecmd.i $(LIBTRACECMD_STATIC) + swig -Wall -python -noproxy -I$(src)/include/trace-cmd $(LIBTRACEEVENT_CFLAGS) ctracecmd.i + $(CC) -fpic -c $(CPPFLAGS) $(CFLAGS) $(PYTHON_INCLUDES) ctracecmd_wrap.c + $(CC) --shared $(LIBTRACECMD_STATIC) $(LDFLAGS) ctracecmd_wrap.o -o ctracecmd.so $(TRACE_LIBS) + +$(PYTHON_SO_INSTALL): %.install : %.so force + $(Q)$(call do_install_data,$<,$(python_dir_SQ)) + +$(PYTHON_PY_PROGS): %.install : %.py force + $(Q)$(call do_install,$<,$(python_dir_SQ)) + +$(PYTHON_PY_LIBS): %.install : %.py force + $(Q)$(call do_install_data,$<,$(python_dir_SQ)) + +install_python: $(PYTHON_SO_INSTALL) $(PYTHON_PY_PROGS) $(PYTHON_PY_LIBS) + + +clean: + $(RM) *.a *.so *.o .*.d ctracecmd_wrap.* + +force: +.PHONY: clean force diff --git a/python/ctracecmd.i b/python/ctracecmd.i new file mode 100644 index 00000000..6d0179e3 --- /dev/null +++ b/python/ctracecmd.i @@ -0,0 +1,250 @@ +// tracecmd.i +%module ctracecmd +%include "typemaps.i" 
+%include "constraints.i" + +%nodefaultctor record; +%nodefaultdtor record; + +%apply Pointer NONNULL { struct tracecmd_input *handle }; +%apply Pointer NONNULL { struct tep_handle *pevent }; +%apply Pointer NONNULL { struct tep_format_field * }; +%apply unsigned long long *OUTPUT {unsigned long long *} +%apply int *OUTPUT {int *} + + +%{ +#include "trace-cmd.h" +#include "event-parse.h" +#include "event-utils.h" +#include <Python.h> +%} + + +%typemap(in) PyObject *pyfunc { + if (!PyCallable_Check($input)) { + PyErr_SetString(PyExc_TypeError, "Need a callable object!"); + return NULL; + } + $1 = $input; +} + +%ignore python_callback; + +%inline %{ +static int python_callback(struct trace_seq *s, + struct tep_record *record, + struct tep_event *event, + void *context); + +static int skip_output = 0; + +static void py_supress_trace_output(void) +{ + skip_output = 1; +} + +void warning(const char *fmt, ...) +{ + va_list ap; + + if (skip_output) + return; + + va_start(ap, fmt); + tep_vprint("tracecmd", TEP_LOG_WARNING, true, fmt, ap); + va_end(ap); +} + +PyObject *convert_pevent(unsigned long pevent) +{ + void *pev = (void *)pevent; + return SWIG_NewPointerObj(SWIG_as_voidptr(pev), SWIGTYPE_p_tep_handle, 0); +} + +void py_pevent_register_event_handler(struct tep_handle *pevent, int id, + char *subsys, char *evname, + PyObject *pyfunc) +{ + Py_INCREF(pyfunc); + tep_register_event_handler(pevent, id, subsys, evname, + python_callback, pyfunc); +} + +static PyObject *py_field_get_stack(struct tep_handle *pevent, + struct tep_record *record, + struct tep_event *event, + int long_size) +{ + PyObject *list; + struct tep_format_field *field; + void *data = record->data; + const char *func = NULL; + unsigned long addr; + + field = tep_find_any_field(event, "caller"); + if (!field) { + PyErr_SetString(PyExc_TypeError, + "Event doesn't have caller field"); + return NULL; + } + + list = PyList_New(0); + + for (data += field->offset; data < record->data + record->size; + data += 
long_size) { + addr = tep_read_number(event->tep, data, long_size); + + if ((long_size == 8 && addr == (unsigned long long)-1) || + ((int)addr == -1)) + break; + func = tep_find_function(event->tep, addr); + if (PyList_Append(list, PyUnicode_FromString(func))) { + Py_DECREF(list); + return NULL; + } + } + + return list; +} + +#if PY_MAJOR_VERSION >= 3 +static PyObject *fromMemory(void *buf, size_t len) +{ + return PyMemoryView_FromMemory(buf, len, PyBUF_READ); +} +#define PY_INT_AS_LONG PyLong_AsLong +#else +static PyObject *fromMemory(void *buf, size_t len) +{ + return PyBuffer_FromMemory(buf, len); +} +#define PY_INT_AS_LONG PyInt_AS_LONG +#endif + + + +static PyObject *py_field_get_data(struct tep_format_field *f, struct tep_record *r) +{ + if (!strncmp(f->type, "__data_loc ", 11)) { + unsigned long long val; + int len, offset; + + if (tep_read_number_field(f, r->data, &val)) { + PyErr_SetString(PyExc_TypeError, + "Field is not a valid number"); + return NULL; + } + + /* + * The actual length of the dynamic array is stored + * in the top half of the field, and the offset + * is in the bottom half of the 32 bit field. + */ + offset = val & 0xffff; + len = val >> 16; + + return fromMemory(r->data + offset, len); + } + + return fromMemory(r->data + f->offset, f->size); +} + +static PyObject *py_field_get_str(struct tep_format_field *f, struct tep_record *r) +{ + if (!strncmp(f->type, "__data_loc ", 11)) { + unsigned long long val; + int offset; + + if (tep_read_number_field(f, r->data, &val)) { + PyErr_SetString(PyExc_TypeError, + "Field is not a valid number"); + return NULL; + } + + /* + * The actual length of the dynamic array is stored + * in the top half of the field, and the offset + * is in the bottom half of the 32 bit field. 
+ */ + offset = val & 0xffff; + + return PyUnicode_FromString((char *)r->data + offset); + } + + return PyUnicode_FromStringAndSize((char *)r->data + f->offset, + strnlen((char *)r->data + f->offset, f->size)); +} + +static PyObject *py_format_get_keys(struct tep_event *ef) +{ + PyObject *list; + struct tep_format_field *f; + + list = PyList_New(0); + + for (f = ef->format.fields; f; f = f->next) { + if (PyList_Append(list, PyUnicode_FromString(f->name))) { + Py_DECREF(list); + return NULL; + } + } + + return list; +} +%} + + +%wrapper %{ +static int python_callback(struct trace_seq *s, + struct tep_record *record, + struct tep_event *event, + void *context) +{ + PyObject *arglist, *result; + int r = 0; + + record->ref_count++; + + arglist = Py_BuildValue("(OOO)", + SWIG_NewPointerObj(SWIG_as_voidptr(s), + SWIGTYPE_p_trace_seq, 0), + SWIG_NewPointerObj(SWIG_as_voidptr(record), + SWIGTYPE_p_tep_record, 0), + SWIG_NewPointerObj(SWIG_as_voidptr(event), + SWIGTYPE_p_tep_event, 0)); + + result = PyEval_CallObject(context, arglist); + Py_XDECREF(arglist); + if (result && result != Py_None) { + if (!PyInt_Check(result)) { + PyErr_SetString(PyExc_TypeError, + "callback must return int"); + PyErr_Print(); + Py_XDECREF(result); + return 0; + } + r = PY_INT_AS_LONG(result); + } else if (result == Py_None) + r = 0; + else + PyErr_Print(); + + Py_XDECREF(result); + + return r; +} +%} + + +%ignore trace_seq_vprintf; +%ignore vpr_stat; + +/* SWIG can't grok these, define them to nothing */ +#define __trace +#define __attribute__(x) +#define __thread + +%include "trace-cmd.h" +%include <trace-seq.h> +%include <event-parse.h> diff --git a/python/event-viewer.py b/python/event-viewer.py new file mode 100755 index 00000000..e3b2edd4 --- /dev/null +++ b/python/event-viewer.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python2 + +import getopt +from gobject import * +import gtk +from tracecmd import * +import time + +app = None +data_func_cnt = 0 + +# In a "real" app these width should be 
determined at runtime testing max length +# strings in the current font. +TS_COL_W = 150 +CPU_COL_W = 35 +EVENT_COL_W = 150 +PID_COL_W = 75 +COMM_COL_W = 250 + + +def timing(func): + def wrapper(*arg): + start = time.time() + ret = func(*arg) + end = time.time() + print('@%s took %0.3f s' % (func.func_name, (end-start))) + return ret + return wrapper + + +class EventStore(gtk.GenericTreeModel): + class EventRef(object): + '''Inner class to build the trace event index''' + def __init__(self, index, timestamp, offset, cpu): + self.index = index + self.offset = offset + self.ts = timestamp + self.cpu = cpu + + def __cmp__(self, other): + if self.ts < other.ts: + return -1 + if self.ts > other.ts: + return 1 + if self.offset < other.offset: + return -1 + if self.offset > other.offset: + return 1 + return 0 + + # The store only returns the record offset into the trace + # The view is responsible for looking up the Event with the offset + column_types = (long,) + + @timing + def __init__(self, trace): + gtk.GenericTreeModel.__init__(self) + self.trace = trace + self.refs = [] + self._load_trace() + self._sort() + self._reindex() + + @timing + def _load_trace(self): + print("Building trace index...") + index = 0 + for cpu in range(0, trace.cpus): + rec = tracecmd_read_data(self.trace._handle, cpu) + while rec: + offset = tep_record_offset_get(rec) + ts = tep_record_ts_get(rec) + self.refs.append(self.EventRef(index, ts, offset, cpu)) + index = index + 1 + rec = tracecmd_read_data(self.trace._handle, cpu) + print("Loaded %d events from trace" % (index)) + + @timing + def _sort(self): + self.refs.sort() + + @timing + def _reindex(self): + for i in range(0, len(self.refs)): + self.refs[i].index = i + + def on_get_flags(self): + return gtk.TREE_MODEL_LIST_ONLY | gtk.TREE_MODEL_ITERS_PERSIST + + def on_get_n_columns(self): + return len(self.column_types) + + def on_get_column_type(self, col): + return self.column_types[col] + + def on_get_iter(self, path): + return 
self.refs[path[0]] + + def on_get_path(self, ref): + return ref.index + + def on_get_value(self, ref, col): + ''' + The Event record was getting deleted when passed back via this + method, now it just returns the ref itself. Use get_event() instead. + ''' + if col == 0: + #return self.trace.read_event_at(ref.offset) + return ref + return None + + def on_iter_next(self, ref): + try: + return self.refs[ref.index+1] + except IndexError: + return None + + def on_iter_children(self, ref): + if ref: + return None + return self.refs[0] + + def on_iter_has_child(self, ref): + return False + + def on_iter_n_children(self, ref): + if ref: + return 0 + return len(self.refs) + + def on_iter_nth_child(self, ref, n): + if ref: + return None + try: + return self.refs[n] + except IndexError: + return None + + def on_iter_parent(self, child): + return None + + def get_event(self, iter): + '''This allocates a record which must be freed by the caller''' + try: + ref = self.refs[self.get_path(iter)[0]] + ev = self.trace.read_event_at(ref.offset) + return ev + except IndexError: + return None + + +class EventView(gtk.TreeView): + def __init__(self, model): + gtk.TreeView.__init__(self, model) + self.set_fixed_height_mode(True) + + ts_col = gtk.TreeViewColumn("Time (s)") + ts_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) + ts_col.set_fixed_width(TS_COL_W) + ts_cell = gtk.CellRendererText() + ts_col.pack_start(ts_cell, False) + ts_col.set_cell_data_func(ts_cell, self.data_func, "ts") + self.append_column(ts_col) + + cpu_col = gtk.TreeViewColumn("CPU") + cpu_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) + cpu_col.set_fixed_width(CPU_COL_W) + cpu_cell = gtk.CellRendererText() + cpu_col.pack_start(cpu_cell, False) + cpu_col.set_cell_data_func(cpu_cell, self.data_func, "cpu") + self.append_column(cpu_col) + + event_col = gtk.TreeViewColumn("Event") + event_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) + event_col.set_fixed_width(EVENT_COL_W) + event_cell = gtk.CellRendererText() + 
event_col.pack_start(event_cell, False) + event_col.set_cell_data_func(event_cell, self.data_func, "event") + self.append_column(event_col) + + pid_col = gtk.TreeViewColumn("PID") + pid_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) + pid_col.set_fixed_width(PID_COL_W) + pid_cell = gtk.CellRendererText() + pid_col.pack_start(pid_cell, False) + pid_col.set_cell_data_func(pid_cell, self.data_func, "pid") + self.append_column(pid_col) + + comm_col = gtk.TreeViewColumn("Comm") + comm_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) + comm_col.set_fixed_width(COMM_COL_W) + comm_cell = gtk.CellRendererText() + comm_col.pack_start(comm_cell, False) + comm_col.set_cell_data_func(comm_cell, self.data_func, "comm") + self.append_column(comm_col) + + def data_func(self, col, cell, model, iter, data): + global app, data_func_cnt + + ev = model.get_event(iter) + #ev = model.get_value(iter, 0) + if not ev: + return False + + if data == "ts": + cell.set_property("markup", "%d.%09d" % (ev.ts/1000000000, + ev.ts%1000000000)) + data_func_cnt = data_func_cnt + 1 + if app: + app.inc_data_func() + elif data == "cpu": + cell.set_property("markup", ev.cpu) + elif data == "event": + cell.set_property("markup", ev.name) + elif data == "pid": + cell.set_property("markup", ev.pid) + elif data == "comm": + cell.set_property("markup", ev.comm) + else: + print("Unknown Column:", data) + return False + + return True + + +class EventViewerApp(gtk.Window): + def __init__(self, trace): + gtk.Window.__init__(self) + + self.set_size_request(650, 400) + self.set_position(gtk.WIN_POS_CENTER) + + self.connect("destroy", gtk.main_quit) + self.set_title("Event Viewer") + + store = EventStore(trace) + view = EventView(store) + + sw = gtk.ScrolledWindow() + sw.set_policy(gtk.POLICY_NEVER, gtk.POLICY_ALWAYS) + sw.add(view) + + # track how often the treeview data_func is called + self.data_func_label = gtk.Label("0") + hbox = gtk.HBox() + hbox.pack_start(gtk.Label("TS Data Func Calls:"), False, False) + 
hbox.pack_start(self.data_func_label, False, False) + + vbox = gtk.VBox() + vbox.pack_start(hbox, False) + vbox.pack_end(sw) + + self.add(vbox) + self.show_all() + + def inc_data_func(self): + global data_func_cnt + self.data_func_label.set_text(str(data_func_cnt)) + + +if __name__ == "__main__": + if len(sys.argv) >=2: + filename = sys.argv[1] + else: + filename = "trace.dat" + + print("Initializing trace...") + trace = Trace(filename) + print("Initializing app...") + app = EventViewerApp(trace) + print("Go!") + gtk.main() diff --git a/python/tracecmd.py b/python/tracecmd.py new file mode 100644 index 00000000..4d481576 --- /dev/null +++ b/python/tracecmd.py @@ -0,0 +1,255 @@ +# +# Copyright (C) International Business Machines Corp., 2009 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# 2009-Dec-17: Initial version by Darren Hart <dvhltc@us.ibm.com> +# + +from functools import update_wrapper +from ctracecmd import * +from UserDict import DictMixin + +""" +Python interface to the tracecmd library for parsing ftrace traces + +Python tracecmd applications should be written to this interface. It will be +updated as the tracecmd C API changes and try to minimze the impact to python +applications. 
The ctracecmd Python module is automatically generated using SWIG +and it is recommended applications not use it directly. + +TODO: consider a complete class hierarchy of ftrace events... +""" + +def cached_property(func, name=None): + if name is None: + name = func.__name__ + def _get(self): + try: + return self.__cached_properties[name] + except AttributeError: + self.__cached_properties = {} + except KeyError: + pass + value = func(self) + self.__cached_properties[name] = value + return value + update_wrapper(_get, func) + def _del(self): + self.__cached_properties.pop(name, None) + return property(_get, None, _del) + +class Event(object, DictMixin): + """ + This class can be used to access event data + according to an event's record and format. + """ + def __init__(self, pevent, record, format): + self._pevent = pevent + self._record = record + self._format = format + + def __str__(self): + return "%d.%09d CPU%d %s: pid=%d comm=%s type=%d" % \ + (self.ts/1000000000, self.ts%1000000000, self.cpu, self.name, + self.num_field("common_pid"), self.comm, self.type) + + def __del__(self): + free_record(self._record) + + def __getitem__(self, n): + f = tep_find_field(self._format, n) + if f is None: + raise KeyError("no field '%s'" % n) + return Field(self._record, f) + + def keys(self): + return py_format_get_keys(self._format) + + @cached_property + def comm(self): + return tep_data_comm_from_pid(self._pevent, self.pid) + + @cached_property + def cpu(self): + return tep_record_cpu_get(self._record) + + @cached_property + def name(self): + return event_format_name_get(self._format) + + @cached_property + def pid(self): + return tep_data_pid(self._pevent, self._record) + + @cached_property + def ts(self): + return tep_record_ts_get(self._record) + + @cached_property + def type(self): + return tep_data_type(self._pevent, self._record) + + def num_field(self, name): + f = tep_find_any_field(self._format, name) + if f is None: + return None + ret, val = 
tep_read_number_field(f, tep_record_data_get(self._record)) + if ret: + return None + return val + + def str_field(self, name): + f = tep_find_any_field(self._format, name) + if f is None: + return None + return py_field_get_str(f, self._record) + + def stack_field(self, long_size): + return py_field_get_stack(self._pevent, self._record, self._format, + long_size) + +class TraceSeq(object): + def __init__(self, trace_seq): + self._trace_seq = trace_seq + + def puts(self, s): + return trace_seq_puts(self._trace_seq, s) + +class FieldError(Exception): + pass + +class Field(object): + def __init__(self, record, field): + self._record = record + self._field = field + + @cached_property + def data(self): + return py_field_get_data(self._field, self._record) + + def __long__(self): + ret, val = tep_read_number_field(self._field, + tep_record_data_get(self._record)) + if ret: + raise FieldError("Not a number field") + return val + __int__ = __long__ + + def __str__(self): + return py_field_get_str(self._field, self._record) + +class PEvent(object): + def __init__(self, pevent): + self._pevent = pevent + + def _handler(self, cb, s, record, event_fmt): + return cb(TraceSeq(s), Event(self._pevent, record, event_fmt)) + + def register_event_handler(self, subsys, event_name, callback): + l = lambda s, r, e: self._handler(callback, s, r, e) + + py_pevent_register_event_handler( + self._pevent, -1, subsys, event_name, l) + + @cached_property + def file_endian(self): + if tep_is_file_bigendian(self._pevent): + return '>' + return '<' + + +class FileFormatError(Exception): + pass + +class Trace(object): + """ + Trace object represents the trace file it is created with. + + The Trace object aggregates the tracecmd structures and functions that are + used to manage the trace and extract events from it. 
+ """ + def __init__(self, filename): + self._handle = tracecmd_alloc(filename) + + if tracecmd_read_headers(self._handle): + raise FileFormatError("Invalid headers") + + if tracecmd_init_data(self._handle): + raise FileFormatError("Failed to init data") + + self._pevent = tracecmd_get_pevent(self._handle) + + @cached_property + def cpus(self): + return tracecmd_cpus(self._handle) + + @cached_property + def long_size(self): + return tracecmd_long_size(self._handle) + + def read_event(self, cpu): + rec = tracecmd_read_data(self._handle, cpu) + if rec: + type = tep_data_type(self._pevent, rec) + format = tep_find_event(self._pevent, type) + # rec ownership goes over to Event instance + return Event(self._pevent, rec, format) + return None + + def read_event_at(self, offset): + res = tracecmd_read_at(self._handle, offset) + # SWIG only returns the CPU if the record is None for some reason + if isinstance(res, int): + return None + rec, cpu = res + type = tep_data_type(self._pevent, rec) + format = tep_find_event(self._pevent, type) + # rec ownership goes over to Event instance + return Event(self._pevent, rec, format) + + def read_next_event(self): + res = tracecmd_read_next_data(self._handle) + if isinstance(res, int): + return None + rec, cpu = res + type = tep_data_type(self._pevent, rec) + format = tep_find_event(self._pevent, type) + return Event(self._pevent, rec, format) + + def peek_event(self, cpu): + rec = tracecmd_peek_data_ref(self._handle, cpu) + if rec is None: + return None + type = tep_data_type(self._pevent, rec) + format = tep_find_event(self._pevent, type) + # rec ownership goes over to Event instance + return Event(self._pevent, rec, format) + + +# Basic builtin test, execute module directly +if __name__ == "__main__": + t = Trace("trace.dat") + print("Trace contains data for %d cpus" % (t.cpus)) + + for cpu in range(0, t.cpus): + print("CPU %d" % (cpu)) + ev = t.read_event(cpu) + while ev: + print("\t%s" % (ev)) + ev = t.read_event(cpu) + + + diff 
--git a/python/tracecmdgui.py b/python/tracecmdgui.py new file mode 100644 index 00000000..01bfd614 --- /dev/null +++ b/python/tracecmdgui.py @@ -0,0 +1,239 @@ +# +# Copyright (C) International Business Machines Corp., 2009 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# 2009-Dec-31: Initial version by Darren Hart <dvhltc@us.ibm.com> +# + +import gobject #delete me ? +import time +import sys +import gtk +from tracecmd import * +from ctracecmdgui import * + +""" +Python interface for tracecmd GTK widgets + +Python tracecmd applications should be written to this interface. It will be +updated as the tracecmd gui C API changes and try to minimze the impact to +python applications. The ctracecmdgui Python module is automatically generated +using SWIG and it is recommended applications not use it directly. +""" + +# In a "real" app these width should be determined at runtime testing max length +# strings in the current font. 
+TS_COL_W = 150 +CPU_COL_W = 35 +EVENT_COL_W = 150 +PID_COL_W = 75 +COMM_COL_W = 250 + + +def timing(func): + def wrapper(*arg): + start = time.time() + ret = func(*arg) + end = time.time() + print('@%s took %0.3f s' % (func.func_name, (end-start))) + return ret + return wrapper + + +class EventStore(gtk.GenericTreeModel): + # FIXME: get these from the C code: trace_view_store->column_types ... + @timing + def __init__(self, trace): + gtk.GenericTreeModel.__init__(self) + self.trace = trace + self.cstore = trace_view_store_new(trace.handle) + self.gtk_cstore = trace_view_store_as_gtk_tree_model(self.cstore) + num_rows = trace_view_store_num_rows_get(self.cstore) + print("Loaded %d events from trace" % (num_rows)) + + def on_get_flags(self): + return trace_view_store_get_flags(self.gtk_cstore) + + def on_get_n_columns(self): + return trace_view_store_get_n_columns(self.gtk_cstore) + + def on_get_column_type(self, col): + # I couldn't figure out how to convert the C GType into the python + # GType. The current typemap converts the C GType into the python type, + # which is what this function is supposed to return anyway. 
+ pytype = trace_view_store_get_column_type(self.gtk_cstore, col) + return pytype + + def on_get_iter(self, path): + if len(path) > 1 and path[1] != 1: + return None + n = path[0] + rec = trace_view_store_get_row(self.cstore, n) + return rec + + def on_get_path(self, rec): + if not rec: + return None + start_row = trace_view_store_start_row_get(self.cstore) + return (trace_view_record_pos_get(rec) - start_row,) + + def on_get_value(self, rec, col): + # FIXME: write SWIG wrapper to marshal the Gvalue and wrap the rec in an + # Iter + pass + #return trace_view_store_get_value_py(self.cstore, rec, col) + + def on_iter_next(self, rec): + pos = trace_view_record_pos_get(rec) + start_row = trace_view_store_start_row_get(self.cstore) + return trace_view_store_get_row(self.cstore, pos - start_row + 1) + + def on_iter_children(self, rec): + if rec: + return None + return trace_view_store_get_row(self.cstore, 0) + + def on_iter_has_child(self, rec): + return False + + def on_iter_n_children(self, rec): + if rec: + return 0 + return trace_view_store_num_rows_get(self.cstore) + + def on_iter_nth_child(self, rec, n): + if rec: + return None + return trace_view_store_get_row(self.cstore, n) + + def on_iter_parent(self, child): + return None + + def get_event(self, iter): + path = self.get_path(iter) + if not path: + return None + rec = trace_view_store_get_row(self.cstore, path[0]) + if not rec: + return None + ev = self.trace.read_event_at(trace_view_record_offset_get(rec)) + return ev + + +class EventView(gtk.TreeView): + def __init__(self, model): + gtk.TreeView.__init__(self, model) + self.set_fixed_height_mode(True) + + ts_col = gtk.TreeViewColumn("Time (s)") + ts_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) + ts_col.set_fixed_width(TS_COL_W) + ts_cell = gtk.CellRendererText() + ts_col.pack_start(ts_cell, False) + ts_col.set_cell_data_func(ts_cell, self.data_func, "ts") + self.append_column(ts_col) + + cpu_col = gtk.TreeViewColumn("CPU") + 
cpu_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) + cpu_col.set_fixed_width(CPU_COL_W) + cpu_cell = gtk.CellRendererText() + cpu_col.pack_start(cpu_cell, False) + cpu_col.set_cell_data_func(cpu_cell, self.data_func, "cpu") + self.append_column(cpu_col) + + event_col = gtk.TreeViewColumn("Event") + event_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) + event_col.set_fixed_width(EVENT_COL_W) + event_cell = gtk.CellRendererText() + event_col.pack_start(event_cell, False) + event_col.set_cell_data_func(event_cell, self.data_func, "event") + self.append_column(event_col) + + pid_col = gtk.TreeViewColumn("PID") + pid_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) + pid_col.set_fixed_width(PID_COL_W) + pid_cell = gtk.CellRendererText() + pid_col.pack_start(pid_cell, False) + pid_col.set_cell_data_func(pid_cell, self.data_func, "pid") + self.append_column(pid_col) + + comm_col = gtk.TreeViewColumn("Comm") + comm_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) + comm_col.set_fixed_width(COMM_COL_W) + comm_cell = gtk.CellRendererText() + comm_col.pack_start(comm_cell, False) + comm_col.set_cell_data_func(comm_cell, self.data_func, "comm") + self.append_column(comm_col) + + def data_func(self, col, cell, model, iter, data): + ev = model.get_event(iter) + #ev = model.get_value(iter, 0) + if not ev: + return False + + if data == "ts": + cell.set_property("markup", "%d.%d" % (ev.ts/1000000000, + ev.ts%1000000000)) + elif data == "cpu": + cell.set_property("markup", ev.cpu) + elif data == "event": + cell.set_property("markup", ev.name) + elif data == "pid": + cell.set_property("markup", ev.pid) + elif data == "comm": + cell.set_property("markup", ev.comm) + else: + print("Unknown Column:", data) + return False + + return True + + +class EventViewerApp(gtk.Window): + def __init__(self, trace): + gtk.Window.__init__(self) + + self.set_size_request(650, 400) + self.set_position(gtk.WIN_POS_CENTER) + + self.connect("destroy", gtk.main_quit) + self.set_title("Event Viewer") + + store = 
EventStore(trace) + view = EventView(store) + + sw = gtk.ScrolledWindow() + sw.set_policy(gtk.POLICY_NEVER, gtk.POLICY_ALWAYS) + sw.add(view) + + # track how often the treeview data_func is called + self.add(sw) + self.show_all() + + +# Basic builtin test, execute module directly +if __name__ == "__main__": + if len(sys.argv) >=2: + filename = sys.argv[1] + else: + filename = "trace.dat" + + print("Initializing trace...") + trace = Trace(filename) + print("Initializing app...") + app = EventViewerApp(trace) + print("Go!") + gtk.main() diff --git a/scripts/debug/tsync_hist.py b/scripts/debug/tsync_hist.py new file mode 100644 index 00000000..819d1e8f --- /dev/null +++ b/scripts/debug/tsync_hist.py @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2019, VMware Inc, Tzvetomir Stoyanov <tz.stoyanov@gmail.com> +# Copyright (C) 2019, VMware Inc, Yordan Karadzhov <ykaradzhov@vmware.com> + + +import matplotlib.pyplot as plt +import matplotlib.lines as mlines +import numpy as np +import sys + +def newline(p1, p2): + ax = plt.gca() + xmin, xmax = ax.get_xbound() + + if(p2[0] == p1[0]): + xmin = xmax = p1[0] + ymin, ymax = ax.get_ybound() + else: + ymax = p1[1]+(p2[1]-p1[1])/(p2[0]-p1[0])*(xmax-p1[0]) + ymin = p1[1]+(p2[1]-p1[1])/(p2[0]-p1[0])*(xmin-p1[0]) + + l = mlines.Line2D([xmin,xmax], [ymin,ymax], color='red') + ax.add_line(l) + return l + + +data = np.loadtxt(fname = sys.argv[1]) +selected_ts = data[-1, 1] +selected_ofs = data[-1, 0] +data = data[:-1,:] + +x = data[:, 1] - data[:, 0] + +mean = x.mean() +std = x.std() + +num_bins = 500 +min = x.min() #+ .4 * (x.max() - x.min()) +max = x.max() #- .4 * (x.max() - x.min()) +bins = np.linspace(min, max, num_bins, endpoint = False, dtype=int) + +fig, ax = plt.subplots() + +# the histogram of the data +n, bins, patches = ax.hist(x, bins, histtype=u'step'); + +ax.set_xlabel('clock offset [$\mu$s]') +ax.set_ylabel('entries') +ax.set_title("$\sigma$=%i" % std) + +x1, y1 = [selected_ofs, min], 
[selected_ofs, max] +newline(x1, y1) + +# Tweak spacing to prevent clipping of ylabel +fig.tight_layout() +plt.show() diff --git a/scripts/debug/tsync_readme b/scripts/debug/tsync_readme new file mode 100644 index 00000000..f3ebb25d --- /dev/null +++ b/scripts/debug/tsync_readme @@ -0,0 +1,12 @@ +PTP-like algorithm debug +======================== + +tsync_*.py scripts can be used to visualise debug files, written when the PTP-like algorithm +is compiled with TSYNC_DEBUG defined. The files are located in the guest machine: + s-cid*.txt - For each offset calculation: host and guest clocks and calculated offset. + res-cid*.txt - For each tracing session: all calculated clock offsets. + +tsync_hist.py plots a histogram, using data from a s-cid*.txt file: + "python tsync_hist.py s-cid2_1.txt" +tsync_res.py plots a line, using data from res-cid*.txt file: + "python tsync_res.py res-cid2.txt" diff --git a/scripts/debug/tsync_res.py b/scripts/debug/tsync_res.py new file mode 100644 index 00000000..7d109863 --- /dev/null +++ b/scripts/debug/tsync_res.py @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2019, VMware Inc, Tzvetomir Stoyanov <tz.stoyanov@gmail.com> +# Copyright (C) 2019, VMware Inc, Yordan Karadzhov <ykaradzhov@vmware.com> + + +import matplotlib.pyplot as plt +import matplotlib.lines as mlines +import numpy as np +import sys + +def newline(p1, p2): + ax = plt.gca() + xmin, xmax = ax.get_xbound() + + if(p2[0] == p1[0]): + xmin = xmax = p1[0] + ymin, ymax = ax.get_ybound() + else: + ymax = p1[1]+(p2[1]-p1[1])/(p2[0]-p1[0])*(xmax-p1[0]) + ymin = p1[1]+(p2[1]-p1[1])/(p2[0]-p1[0])*(xmin-p1[0]) + + l = mlines.Line2D([xmin,xmax], [ymin,ymax], color='red') + ax.add_line(l) + return l + +data = np.loadtxt(fname = sys.argv[1]) +x = data[:, 0] +y = data[:, 1] + +fig, ax = plt.subplots() + +ax.set_xlabel('samples (t)') +ax.set_ylabel('clock offset') +ax.set_title("$\delta$=%i ns" % (max(y) - min(y))) + +l = mlines.Line2D(x, y) +ax.add_line(l) 
+ax.set_xlim(min(x), max(x)) +ax.set_ylim(min(y), max(y) ) + +print(min(y), max(y), max(y) - min(y)) + +# Tweak spacing to prevent clipping of ylabel +fig.tight_layout() +plt.show() diff --git a/scripts/utils.mk b/scripts/utils.mk new file mode 100644 index 00000000..3fc2d74f --- /dev/null +++ b/scripts/utils.mk @@ -0,0 +1,210 @@ +# SPDX-License-Identifier: GPL-2.0 + +# Utils + +ifeq ($(BUILDGUI), 1) + GUI = 'GUI ' + GSPACE = +else + GUI = + GSPACE = " " +endif + + GOBJ = $(GSPACE)$(notdir $(strip $@)) + + +ifeq ($(VERBOSE),1) + Q = + S = +else + Q = @ + S = -s +endif + +# Use empty print_* macros if either SILENT or VERBOSE. +ifeq ($(findstring 1,$(SILENT)$(VERBOSE)),1) + print_compile = + print_app_build = + print_fpic_compile = + print_shared_lib_compile = + print_plugin_obj_compile = + print_plugin_build = + print_install = + print_uninstall = + print_update = + print_asciidoc = + print_xsltproc = + print_install = + hide_xsltproc_output = +else + print_compile = echo ' $(GUI)COMPILE '$(GOBJ); + print_app_build = echo ' $(GUI)BUILD '$(GOBJ); + print_fpic_compile = echo ' $(GUI)COMPILE FPIC '$(GOBJ); + print_shared_lib_compile = echo ' $(GUI)COMPILE SHARED LIB '$(GOBJ); + print_plugin_obj_compile = echo ' $(GUI)COMPILE PLUGIN OBJ '$(GOBJ); + print_plugin_build = echo ' $(GUI)BUILD PLUGIN '$(GOBJ); + print_static_lib_build = echo ' $(GUI)BUILD STATIC LIB '$(GOBJ); + print_install = echo ' $(GUI)INSTALL '$(GSPACE)$1' to $(DESTDIR_SQ)$2'; + print_update = echo ' $(GUI)UPDATE '$(GOBJ); + print_uninstall = echo ' $(GUI)UNINSTALLING $(DESTDIR_SQ)$1'; + print_asciidoc = echo ' ASCIIDOC '`basename $@`; + print_xsltproc = echo ' XSLTPROC '`basename $@`; + print_install = echo ' INSTALL '`basename $1`' to $(DESTDIR_SQ)'$2; + hide_xsltproc_output = 2> /dev/null +endif + +do_fpic_compile = \ + ($(print_fpic_compile) \ + $(CC) -c $(CPPFLAGS) $(CFLAGS) $(EXT) -fPIC $< -o $@) + +do_compile = \ + ($(if $(GENERATE_PIC), $(do_fpic_compile), \ + $(print_compile) \ + $(CC) -c 
$(CPPFLAGS) $(CFLAGS) $(EXT) $< -o $@)) + +do_app_build = \ + ($(print_app_build) \ + $(CC) $^ -rdynamic -Wl,-rpath=$(libdir) -o $@ $(LDFLAGS) $(CONFIG_LIBS) $(LIBS)) + +do_build_static_lib = \ + ($(print_static_lib_build) \ + $(RM) $@; $(AR) rcs $@ $^) + +do_compile_shared_library = \ + ($(print_shared_lib_compile) \ + $(CC) --shared $^ '-Wl,-soname,$(1),-rpath=$$ORIGIN' -o $@ $(LDFLAGS) $(LIBS)) + +do_compile_plugin_obj = \ + ($(print_plugin_obj_compile) \ + $(CC) -c $(CPPFLAGS) $(CFLAGS) -fPIC -o $@ $<) + +do_plugin_build = \ + ($(print_plugin_build) \ + $(CC) $(CFLAGS) $(LDFLAGS) -shared -nostartfiles -o $@ $<) + +do_compile_python_plugin_obj = \ + ($(print_plugin_obj_compile) \ + $(CC) -c $(CPPFLAGS) $(CFLAGS) $(PYTHON_DIR_SQ) $(PYTHON_INCLUDES) -fPIC -o $@ $<) + +do_python_plugin_build = \ + ($(print_plugin_build) \ + $(CC) $< -shared $(LDFLAGS) $(PYTHON_LDFLAGS) -o $@) + +define make_version.h + (echo '/* This file is automatically generated. Do not modify. */'; \ + echo \#define VERSION_CODE $(shell \ + expr $(VERSION) \* 256 + $(PATCHLEVEL)); \ + echo '#define EXTRAVERSION ' $(EXTRAVERSION); \ + echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"'; \ + echo '#define FILE_VERSION '$(FILE_VERSION); \ + if [ -d $(src)/.git ]; then \ + d=`git diff`; \ + x=""; \ + if [ ! 
-z "$$d" ]; then x="+"; fi; \ + echo '#define VERSION_GIT "'$(shell \ + git log -1 --pretty=format:"%H" 2>/dev/null)$$x'"'; \ + else \ + echo '#define VERSION_GIT "not-a-git-repo"'; \ + fi \ + ) > $1 +endef + +define update_version.h + ($(call make_version.h, $@.tmp); \ + if [ -r $@ ] && cmp -s $@ $@.tmp; then \ + rm -f $@.tmp; \ + else \ + $(print_update) \ + mv -f $@.tmp $@; \ + fi); +endef + +define update_dir + (echo $1 > $@.tmp; \ + if [ -r $@ ] && cmp -s $@ $@.tmp; then \ + rm -f $@.tmp; \ + else \ + $(print_update) \ + mv -f $@.tmp $@; \ + fi); +endef + +define build_prefix + (echo $1 > $@.tmp; \ + if [ -r $@ ] && cmp -s $@ $@.tmp; then \ + rm -f $@.tmp; \ + else \ + $(print_update) \ + mv -f $@.tmp $@; \ + fi); +endef + +define do_install + $(print_install) \ + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2' +endef + +define do_install_data + $(print_install) \ + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) -m 644 $1 '$(DESTDIR_SQ)$2' +endef + +define do_install_pkgconfig_file + if [ -n "${pkgconfig_dir}" ]; then \ + $(call do_install,$(PKG_CONFIG_FILE),$(pkgconfig_dir),644); \ + else \ + (echo Failed to locate pkg-config directory) 1>&2; \ + fi +endef + +define do_make_pkgconfig_file + $(print_app_build) + $(Q)cp -f $(srctree)/${PKG_CONFIG_SOURCE_FILE}.template ${PKG_CONFIG_FILE}; \ + sed -i "s|INSTALL_PREFIX|${1}|g" ${PKG_CONFIG_FILE}; \ + sed -i "s|LIB_VERSION|${LIBTRACECMD_VERSION}|g" ${PKG_CONFIG_FILE}; \ + sed -i "s|LIB_DIR|$(libdir)|g" ${PKG_CONFIG_FILE}; \ + sed -i "s|LIBTRACEFS_MIN_VERSION|$(LIBTRACEFS_MIN_VERSION)|g" ${PKG_CONFIG_FILE}; \ + sed -i "s|HEADER_DIR|$(includedir)/trace-cmd|g" ${PKG_CONFIG_FILE}; +endef + +do_asciidoc_build = \ + ($(print_asciidoc) \ + asciidoc -d manpage -b docbook -o $@ $<) + +do_xsltproc_build = \ + ($(print_xsltproc) \ + xsltproc --nonet -o $@ ${MANPAGE_DOCBOOK_XSL} $< 
$(hide_xsltproc_output)) + +# +# asciidoc requires a synopsis, but file format man pages (5) do +# not require them. This removes it from the file in the final step. +define remove_synopsis + (sed -e '/^\.SH "SYNOPSIS"/,/ignore/d' $1 > $1.tmp;\ + mv $1.tmp $1) +endef + +define do_install_docs + $(print_install) \ + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) -m 644 $1 '$(DESTDIR_SQ)$2' +endef + +ifneq ($(findstring $(MAKEFLAGS),s),s) +ifneq ($(V),1) + QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@; + QUIET_XMLTO = @echo ' XMLTO '$@; + QUIET_SUBDIR0 = +@subdir= + QUIET_SUBDIR1 = ;$(NO_SUBDIR) \ + echo ' SUBDIR ' $$subdir; \ + $(MAKE) $(PRINT_DIR) -C $$subdir + export V +endif +endif diff --git a/tracecmd/.gitignore b/tracecmd/.gitignore new file mode 100644 index 00000000..30f93eb4 --- /dev/null +++ b/tracecmd/.gitignore @@ -0,0 +1 @@ +trace-cmd diff --git a/tracecmd/Makefile b/tracecmd/Makefile new file mode 100644 index 00000000..0114948f --- /dev/null +++ b/tracecmd/Makefile @@ -0,0 +1,98 @@ +# SPDX-License-Identifier: GPL-2.0 + +VERSION := $(TC_VERSION) +PATCHLEVEL := $(TC_PATCHLEVEL) +EXTRAVERSION := $(TC_EXTRAVERSION) + +bdir:=$(obj)/tracecmd + +TC_VERSION := $(bdir)/include/tc_version.h +TARGETS = $(bdir)/trace-cmd $(TC_VERSION) + +BUILDGUI := 0 +include $(src)/scripts/utils.mk + +CFLAGS += -I$(bdir)/include + +TRACE_CMD_OBJS = +TRACE_CMD_OBJS += trace-cmd.o +TRACE_CMD_OBJS += trace-record.o +TRACE_CMD_OBJS += trace-read.o +TRACE_CMD_OBJS += trace-split.o +TRACE_CMD_OBJS += trace-listen.o +TRACE_CMD_OBJS += trace-stack.o +TRACE_CMD_OBJS += trace-hist.o +TRACE_CMD_OBJS += trace-mem.o +TRACE_CMD_OBJS += trace-snapshot.o +TRACE_CMD_OBJS += trace-stat.o +TRACE_CMD_OBJS += trace-profile.o +TRACE_CMD_OBJS += trace-stream.o +TRACE_CMD_OBJS += trace-record.o +TRACE_CMD_OBJS += trace-restore.o +TRACE_CMD_OBJS += trace-check-events.o +TRACE_CMD_OBJS += trace-show.o +TRACE_CMD_OBJS += trace-list.o +TRACE_CMD_OBJS += 
trace-usage.o +TRACE_CMD_OBJS += trace-dump.o +TRACE_CMD_OBJS += trace-clear.o +TRACE_CMD_OBJS += trace-vm.o +TRACE_CMD_OBJS += trace-convert.o +TRACE_CMD_OBJS += trace-agent.o +TRACE_CMD_OBJS += trace-setup-guest.o +ifeq ($(VSOCK_DEFINED), 1) +TRACE_CMD_OBJS += trace-vsock.o +endif + +ALL_OBJS := $(TRACE_CMD_OBJS:%.o=$(bdir)/%.o) + +all_objs := $(sort $(ALL_OBJS)) +all_deps := $(all_objs:$(bdir)/%.o=$(bdir)/.%.d) + +CONFIG_INCLUDES = +CONFIG_LIBS = -lrt -lpthread $(TRACE_LIBS) $(LIBZSTD_LDLAGS) +CONFIG_FLAGS = + +ifeq ($(ZLIB_INSTALLED), 1) +CONFIG_LIBS += -lz +endif + +all: $(TARGETS) + +$(bdir): + @mkdir -p $(bdir) + +$(bdir)/include: | $(bdir) + @mkdir -p $(bdir)/include + +$(TC_VERSION): force | $(bdir)/include + $(Q)$(call update_version.h) + +$(all_deps): | $(bdir) +$(all_objs): | $(bdir) + +$(bdir)/trace-cmd: $(ALL_OBJS) + $(Q)$(do_app_build) + +$(bdir)/trace-cmd: $(LIBTRACECMD_STATIC) + +$(bdir)/%.o: %.c + $(Q)$(call do_compile) + +$(all_deps): $(bdir)/.%.d: %.c + $(Q)$(CC) -M -MT $(bdir)/$*.o $(CPPFLAGS) $(CFLAGS) $< > $@ + +$(all_deps): $(TC_VERSION) + +$(all_objs): $(bdir)/%.o : $(bdir)/.%.d + +dep_includes := $(wildcard $(DEPS)) + +ifneq ($(dep_includes),) + include $(dep_includes) +endif + +clean: + $(RM) $(bdir)/*.a $(bdir)/*.so $(bdir)/*.o $(bdir)/.*.d $(TARGETS) + +force: +.PHONY: clean diff --git a/tracecmd/include/bug.h b/tracecmd/include/bug.h new file mode 100644 index 00000000..9222f935 --- /dev/null +++ b/tracecmd/include/bug.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ +#ifndef __TRACE_CMD_BUG +#define __TRACE_CMD_BUG + +#define unlikely(cond) __builtin_expect(!!(cond), 0) + +#define WARN_ONCE(cond, fmt, ...) 
/* SPDX-License-Identifier: LGPL-2.1 */
/*
 * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
 *
 * Minimal circular doubly linked list. A struct list_head is embedded in
 * the structure to be linked; an empty list is a head pointing at itself.
 */
#ifndef __LIST_H
#define __LIST_H

#define offset_of(type, field)		__builtin_offsetof(type, field)

/*
 * container_of - get the structure that embeds the member @p points to
 * @p:     pointer to the embedded member (any expression)
 * @type:  type of the enclosing structure
 * @field: name of the member inside @type
 *
 * Both @p and the whole expansion are parenthesized so that the macro can
 * be handed an arbitrary expression and can be followed by "->".
 */
#define container_of(p, type, field) \
	((type *)((long)(p) - offset_of(type, field)))

struct list_head {
	struct list_head *next;
	struct list_head *prev;
};

/* Make @list an empty list (both links point back at the head). */
static inline void list_head_init(struct list_head *list)
{
	list->next = list;
	list->prev = list;
}

/* Insert @p right after @head, i.e. at the front of the list. */
static inline void list_add(struct list_head *p, struct list_head *head)
{
	struct list_head *next = head->next;

	p->prev = head;
	p->next = next;
	next->prev = p;
	head->next = p;
}

/* Insert @p right before @head, i.e. at the end of the list. */
static inline void list_add_tail(struct list_head *p, struct list_head *head)
{
	struct list_head *prev = head->prev;

	p->prev = prev;
	p->next = head;
	prev->next = p;
	head->prev = p;
}

/*
 * Unlink @p from the list it is on. The links stored in @p itself are
 * left untouched.
 */
static inline void list_del(struct list_head *p)
{
	struct list_head *next = p->next;
	struct list_head *prev = p->prev;

	next->prev = prev;
	prev->next = next;
}

/* Return non-zero when @list has no entries. */
static inline int list_empty(struct list_head *list)
{
	return list->next == list;
}

/*
 * list_for_each_entry - walk every entry of @list
 * @p:     cursor, a pointer to the enclosing structure type
 * @list:  pointer to the list head
 * @field: name of the struct list_head member inside *@p
 *
 * __typeof__ is used instead of typeof so that the macro also works when
 * the compiler runs in a strict ISO mode (e.g. -std=c11).
 */
#define list_for_each_entry(p, list, field) \
	for (p = container_of((list)->next, __typeof__(*(p)), field); \
	     &(p)->field != (list); \
	     p = container_of((p)->field.next, __typeof__(*(p)), field))

/*
 * list_for_each_entry_safe - like list_for_each_entry(), but the next
 * entry is fetched into @n before the loop body runs, so the body may
 * unlink @p.
 */
#define list_for_each_entry_safe(p, n, list, field) \
	for (p = container_of((list)->next, __typeof__(*(p)), field), \
	     n = container_of((p)->field.next, __typeof__(*(p)), field); \
	     &(p)->field != (list); \
	     p = n, n = container_of((p)->field.next, __typeof__(*(p)), field))

#endif /* __LIST_H */
b/tracecmd/include/trace-local.h new file mode 100644 index 00000000..e3fec131 --- /dev/null +++ b/tracecmd/include/trace-local.h @@ -0,0 +1,437 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#ifndef __TRACE_LOCAL_H +#define __TRACE_LOCAL_H + +#include <sys/types.h> +#include <dirent.h> /* for DIR */ +#include <ctype.h> /* for isdigit() */ +#include <errno.h> +#include <limits.h> + +#include "trace-cmd-private.h" +#include "event-utils.h" + +#define TRACE_AGENT_DEFAULT_PORT 823 + +#define DEFAULT_INPUT_FILE "trace.dat" +#define GUEST_PIPE_NAME "trace-pipe-cpu" +#define GUEST_DIR_FMT "/var/lib/trace-cmd/virt/%s" +#define GUEST_FIFO_FMT GUEST_DIR_FMT "/" GUEST_PIPE_NAME "%d" +#define VIRTIO_FIFO_FMT "/dev/virtio-ports/" GUEST_PIPE_NAME "%d" + +/* fix stupid glib guint64 typecasts and printf formats */ +typedef unsigned long long u64; + +struct buffer_instance; + +#define __printf(a, b) __attribute__((format(printf,a,b))) + +__printf(1,2) +void warning(const char *fmt, ...); + +/* for local shared information with trace-cmd executable */ + +void usage(char **argv); + +extern int silence_warnings; +extern int show_status; + +int trace_set_verbose(char *level); + +enum port_type { + USE_UDP = 0, /* Default setting */ + USE_TCP, + USE_VSOCK +}; + +struct pid_record_data { + int pid; + int brass[2]; + int cpu; + int closed; + struct tracecmd_input *stream; + struct buffer_instance *instance; + struct tep_record *record; +}; + +void show_file(const char *name); + +struct tracecmd_input *read_trace_header(const char *file, int flags); +int read_trace_files(void); + +void trace_record(int argc, char **argv); + +void trace_stop(int argc, char **argv); + +void trace_restart(int argc, char **argv); + +void trace_reset(int argc, char **argv); + +void trace_start(int argc, char **argv); + +void trace_set(int argc, char **argv); + +void trace_extract(int argc, char **argv); + +void 
trace_stream(int argc, char **argv); + +void trace_profile(int argc, char **argv); + +void trace_report(int argc, char **argv); + +void trace_split(int argc, char **argv); + +void trace_listen(int argc, char **argv); + +void trace_agent(int argc, char **argv); + +void trace_setup_guest(int argc, char **argv); + +void trace_restore(int argc, char **argv); + +void trace_clear(int argc, char **argv); + +void trace_check_events(int argc, char **argv); + +void trace_stack(int argc, char **argv); + +void trace_option(int argc, char **argv); + +void trace_hist(int argc, char **argv); + +void trace_snapshot(int argc, char **argv); + +void trace_mem(int argc, char **argv); + +void trace_stat(int argc, char **argv); + +void trace_show(int argc, char **argv); + +void trace_list(int argc, char **argv); + +void trace_usage(int argc, char **argv); + +void trace_dump(int argc, char **argv); + +void trace_convert(int argc, char **argv); + +int trace_record_agent(struct tracecmd_msg_handle *msg_handle, + int cpus, int *fds, + int argc, char **argv, bool use_fifos, + unsigned long long trace_id, const char *host); + +struct hook_list; + +void trace_init_profile(struct tracecmd_input *handle, struct hook_list *hooks, + int global); +int do_trace_profile(void); +void trace_profile_set_merge_like_comms(void); + +struct tracecmd_input * +trace_stream_init(struct buffer_instance *instance, int cpu, int fd, int cpus, + struct hook_list *hooks, + tracecmd_handle_init_func handle_init, int global); +int trace_stream_read(struct pid_record_data *pids, int nr_pids, struct timeval *tv); + +void trace_show_data(struct tracecmd_input *handle, struct tep_record *record); + +/* --- event interation --- */ + +/* + * Use this to iterate through the event directories + */ + + +enum event_process { + PROCESSED_NONE, + PROCESSED_EVENT, + PROCESSED_SYSTEM +}; + +enum process_type { + PROCESS_EVENT, + PROCESS_SYSTEM +}; + +struct event_iter { + DIR *system_dir; + DIR *event_dir; + struct dirent 
*system_dent; + struct dirent *event_dent; +}; + +enum event_iter_type { + EVENT_ITER_NONE, + EVENT_ITER_SYSTEM, + EVENT_ITER_EVENT +}; + +struct event_iter *trace_event_iter_alloc(const char *path); +enum event_iter_type trace_event_iter_next(struct event_iter *iter, + const char *path, const char *system); +void trace_event_iter_free(struct event_iter *iter); + +char *append_file(const char *dir, const char *name); +char *get_file_content(const char *file); + +char *strstrip(char *str); + +/* --- instance manipulation --- */ + +enum buffer_instance_flags { + BUFFER_FL_KEEP = 1 << 0, + BUFFER_FL_PROFILE = 1 << 1, + BUFFER_FL_GUEST = 1 << 2, + BUFFER_FL_AGENT = 1 << 3, + BUFFER_FL_HAS_CLOCK = 1 << 4, + BUFFER_FL_TSC2NSEC = 1 << 5, + BUFFER_FL_NETWORK = 1 << 6, +}; + +struct func_list { + struct func_list *next; + const char *func; + const char *mod; +}; + +struct pid_addr_maps { + struct pid_addr_maps *next; + struct tracecmd_proc_addr_map *lib_maps; + unsigned int nr_lib_maps; + char *proc_name; + int pid; +}; + +struct opt_list { + struct opt_list *next; + const char *option; +}; + +struct filter_pids { + struct filter_pids *next; + int pid; + int exclude; +}; + +struct tsc_nsec { + int mult; + int shift; + unsigned long long offset; +}; + +struct buffer_instance { + struct buffer_instance *next; + char *name; + struct tracefs_instance *tracefs; + unsigned long long trace_id; + char *cpumask; + char *output_file; + struct event_list *events; + struct event_list **event_next; + bool delete; + + struct event_list *sched_switch_event; + struct event_list *sched_wakeup_event; + struct event_list *sched_wakeup_new_event; + + const char *plugin; + char *filter_mod; + struct func_list *filter_funcs; + struct func_list *notrace_funcs; + + struct opt_list *options; + struct filter_pids *filter_pids; + struct filter_pids *process_pids; + char *common_pid_filter; + int nr_filter_pids; + int len_filter_pids; + int nr_process_pids; + bool ptrace_child; + + int 
have_set_event_pid; + int have_event_fork; + int have_func_fork; + int get_procmap; + + const char *clock; + unsigned int *client_ports; + + struct trace_seq *s_save; + struct trace_seq *s_print; + + struct tracecmd_input *handle; + + struct tracecmd_msg_handle *msg_handle; + struct tracecmd_output *network_handle; + const char *host; + + struct pid_addr_maps *pid_maps; + + char *max_graph_depth; + + int flags; + int tracing_on_init_val; + int tracing_on_fd; + int buffer_size; + int cpu_count; + + int argc; + char **argv; + + struct addrinfo *result; + unsigned int cid; + unsigned int port; + int *fds; + bool use_fifos; + + enum port_type port_type; /* Default to USE_UDP (zero) */ + int tsync_loop_interval; + struct tracecmd_time_sync *tsync; +}; + +void init_top_instance(void); + +extern struct buffer_instance top_instance; +extern struct buffer_instance *buffer_instances; +extern struct buffer_instance *first_instance; + +#define for_each_instance(i) for (i = buffer_instances; i; i = (i)->next) +#define for_all_instances(i) for (i = first_instance; i; \ + i = i == &top_instance ? 
buffer_instances : (i)->next) + +#define is_agent(instance) ((instance)->flags & BUFFER_FL_AGENT) +#define is_guest(instance) ((instance)->flags & BUFFER_FL_GUEST) +#define is_network(instance) ((instance)->flags & BUFFER_FL_NETWORK) + +#define START_PORT_SEARCH 1500 +#define MAX_PORT_SEARCH 6000 + +struct sockaddr_storage; + +int trace_net_make(int port, enum port_type type); +int trace_net_search(int start_port, int *sfd, enum port_type type); +int trace_net_print_connection(int fd); +bool trace_net_cmp_connection(struct sockaddr_storage *addr, const char *name); +bool trace_net_cmp_connection_fd(int fd, const char *name); + +struct buffer_instance *allocate_instance(const char *name); +void add_instance(struct buffer_instance *instance, int cpu_count); +void update_first_instance(struct buffer_instance *instance, int topt); + +void show_instance_file(struct buffer_instance *instance, const char *name); +void show_options(const char *prefix, struct buffer_instance *buffer); + +struct trace_guest { + struct tracefs_instance *instance; + char *name; + int cid; + int pid; + int cpu_max; + int *cpu_pid; + int *task_pids; +}; +struct trace_guest *trace_get_guest(unsigned int cid, const char *name); +bool trace_have_guests_pid(void); +void read_qemu_guests(void); +int get_guest_pid(unsigned int guest_cid); +int get_guest_vcpu_pid(unsigned int guest_cid, unsigned int guest_vcpu); + +/* moved from trace-cmd.h */ +void tracecmd_remove_instances(void); +int tracecmd_add_event(const char *event_str, int stack); +void tracecmd_enable_events(void); +void tracecmd_disable_all_tracing(int disable_tracer); +void tracecmd_disable_tracing(void); +void tracecmd_enable_tracing(void); +void tracecmd_stat_cpu(struct trace_seq *s, int cpu); + +int tracecmd_host_tsync(struct buffer_instance *instance, + unsigned int tsync_port); +void tracecmd_host_tsync_complete(struct buffer_instance *instance); +const char *tracecmd_guest_tsync(struct tracecmd_tsync_protos *tsync_protos, + char 
/*
 * is_digits - check that a string consists only of decimal digits
 * @s: NUL terminated string to check (must not be NULL)
 *
 * Returns true when every character of @s is a decimal digit. An empty
 * string has no offending character and therefore also yields true.
 */
static inline bool is_digits(const char *s)
{
	for (; *s; s++) {
		/*
		 * The <ctype.h> functions are only defined for values
		 * representable as unsigned char (or EOF); a plain char may
		 * be negative, so convert before the call.
		 */
		if (!isdigit((unsigned char)*s))
			return false;
	}
	return true;
}
/*
 * make_sockets - create one listening socket per CPU
 * @nr:      number of sockets to create
 * @fds:     array of @nr entries that receives the socket descriptors
 * @ports:   array of @nr entries that receives the port of each socket
 * @network: when non-NULL TCP sockets are created, otherwise vsockets
 *
 * The helpers do not return on failure (they call die()).
 */
static void make_sockets(int nr, int *fds, unsigned int *ports,
			 const char *network)
{
	/*
	 * Both helpers return void; "return <expression>;" is not allowed
	 * in a void function in ISO C, so simply call them.
	 */
	if (network)
		make_net(nr, fds, ports);
	else
		make_vsocks(nr, fds, ports);
}
/*
 * get_clock - look up the value given with the "-C" option
 * @argc: number of entries in @argv
 * @argv: argument vector to search
 *
 * Returns the argument that directly follows the first "-C" found in
 * @argv, or NULL when there are no arguments or no "-C" that is followed
 * by another argument.
 */
static char *get_clock(int argc, char **argv)
{
	int idx = 0;

	if (argc == 0 || argv == NULL)
		return NULL;

	/* Stop one short of the end: the last entry has no value after it */
	while (idx + 1 < argc) {
		if (strcmp("-C", argv[idx]) == 0)
			return argv[idx + 1];
		idx++;
	}

	return NULL;
}
remote ids"); + /* Just make something up */ + remote_id = -1; + local_id = -2; + } + fd = trace_vsock_make_any(); + if (fd >= 0 && + trace_vsock_get_port(fd, &tsync_port) < 0) { + close(fd); + fd = -1; + } + } + if (fd >= 0) { + tsync = tracecmd_tsync_with_host(fd, tsync_protos, + get_clock(argc, argv), + remote_id, local_id); + } + if (tsync) { + tracecmd_tsync_get_selected_proto(tsync, &tsync_proto); + } else { + warning("Failed to negotiate timestamps synchronization with the host"); + if (fd >= 0) + close(fd); + } + } + trace_id = tracecmd_generate_traceid(); + ret = tracecmd_msg_send_trace_resp(msg_handle, nr_cpus, page_size, + ports, use_fifos, trace_id, + tsync_proto, tsync_port); + if (ret < 0) + die("Failed to send trace response"); + + trace_record_agent(msg_handle, nr_cpus, fds, argc, argv, + use_fifos, trace_id, network); + + if (tsync) { + tracecmd_tsync_with_host_stop(tsync); + tracecmd_tsync_free(tsync); + } + + if (tsync_protos) { + free(tsync_protos->names); + free(tsync_protos); + } + free(argv[0]); + free(argv); + free(ports); + free(fds); + tracecmd_msg_handle_close(msg_handle); + exit(0); +} + +static volatile pid_t handler_pid; + +static void handle_sigchld(int sig) +{ + int wstatus; + pid_t pid; + + for (;;) { + pid = waitpid(-1, &wstatus, WNOHANG); + if (pid <= 0) + break; + + if (pid == handler_pid) + handler_pid = 0; + } +} + +static pid_t do_fork() +{ + /* in debug mode, we do not fork off children */ + if (tracecmd_get_debug()) + return 0; + + return fork(); +} + +static void agent_serve(unsigned int port, bool do_daemon, const char *network) +{ + struct sockaddr_storage net_addr; + struct sockaddr *addr = NULL; + socklen_t *addr_len_p = NULL; + socklen_t addr_len = sizeof(net_addr); + int sd, cd, nr_cpus; + unsigned int cid; + pid_t pid; + + signal(SIGCHLD, handle_sigchld); + + if (network) { + addr = (struct sockaddr *)&net_addr; + addr_len_p = &addr_len; + } + + nr_cpus = tracecmd_count_cpus(); + page_size = getpagesize(); + + if 
(network) { + sd = trace_net_make(port, USE_TCP); + if (listen(sd, 5) < 0) + die("Failed to listen on %d\n", port); + } else + sd = trace_vsock_make(port); + if (sd < 0) + die("Failed to open socket"); + tracecmd_tsync_init(); + + if (!network) { + cid = trace_vsock_local_cid(); + if (cid >= 0) + printf("listening on @%u:%u\n", cid, port); + } + + if (do_daemon && daemon(1, 0)) + die("daemon"); + + for (;;) { + cd = accept(sd, addr, addr_len_p); + if (cd < 0) { + if (errno == EINTR) + continue; + die("accept"); + } + if (tracecmd_get_debug()) + trace_print_connection(cd, network); + + if (network && !trace_net_cmp_connection(&net_addr, network)) { + dprint("Client does not match '%s'\n", network); + close(cd); + continue; + } + + if (handler_pid) + goto busy; + + pid = do_fork(); + if (pid == 0) { + close(sd); + signal(SIGCHLD, SIG_DFL); + agent_handle(cd, nr_cpus, page_size, network); + } + if (pid > 0) + handler_pid = pid; + +busy: + close(cd); + } +} + +enum { + OPT_verbose = 254, + DO_DEBUG = 255 +}; + +void trace_agent(int argc, char **argv) +{ + bool do_daemon = false; + unsigned int port = TRACE_AGENT_DEFAULT_PORT; + const char *network = NULL; + + if (argc < 2) + usage(argv); + + if (strcmp(argv[1], "agent") != 0) + usage(argv); + + for (;;) { + int c, option_index = 0; + static struct option long_options[] = { + {"port", required_argument, NULL, 'p'}, + {"help", no_argument, NULL, '?'}, + {"debug", no_argument, NULL, DO_DEBUG}, + {"verbose", optional_argument, NULL, OPT_verbose}, + {NULL, 0, NULL, 0} + }; + + c = getopt_long(argc-1, argv+1, "+hp:DN:", + long_options, &option_index); + if (c == -1) + break; + switch (c) { + case 'h': + usage(argv); + break; + case 'N': + network = optarg; + break; + case 'p': + port = atoi(optarg); + break; + case 'D': + do_daemon = true; + break; + case DO_DEBUG: + tracecmd_set_debug(true); + break; + case OPT_verbose: + if (trace_set_verbose(optarg) < 0) + die("invalid verbose level %s", optarg); + break; + default: + 
usage(argv); + } + } + + if (optind < argc-1) + usage(argv); + + agent_serve(port, do_daemon, network); +} diff --git a/tracecmd/trace-check-events.c b/tracecmd/trace-check-events.c new file mode 100644 index 00000000..46f57e17 --- /dev/null +++ b/tracecmd/trace-check-events.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#include <stdlib.h> +#include <getopt.h> +#include <errno.h> + +#include "tracefs.h" +#include "trace-local.h" + +enum { + OPT_verbose = 255, +}; + +void trace_check_events(int argc, char **argv) +{ + const char *tracing; + int ret, c; + int parsing_failures = 0; + struct tep_handle *pevent = NULL; + struct tep_plugin_list *list = NULL; + int open_flags = 0; + int option_index = 0; + static struct option long_options[] = { + {"verbose", optional_argument, NULL, OPT_verbose}, + {NULL, 0, NULL, 0} + }; + + + while ((c = getopt_long(argc-1, argv+1, "+hN", long_options, &option_index)) >= 0) { + switch (c) { + case 'h': + default: + usage(argv); + break; + case 'N': + open_flags |= TRACECMD_FL_LOAD_NO_PLUGINS; + break; + case OPT_verbose: + if (trace_set_verbose(optarg) < 0) + die("invalid verbose level %s", optarg); + break; + } + } + tracing = tracefs_tracing_dir(); + + if (!tracing) { + printf("Can not find or mount tracing directory!\n" + "Either tracing is not configured for this " + "kernel\n" + "or you do not have the proper permissions to " + "mount the directory"); + exit(EINVAL); + } + + pevent = tep_alloc(); + if (!pevent) + exit(EINVAL); + + list = trace_load_plugins(pevent, open_flags); + ret = tracefs_fill_local_events(tracing, pevent, &parsing_failures); + if (ret || parsing_failures) + ret = EINVAL; + tep_unload_plugins(list, pevent); + tep_free(pevent); + + return; +} diff --git a/tracecmd/trace-clear.c b/tracecmd/trace-clear.c new file mode 100644 index 00000000..999e80fe --- /dev/null +++ b/tracecmd/trace-clear.c @@ -0,0 +1,125 @@ 
/*
 * clear_instance_trace - reset the "trace" file of a tracing instance
 * @instance: instance to clear; clear_trace() passes NULL when no
 *            instance was selected
 *
 * Opens the instance's "trace" file for writing and writes "0" to it.
 * Does not return (calls die()) when the file cannot be located or opened.
 */
static void clear_instance_trace(struct tracefs_instance *instance)
{
	FILE *fp;
	char *path;

	/* reset the trace */
	path = tracefs_instance_get_file(instance, "trace");
	/* Do not hand a NULL path to fopen() or to the "%s" below */
	if (!path)
		die("Failed to get the path of the 'trace' file");
	fp = fopen(path, "w");
	if (!fp)
		die("writing to '%s'", path);
	tracefs_put_tracing_file(path);
	fwrite("0", 1, 1, fp);
	fclose(fp);
}
# Complete file names for the word being typed.
#
# $1 (optional): the word to complete. Callers that pass nothing keep
#                working because the caller's $cur is used as a fallback.
#
# When the only match is a directory, a trailing '/' is added and the
# automatic space is suppressed so the user can keep descending.
__show_files()
{
    local word="${1-$cur}"
    local dirs

    COMPREPLY=( $(compgen -f -- "$word") )
    if [ ${#COMPREPLY[@]} -gt 1 ]; then
        return 0
    fi
    # directories get '/' instead of space
    dirs=( $(compgen -d -- "$word") )
    if [ ${#dirs[@]} -eq 1 ]; then
        compopt -o nospace
        COMPREPLY=( "${dirs[0]}/" )
    fi
    return 0
}
# Complete the argument of --compression.
#
# $1 (optional): the word to complete; falls back to the caller's $cur so
#                existing call sites that pass nothing keep working.
#
# Candidates are the algorithms printed by "trace-cmd list -c" plus the
# special values "any" and "none", filtered by the typed prefix.
compression_param()
{
    local word="${1-$cur}"
    local opts=$(trace-cmd list -c | grep -v 'Supported' | cut -d "," -f1)
    opts+=" any none "
    COMPREPLY=( $(compgen -W "${opts}" -- "${word}") )
}
# Generic completion for sub-commands without a dedicated handler.
#
# $1: the trace-cmd sub-command
# $2: the previous word on the command line
# $3: the word being completed
#
# Offers the options printed by "trace-cmd <command> -h". If the previous
# word is -B and the command supports -B, instance names are offered
# instead. Falls back to file names when nothing matched.
__show_command_options()
{
    local command="$1"
    local prev="$2"
    local cur="$3"
    # loop variables are local so they do not leak into the user's shell
    local cmd opt opts
    local cmds=( $(trace-cmd --help 2>/dev/null | \
                    grep " - " | sed 's/^ *//; s/ -.*//') )

    for cmd in "${cmds[@]}"; do
        [ "$cmd" == "$command" ] || continue
        opts=$(trace-cmd "$cmd" -h 2>/dev/null|grep "^ *-" | \
                    sed -e 's/ *\(-[^ ]*\).*/\1/')
        if [ "$prev" == "-B" ]; then
            # opts is a whitespace separated list, split it on purpose
            for opt in $opts; do
                if [ "$opt" == "-B" ]; then
                    show_instances "$cur"
                    return 0
                fi
            done
        fi
        COMPREPLY=( $(compgen -W "${opts}" -- "$cur") )
        break
    done
    if [ ${#COMPREPLY[@]} -eq 0 ]; then
        __show_files "${cur}"
    fi
}
like: + # + # prev="${COMP_WORDS[COMP_CWORD-1]}" + # cur="${COMP_WORDS[COMP_CWORD]}" + # + # Instead, we use _get_comp_words_by_ref() magic. + _get_comp_words_by_ref -n : cur prev words + + if [ "$prev" == "trace-cmd" ]; then + local cmds=$(trace-cmd --help 2>/dev/null | \ + grep " - " | sed 's/^ *//; s/ -.*//') + COMPREPLY=( $(compgen -W "${cmds}" -- "${cur}") ) + return; + fi + + local w="${words[1]}" + + case "$w" in + list) + __trace_cmd_list_complete "${prev}" "${cur}" ${words[@]} + return 0 + ;; + show) + __trace_cmd_show_complete "${prev}" "${cur}" ${words[@]} + return 0 + ;; + extract) + __trace_cmd_extract_complete "${prev}" "${cur}" ${words[@]} + return 0 + ;; + record|stream|start|profile) + __trace_cmd_record_complete "${prev}" "${cur}" ${words[@]} + return 0 + ;; + report) + __trace_cmd_report_complete "${prev}" "${cur}" ${words[@]} + return 0 + ;; + dump) + __trace_cmd_dump_complete "${prev}" "${cur}" ${words[@]} + return 0 + ;; + convert) + __trace_cmd_convert_complete "${prev}" "${cur}" ${words[@]} + return 0 + ;; + *) + __show_command_options "$w" "${prev}" "${cur}" + ;; + esac +} +complete -F _trace_cmd_complete trace-cmd diff --git a/tracecmd/trace-cmd.c b/tracecmd/trace-cmd.c new file mode 100644 index 00000000..69800d26 --- /dev/null +++ b/tracecmd/trace-cmd.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#include <stdio.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <dirent.h> +#include <errno.h> +#include <stdlib.h> +#include <sys/syscall.h> + +#include "trace-local.h" + +int silence_warnings; +int show_status; + +#ifndef gettid +#define gettid() syscall(__NR_gettid) +#endif + +void warning(const char *fmt, ...) 
+{ + va_list ap; + + if (silence_warnings) + return; + + if (errno) + perror("trace-cmd"); + errno = 0; + + va_start(ap, fmt); + fprintf(stderr, " "); + vfprintf(stderr, fmt, ap); + va_end(ap); + + fprintf(stderr, "\n"); +} + +void *malloc_or_die(unsigned int size) +{ + void *data; + + data = malloc(size); + if (!data) + die("malloc"); + return data; +} + +void tracecmd_debug(const char *fmt, ...) +{ + va_list ap; + + if (!tracecmd_get_debug()) + return; + + va_start(ap, fmt); + printf("[%d] ", (int)gettid()); + vprintf(fmt, ap); + va_end(ap); +} + +static struct trace_log_severity { + int id; + const char *name; +} log_severity[] = { + { .id = TEP_LOG_NONE, .name = "none" }, + { .id = TEP_LOG_CRITICAL, .name = "crit" }, + { .id = TEP_LOG_ERROR, .name = "err" }, + { .id = TEP_LOG_WARNING, .name = "warn" }, + { .id = TEP_LOG_INFO, .name = "info" }, + { .id = TEP_LOG_DEBUG, .name = "debug" }, + { .id = TEP_LOG_ALL, .name = "all" }, +}; + +int trace_set_verbose(char *level) +{ + int id; + + /* Default level is info */ + if (!level) + level = "info"; + + if (isdigit(level[0])) { + id = atoi(level); + if (id >= TEP_LOG_NONE) { + if (id > TEP_LOG_ALL) + id = TEP_LOG_ALL; + tracecmd_set_loglevel(id); + return 0; + } + } else { + int size = ARRAY_SIZE(log_severity); + int i; + + for (i = 0; i < size; i++) { + if (!strncmp(level, log_severity[i].name, strlen(log_severity[i].name))) { + tracecmd_set_loglevel(log_severity[i].id); + return 0; + } + } + } + + return -1; +} + +/** + * struct command + * @name command name + * @run function to execute on command `name` + */ +struct command { + char *name; + void (*run)(int argc, char **argv); +}; + + +/** + * Lookup table that maps command names to functions + */ +struct command commands[] = { + {"report", trace_report}, + {"snapshot", trace_snapshot}, + {"hist", trace_hist}, + {"mem", trace_mem}, + {"listen", trace_listen}, + {"agent", trace_agent}, + {"setup-guest", trace_setup_guest}, + {"split", trace_split}, + {"restore", 
trace_restore}, + {"stack", trace_stack}, + {"check-events", trace_check_events}, + {"record", trace_record}, + {"start", trace_start}, + {"set", trace_set}, + {"extract", trace_extract}, + {"stop", trace_stop}, + {"stream", trace_stream}, + {"profile", trace_profile}, + {"restart", trace_restart}, + {"clear", trace_clear}, + {"reset", trace_reset}, + {"stat", trace_stat}, + {"options", trace_option}, + {"show", trace_show}, + {"list", trace_list}, + {"help", trace_usage}, + {"dump", trace_dump}, + {"convert", trace_convert}, + {"-h", trace_usage}, +}; + +int main (int argc, char **argv) +{ + int i; + + errno = 0; + + if (argc < 2) + trace_usage(argc, argv); + + for (i = 0; i < ARRAY_SIZE(commands); ++i) { + if (strcmp(argv[1], commands[i].name) == 0 ){ + commands[i].run(argc, argv); + goto out; + } + } + + /* No valid command found, show help */ + trace_usage(argc, argv); +out: + exit(0); +} diff --git a/tracecmd/trace-convert.c b/tracecmd/trace-convert.c new file mode 100644 index 00000000..88935dc7 --- /dev/null +++ b/tracecmd/trace-convert.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com> + */ +#include <stdlib.h> +#include <unistd.h> +#include <getopt.h> +#include <fcntl.h> +#include <errno.h> + +#include "trace-local.h" +#include "trace-cmd.h" +#include "trace-cmd-private.h" + +static void convert_file(const char *in, const char *out, int file_version, char *compr) +{ + struct tracecmd_input *ihandle; + struct tracecmd_output *ohandle; + + ihandle = tracecmd_open_head(in, 0); + if (!ihandle) + die("error reading %s", in); + + ohandle = tracecmd_copy(ihandle, out, TRACECMD_FILE_CPU_FLYRECORD, file_version, compr); + if (!ohandle) + die("error writing %s", out); + + tracecmd_output_close(ohandle); + tracecmd_close(ihandle); +} + +enum { + OPT_file_version = 254, + OPT_compression = 255, +}; + +void trace_convert(int argc, char **argv) +{ + char *input_file = NULL; + char 
*output_file = NULL; + char *compression = NULL; + int file_version = tracecmd_default_file_version(); + int c; + + if (argc < 2) + usage(argv); + + if (strcmp(argv[1], "convert") != 0) + usage(argv); + + for (;;) { + int option_index = 0; + static struct option long_options[] = { + {"compression", required_argument, NULL, OPT_compression}, + {"file-version", required_argument, NULL, OPT_file_version}, + {"help", no_argument, NULL, '?'}, + {NULL, 0, NULL, 0} + }; + + c = getopt_long (argc-1, argv+1, "+hi:o:", long_options, &option_index); + if (c == -1) + break; + switch (c) { + case 'i': + if (input_file) + die("Only one input file is supported, %s already set", + input_file); + input_file = optarg; + break; + case 'o': + if (output_file) + die("Only one output file is supported, %s already set", + output_file); + output_file = optarg; + break; + case OPT_compression: + if (strcmp(optarg, "any") && strcmp(optarg, "none") && + !tracecmd_compress_is_supported(optarg, NULL)) + die("Compression algorithm %s is not supported", optarg); + compression = optarg; + break; + case OPT_file_version: + file_version = atoi(optarg); + if (file_version < FILE_VERSION_MIN || file_version > FILE_VERSION_MAX) + die("Unsupported file version %d, " + "supported versions are from %d to %d", + file_version, FILE_VERSION_MIN, FILE_VERSION_MAX); + + break; + case 'h': + case '?': + default: + usage(argv); + } + } + + if ((argc - optind) >= 2) { + if (output_file) + usage(argv); + output_file = argv[optind + 1]; + } + + if (!input_file) + input_file = DEFAULT_INPUT_FILE; + if (!output_file) + usage(argv); + + convert_file(input_file, output_file, file_version, compression); +} diff --git a/tracecmd/trace-dump.c b/tracecmd/trace-dump.c new file mode 100644 index 00000000..22e3d871 --- /dev/null +++ b/tracecmd/trace-dump.c @@ -0,0 +1,1355 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * Updates: + * Copyright 
(C) 2019, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com> + */ +#include <stdlib.h> +#include <unistd.h> +#include <getopt.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> + +#include "trace-local.h" + +#define TRACING_STR "tracing" +#define HEAD_PAGE_STR "header_page" +#define HEAD_PAGE_EVENT "header_event" +#define HEAD_OPTIONS "options " +#define HEAD_LATENCY "latency " +#define HEAD_FLYRECORD "flyrecord" + +#define DUMP_SIZE 1024 + +static struct tep_handle *tep; +static unsigned int trace_cpus; +static int has_clock; +static unsigned long file_version; +static bool read_compress; +static struct tracecmd_compression *compress; +static char *meta_strings; +static int meta_strings_size; + +enum dump_items { + SUMMARY = (1 << 0), + HEAD_PAGE = (1 << 1), + HEAD_EVENT = (1 << 2), + FTRACE_FORMAT = (1 << 3), + EVENT_SYSTEMS = (1 << 4), + EVENT_FORMAT = (1 << 5), + KALLSYMS = (1 << 6), + TRACE_PRINTK = (1 << 7), + CMDLINES = (1 << 8), + OPTIONS = (1 << 9), + FLYRECORD = (1 << 10), + CLOCK = (1 << 11), + SECTIONS = (1 << 12), + STRINGS = (1 << 13), +}; + +struct file_section { + int id; + unsigned long long offset; + struct file_section *next; + enum dump_items verbosity; +}; + +static struct file_section *sections; + +enum dump_items verbosity; + +#define DUMP_CHECK(X) ((X) & verbosity) + +#define do_print(ids, fmt, ...) 
/*
 * Read exactly @len bytes from @fd into @dst.
 *
 * Returns the number of bytes read (equal to @len) on success, or -1 if
 * @len is negative, read() fails, or end of file is reached first.
 */
static int read_fd(int fd, char *dst, int len)
{
	int done = 0;

	/* a negative length would become a huge size_t inside read() */
	if (len < 0)
		return -1;

	while (done < len) {
		ssize_t r = read(fd, dst + done, len - done);

		if (r < 0 && errno == EINTR)
			continue;	/* interrupted by a signal: retry */
		if (r <= 0)
			return -1;	/* error or premature end of file */
		done += r;
	}

	return done;
}
size : DUMP_SIZE - 1; + if (read_file_bytes(fd, buf, lsize)) + die("cannot read %d bytes", lsize); + buf[lsize] = 0; + do_print(id, "%s", buf); + size -= lsize; + } + + do_print(id, "\n"); +} + +static int read_file_number(int fd, void *digit, int size) +{ + unsigned long long val; + char buf[8]; + + if (size > 8) + return -1; + + if (read_file_bytes(fd, buf, size)) + return -1; + + val = tep_read_number(tep, buf, size); + switch (size) { + case 1: + *((char *)digit) = val; + break; + case 2: + *((unsigned short *)digit) = val; + break; + case 4: + *((unsigned int *)digit) = val; + break; + case 8: + *((unsigned long long *)digit) = val; + break; + default: + return -1; + } + + return 0; +} + +static const char *get_metadata_string(int offset) +{ + if (!meta_strings || offset < 0 || meta_strings_size <= offset) + return NULL; + + return meta_strings + offset; +} + +static void dump_initial_format(int fd) +{ + char magic[] = TRACECMD_MAGIC; + char buf[DUMP_SIZE]; + int val4; + + do_print(SUMMARY, "\t[Initial format]\n"); + + /* check initial bytes */ + if (read_file_bytes(fd, buf, sizeof(magic))) + die("cannot read %zu bytes magic", sizeof(magic)); + if (memcmp(buf, magic, sizeof(magic)) != 0) + die("wrong file magic"); + + /* check initial tracing string */ + if (read_file_bytes(fd, buf, strlen(TRACING_STR))) + die("cannot read %zu bytes tracing string", strlen(TRACING_STR)); + buf[strlen(TRACING_STR)] = 0; + if (strncmp(buf, TRACING_STR, strlen(TRACING_STR)) != 0) + die("wrong tracing string: %s", buf); + + /* get file version */ + if (read_file_string(fd, buf, DUMP_SIZE)) + die("no version string"); + + do_print(SUMMARY, "\t\t%s\t[Version]\n", buf); + file_version = strtol(buf, NULL, 10); + if (!file_version && errno) + die("Invalid file version string %s", buf); + if (!tracecmd_is_version_supported(file_version)) + die("Unsupported file version %lu", file_version); + + /* get file endianness*/ + if (read_file_bytes(fd, buf, 1)) + die("cannot read file 
endianness"); + do_print(SUMMARY, "\t\t%d\t[%s endian]\n", buf[0], buf[0]?"Big":"Little"); + + tep_set_file_bigendian(tep, buf[0]); + tep_set_local_bigendian(tep, tracecmd_host_bigendian()); + + /* get file bytes per long*/ + if (read_file_bytes(fd, buf, 1)) + die("cannot read file bytes per long"); + do_print(SUMMARY, "\t\t%d\t[Bytes in a long]\n", buf[0]); + + if (read_file_number(fd, &val4, 4)) + die("cannot read file page size"); + do_print(SUMMARY, "\t\t%d\t[Page size, bytes]\n", val4); +} + +static void dump_compress(int fd) +{ + char zname[DUMP_SIZE]; + char zver[DUMP_SIZE]; + + if (file_version < FILE_VERSION_COMPRESSION) + return; + + /* get compression header */ + if (read_file_string(fd, zname, DUMP_SIZE)) + die("no compression header"); + + if (read_file_string(fd, zver, DUMP_SIZE)) + die("no compression version"); + + do_print((SUMMARY), "\t\t%s\t[Compression algorithm]\n", zname); + do_print((SUMMARY), "\t\t%s\t[Compression version]\n", zver); + + if (strcmp(zname, "none")) { + compress = tracecmd_compress_alloc(zname, zver, fd, tep, NULL); + if (!compress) + die("cannot uncompress the file"); + } +} + +static void dump_header_page(int fd) +{ + unsigned long long size; + char buf[DUMP_SIZE]; + + do_print((SUMMARY | HEAD_PAGE), "\t[Header page, "); + + /* check header string */ + if (read_file_bytes(fd, buf, strlen(HEAD_PAGE_STR) + 1)) + die("cannot read %zu bytes header string", strlen(HEAD_PAGE_STR)); + if (strncmp(buf, HEAD_PAGE_STR, strlen(HEAD_PAGE_STR)) != 0) + die("wrong header string: %s", buf); + + if (read_file_number(fd, &size, 8)) + die("cannot read the size of the page header information"); + + do_print((SUMMARY | HEAD_PAGE), "%lld bytes]\n", size); + + read_dump_string(fd, size, HEAD_PAGE); +} + +static void dump_header_event(int fd) +{ + unsigned long long size; + char buf[DUMP_SIZE]; + + do_print((SUMMARY | HEAD_EVENT), "\t[Header event, "); + + /* check header string */ + if (read_file_bytes(fd, buf, strlen(HEAD_PAGE_EVENT) + 1)) + 
die("cannot read %zu bytes header string", strlen(HEAD_PAGE_EVENT)); + if (strncmp(buf, HEAD_PAGE_EVENT, strlen(HEAD_PAGE_EVENT)) != 0) + die("wrong header string: %s", buf); + + if (read_file_number(fd, &size, 8)) + die("cannot read the size of the page header information"); + + do_print((SUMMARY | HEAD_EVENT), "%lld bytes]\n", size); + + read_dump_string(fd, size, HEAD_EVENT); +} + +static void uncompress_reset(void) +{ + if (compress && file_version >= FILE_VERSION_COMPRESSION) { + read_compress = false; + tracecmd_compress_reset(compress); + } +} + +static int uncompress_block(void) +{ + int ret = 0; + + if (compress && file_version >= FILE_VERSION_COMPRESSION) { + ret = tracecmd_uncompress_block(compress); + if (!ret) + read_compress = true; + + } + + return ret; +} + +static void dump_ftrace_events_format(int fd) +{ + unsigned long long size; + unsigned int count; + + do_print((SUMMARY | FTRACE_FORMAT), "\t[Ftrace format, "); + if (read_file_number(fd, &count, 4)) + die("cannot read the count of the ftrace events"); + + do_print((SUMMARY | FTRACE_FORMAT), "%d events]\n", count); + + while (count) { + if (read_file_number(fd, &size, 8)) + die("cannot read the size of the %d ftrace event", count); + read_dump_string(fd, size, FTRACE_FORMAT); + count--; + } +} + +static void dump_events_format(int fd) +{ + unsigned long long size; + unsigned int systems; + unsigned int events; + char buf[DUMP_SIZE]; + + do_print((SUMMARY | EVENT_FORMAT | EVENT_SYSTEMS), "\t[Events format, "); + + if (read_file_number(fd, &systems, 4)) + die("cannot read the count of the event systems"); + + do_print((SUMMARY | EVENT_FORMAT | EVENT_SYSTEMS), "%d systems]\n", systems); + + while (systems) { + + if (read_file_string(fd, buf, DUMP_SIZE)) + die("cannot read the name of the %dth system", systems); + if (read_file_number(fd, &events, 4)) + die("cannot read the count of the events in system %s", + buf); + do_print(EVENT_SYSTEMS, "\t\t%s %d [system, events]\n", buf, events); + while 
(events) { + if (read_file_number(fd, &size, 8)) + die("cannot read the format size of the %dth event from system %s", + events, buf); + read_dump_string(fd, size, EVENT_FORMAT); + events--; + } + systems--; + } +} + +static void dump_kallsyms(int fd) +{ + unsigned int size; + + do_print((SUMMARY | KALLSYMS), "\t[Kallsyms, "); + + if (read_file_number(fd, &size, 4)) + die("cannot read the size of the kallsyms"); + + do_print((SUMMARY | KALLSYMS), "%d bytes]\n", size); + + read_dump_string(fd, size, KALLSYMS); +} + +static void dump_printk(int fd) +{ + unsigned int size; + + do_print((SUMMARY | TRACE_PRINTK), "\t[Trace printk, "); + + if (read_file_number(fd, &size, 4)) + die("cannot read the size of the trace printk"); + + do_print((SUMMARY | TRACE_PRINTK), "%d bytes]\n", size); + + read_dump_string(fd, size, TRACE_PRINTK); +} + +static void dump_cmdlines(int fd) +{ + unsigned long long size; + + do_print((SUMMARY | CMDLINES), "\t[Saved command lines, "); + + if (read_file_number(fd, &size, 8)) + die("cannot read the size of the saved command lines"); + + do_print((SUMMARY | CMDLINES), "%d bytes]\n", size); + + read_dump_string(fd, size, CMDLINES); +} + +static void dump_cpus_count(int fd) +{ + if (read_file_number(fd, &trace_cpus, 4)) + die("cannot read the cpu count"); + + do_print(SUMMARY, "\t%d [CPUs with tracing data]\n", trace_cpus); +} + +static void dump_option_string(int fd, int size, char *desc) +{ + do_print(OPTIONS, "\t\t[Option %s, %d bytes]\n", desc, size); + if (size) + read_dump_string(fd, size, OPTIONS); +} + +static void dump_section_header(int fd, enum dump_items v, unsigned short *flags) +{ + unsigned long long offset, size; + unsigned short fl; + unsigned short id; + const char *desc; + int desc_id; + + offset = lseek64(fd, 0, SEEK_CUR); + if (read_file_number(fd, &id, 2)) + die("cannot read the section id"); + + if (read_file_number(fd, &fl, 2)) + die("cannot read the section flags"); + + if (read_file_number(fd, &desc_id, 4)) + die("no 
section description"); + + desc = get_metadata_string(desc_id); + if (!desc) + desc = "Unknown"; + + if (read_file_number(fd, &size, 8)) + die("cannot read section size"); + + do_print(v, "\t[Section %d @ %lld: \"%s\", flags 0x%X, %lld bytes]\n", + id, offset, desc, fl, size); + + if (flags) + *flags = fl; +} + +static void dump_option_buffer(int fd, unsigned short option, int size) +{ + unsigned long long total_size = 0; + unsigned long long data_size; + unsigned long long current; + unsigned long long offset; + unsigned short flags; + char clock[DUMP_SIZE]; + char name[DUMP_SIZE]; + int page_size; + int cpus = 0; + int id; + int i; + + if (size < 8) + die("broken buffer option with size %d", size); + + if (read_file_number(fd, &offset, 8)) + die("cannot read the offset of the buffer option"); + + if (read_file_string(fd, name, DUMP_SIZE)) + die("cannot read the name of the buffer option"); + + if (file_version < FILE_VERSION_SECTIONS) { + do_print(OPTIONS|FLYRECORD, "\t\t[Option BUFFER, %d bytes]\n", size); + do_print(OPTIONS|FLYRECORD, "%lld [offset]\n", offset); + do_print(OPTIONS|FLYRECORD, "\"%s\" [name]\n", name); + return; + } + + current = lseek64(fd, 0, SEEK_CUR); + if (lseek64(fd, offset, SEEK_SET) == (off_t)-1) + die("cannot goto buffer offset %lld", offset); + + dump_section_header(fd, FLYRECORD, &flags); + + if (lseek64(fd, current, SEEK_SET) == (off_t)-1) + die("cannot go back to buffer option"); + + do_print(OPTIONS|FLYRECORD, "\t\t[Option BUFFER, %d bytes]\n", size); + do_print(OPTIONS|FLYRECORD, "%lld [offset]\n", offset); + do_print(OPTIONS|FLYRECORD, "\"%s\" [name]\n", name); + + if (read_file_string(fd, clock, DUMP_SIZE)) + die("cannot read clock of the buffer option"); + + do_print(OPTIONS|FLYRECORD, "\"%s\" [clock]\n", clock); + if (option == TRACECMD_OPTION_BUFFER) { + if (read_file_number(fd, &page_size, 4)) + die("cannot read the page size of the buffer option"); + do_print(OPTIONS|FLYRECORD, "%d [Page size, bytes]\n", page_size); + + if 
(read_file_number(fd, &cpus, 4)) + die("cannot read the cpu count of the buffer option"); + + do_print(OPTIONS|FLYRECORD, "%d [CPUs]:\n", cpus); + for (i = 0; i < cpus; i++) { + if (read_file_number(fd, &id, 4)) + die("cannot read the id of cpu %d from the buffer option", i); + + if (read_file_number(fd, &offset, 8)) + die("cannot read the offset of cpu %d from the buffer option", i); + + if (read_file_number(fd, &data_size, 8)) + die("cannot read the data size of cpu %d from the buffer option", i); + + total_size += data_size; + do_print(OPTIONS|FLYRECORD, " %d %lld\t%lld\t[id, data offset and size]\n", + id, offset, data_size); + } + do_print(SUMMARY, "\t\[buffer \"%s\", \"%s\" clock, %d page size, " + "%d cpus, %lld bytes flyrecord data]\n", + name, clock, page_size, cpus, total_size); + } else { + do_print(SUMMARY, "\t\[buffer \"%s\", \"%s\" clock, latency data]\n", name, clock); + } + +} + +static void dump_option_int(int fd, int size, char *desc) +{ + int val; + + do_print(OPTIONS, "\t\t[Option %s, %d bytes]\n", desc, size); + read_file_number(fd, &val, size); + do_print(OPTIONS, "%d\n", val); +} + +static void dump_option_xlong(int fd, int size, char *desc) +{ + long long val; + + do_print(OPTIONS, "\t\t[Option %s, %d bytes]\n", desc, size); + read_file_number(fd, &val, size); + do_print(OPTIONS, "0x%llX\n", val); +} + +struct time_shift_cpu { + unsigned int count; + long long *scalings; + long long *frac; + long long *offsets; + unsigned long long *times; +}; + +static void dump_option_timeshift(int fd, int size) +{ + struct time_shift_cpu *cpus_data; + long long trace_id; + unsigned int flags; + unsigned int cpus; + int i, j; + + /* + * long long int (8 bytes) trace session ID + * int (4 bytes) count of timestamp offsets. + * long long array of size [count] of times, + * when the offsets were calculated. + * long long array of size [count] of timestamp offsets. 
+ */ + if (size < 12) { + do_print(OPTIONS, "Broken time shift option, size %s", size); + return; + } + do_print(OPTIONS, "\t\t[Option TimeShift, %d bytes]\n", size); + read_file_number(fd, &trace_id, 8); + size -= 8; + do_print(OPTIONS, "0x%llX [peer's trace id]\n", trace_id); + read_file_number(fd, &flags, 4); + size -= 4; + do_print(OPTIONS, "0x%llX [peer's protocol flags]\n", flags); + read_file_number(fd, &cpus, 4); + size -= 4; + do_print(OPTIONS, "0x%llX [peer's CPU count]\n", cpus); + cpus_data = calloc(cpus, sizeof(struct time_shift_cpu)); + if (!cpus_data) + return; + for (j = 0; j < cpus; j++) { + if (size < 4) + goto out; + read_file_number(fd, &cpus_data[j].count, 4); + size -= 4; + do_print(OPTIONS, "%lld [samples count for CPU %d]\n", cpus_data[j].count, j); + cpus_data[j].times = calloc(cpus_data[j].count, sizeof(long long)); + cpus_data[j].offsets = calloc(cpus_data[j].count, sizeof(long long)); + cpus_data[j].scalings = calloc(cpus_data[j].count, sizeof(long long)); + cpus_data[j].frac = calloc(cpus_data[j].count, sizeof(long long)); + if (!cpus_data[j].times || !cpus_data[j].offsets || + !cpus_data[j].scalings || !cpus_data[j].frac) + goto out; + for (i = 0; i < cpus_data[j].count; i++) { + if (size < 8) + goto out; + read_file_number(fd, cpus_data[j].times + i, 8); + size -= 8; + } + for (i = 0; i < cpus_data[j].count; i++) { + if (size < 8) + goto out; + read_file_number(fd, cpus_data[j].offsets + i, 8); + size -= 8; + } + for (i = 0; i < cpus_data[j].count; i++) { + if (size < 8) + goto out; + read_file_number(fd, cpus_data[j].scalings + i, 8); + size -= 8; + } + } + + if (size > 0) { + for (j = 0; j < cpus; j++) { + if (!cpus_data[j].frac) + goto out; + for (i = 0; i < cpus_data[j].count; i++) { + if (size < 8) + goto out; + read_file_number(fd, cpus_data[j].frac + i, 8); + size -= 8; + } + } + } + + for (j = 0; j < cpus; j++) { + for (i = 0; i < cpus_data[j].count; i++) + do_print(OPTIONS, "\t%lld %lld %llu %llu[offset * scaling >> fraction 
@ time]\n", + cpus_data[j].offsets[i], cpus_data[j].scalings[i], + cpus_data[j].frac[i], cpus_data[j].times[i]); + + } + +out: + if (j < cpus) + do_print(OPTIONS, "Broken time shift option\n"); + for (j = 0; j < cpus; j++) { + free(cpus_data[j].times); + free(cpus_data[j].offsets); + free(cpus_data[j].scalings); + free(cpus_data[j].frac); + } + free(cpus_data); +} + +/* + * Print a GUEST option: the guest name, its trace id and the table of + * guest CPU / host PID pairs. The payload is read from the file into a + * buffer of @size bytes and every field is checked against the bytes + * that remain before it is decoded. + */ +void dump_option_guest(int fd, int size) +{ + unsigned long long trace_id; + char *buf, *p; + int cpu, pid; + int cpus; + int i; + + do_print(OPTIONS, "\t\t[Option GUEST, %d bytes]\n", size); + + /* + * Guest name, null terminated string + * long long (8 bytes) trace-id + * int (4 bytes) number of guest CPUs + * array of size number of guest CPUs: + * int (4 bytes) Guest CPU id + * int (4 bytes) Host PID, running the guest CPU + */ + if (size <= 0) + return; + buf = calloc(1, size); + if (!buf) + return; + if (read_file_bytes(fd, buf, size)) + goto out; + + /* + * The name must be terminated inside the payload. Otherwise strlen() + * runs past the buffer and size goes negative, which the unsigned + * comparisons against sizeof() below would not catch. + */ + if (!memchr(buf, 0, size)) + goto out; + + p = buf; + do_print(OPTIONS, "%s [Guest name]\n", p); + size -= strlen(buf) + 1; + p += strlen(buf) + 1; + + if (size < sizeof(long long)) + goto out; + trace_id = tep_read_number(tep, p, sizeof(long long)); + size -= sizeof(long long); + p += sizeof(long long); + do_print(OPTIONS, "0x%llX [trace id]\n", trace_id); + + if (size < sizeof(int)) + goto out; + cpus = tep_read_number(tep, p, sizeof(int)); + size -= sizeof(int); + p += sizeof(int); + do_print(OPTIONS, "%d [Guest CPUs]\n", cpus); + + for (i = 0; i < cpus; i++) { + if (size < 2 * sizeof(int)) + goto out; + cpu = tep_read_number(tep, p, sizeof(int)); + size -= sizeof(int); + p += sizeof(int); + pid = tep_read_number(tep, p, sizeof(int)); + size -= sizeof(int); + p += sizeof(int); + do_print(OPTIONS, " %d %d [guest cpu, host pid]\n", cpu, pid); + } + +out: + free(buf); +} + +void dump_option_tsc2nsec(int fd, int size) +{ + int mult, shift; + unsigned long long offset; + + do_print(OPTIONS, "\n\t\t[Option TSC2NSEC, %d bytes]\n", size); + + if (read_file_number(fd, &mult, 4)) + die("cannot read tsc2nsec multiplier"); + 
if (read_file_number(fd, &shift, 4)) + die("cannot read tsc2nsec shift"); + if (read_file_number(fd, &offset, 8)) + die("cannot read tsc2nsec offset"); + do_print(OPTIONS, "%d %d %llu [multiplier, shift, offset]\n", mult, shift, offset); +} + +static void dump_option_section(int fd, unsigned int size, + unsigned short id, char *desc, enum dump_items v) +{ + struct file_section *sec; + + sec = calloc(1, sizeof(struct file_section)); + if (!sec) + die("cannot allocate new section"); + + sec->next = sections; + sections = sec; + sec->id = id; + sec->verbosity = v; + if (read_file_number(fd, &sec->offset, 8)) + die("cannot read the option %d offset", id); + + do_print(OPTIONS, "\t\t[Option %s, %d bytes] @ %lld\n", desc, size, sec->offset); +} + +static void dump_sections(int fd, int count) +{ + struct file_section *sec = sections; + unsigned short flags; + + while (sec) { + if (lseek64(fd, sec->offset, SEEK_SET) == (off_t)-1) + die("cannot goto option offset %lld", sec->offset); + + dump_section_header(fd, sec->verbosity, &flags); + + if ((flags & TRACECMD_SEC_FL_COMPRESS) && uncompress_block()) + die("cannot uncompress section block"); + + switch (sec->id) { + case TRACECMD_OPTION_HEADER_INFO: + dump_header_page(fd); + dump_header_event(fd); + break; + case TRACECMD_OPTION_FTRACE_EVENTS: + dump_ftrace_events_format(fd); + break; + case TRACECMD_OPTION_EVENT_FORMATS: + dump_events_format(fd); + break; + case TRACECMD_OPTION_KALLSYMS: + dump_kallsyms(fd); + break; + case TRACECMD_OPTION_PRINTK: + dump_printk(fd); + break; + case TRACECMD_OPTION_CMDLINES: + dump_cmdlines(fd); + break; + } + uncompress_reset(); + sec = sec->next; + } + do_print(SUMMARY|SECTIONS, "\t[%d sections]\n", count); +} + +static int dump_options_read(int fd); + +static int dump_option_done(int fd, int size) +{ + unsigned long long offset; + + do_print(OPTIONS, "\t\t[Option DONE, %d bytes]\n", size); + + if (file_version < FILE_VERSION_SECTIONS || size < 8) + return 0; + + if (read_file_number(fd, 
&offset, 8)) + die("cannot read the next options offset"); + + do_print(OPTIONS, "%lld\n", offset); + if (!offset) + return 0; + + if (lseek64(fd, offset, SEEK_SET) == (off_t)-1) + die("cannot goto next options offset %lld", offset); + + do_print(OPTIONS, "\n\n"); + + return dump_options_read(fd); +} + +static int dump_options_read(int fd) +{ + unsigned short flags = 0; + unsigned short option; + unsigned int size; + int count = 0; + + if (file_version >= FILE_VERSION_SECTIONS) + dump_section_header(fd, OPTIONS, &flags); + + if ((flags & TRACECMD_SEC_FL_COMPRESS) && uncompress_block()) + die("cannot uncompress file block"); + + for (;;) { + if (read_file_number(fd, &option, 2)) + die("cannot read the option id"); + if (option == TRACECMD_OPTION_DONE && file_version < FILE_VERSION_SECTIONS) + break; + if (read_file_number(fd, &size, 4)) + die("cannot read the option size"); + + count++; + switch (option) { + case TRACECMD_OPTION_DATE: + dump_option_string(fd, size, "DATE"); + break; + case TRACECMD_OPTION_CPUSTAT: + dump_option_string(fd, size, "CPUSTAT"); + break; + case TRACECMD_OPTION_BUFFER: + case TRACECMD_OPTION_BUFFER_TEXT: + dump_option_buffer(fd, option, size); + break; + case TRACECMD_OPTION_TRACECLOCK: + do_print(OPTIONS, "\t\t[Option TRACECLOCK, %d bytes]\n", size); + read_dump_string(fd, size, OPTIONS | CLOCK); + has_clock = 1; + break; + case TRACECMD_OPTION_UNAME: + dump_option_string(fd, size, "UNAME"); + break; + case TRACECMD_OPTION_HOOK: + dump_option_string(fd, size, "HOOK"); + break; + case TRACECMD_OPTION_OFFSET: + dump_option_string(fd, size, "OFFSET"); + break; + case TRACECMD_OPTION_CPUCOUNT: + dump_option_int(fd, size, "CPUCOUNT"); + break; + case TRACECMD_OPTION_VERSION: + dump_option_string(fd, size, "VERSION"); + break; + case TRACECMD_OPTION_PROCMAPS: + dump_option_string(fd, size, "PROCMAPS"); + break; + case TRACECMD_OPTION_TRACEID: + dump_option_xlong(fd, size, "TRACEID"); + break; + case TRACECMD_OPTION_TIME_SHIFT: + 
dump_option_timeshift(fd, size); + break; + case TRACECMD_OPTION_GUEST: + dump_option_guest(fd, size); + break; + case TRACECMD_OPTION_TSC2NSEC: + dump_option_tsc2nsec(fd, size); + break; + case TRACECMD_OPTION_HEADER_INFO: + dump_option_section(fd, size, option, "HEADERS", HEAD_PAGE | HEAD_EVENT); + break; + case TRACECMD_OPTION_FTRACE_EVENTS: + dump_option_section(fd, size, option, "FTRACE EVENTS", FTRACE_FORMAT); + break; + case TRACECMD_OPTION_EVENT_FORMATS: + dump_option_section(fd, size, option, + "EVENT FORMATS", EVENT_SYSTEMS | EVENT_FORMAT); + break; + case TRACECMD_OPTION_KALLSYMS: + dump_option_section(fd, size, option, "KALLSYMS", KALLSYMS); + break; + case TRACECMD_OPTION_PRINTK: + dump_option_section(fd, size, option, "PRINTK", TRACE_PRINTK); + break; + case TRACECMD_OPTION_CMDLINES: + dump_option_section(fd, size, option, "CMDLINES", CMDLINES); + break; + case TRACECMD_OPTION_DONE: + uncompress_reset(); + count += dump_option_done(fd, size); + return count; + default: + do_print(OPTIONS, " %d %d\t[Unknown option, size - skipping]\n", + option, size); + do_lseek(fd, size, SEEK_CUR); + break; + } + } + uncompress_reset(); + return count; +} + +static void dump_options(int fd) +{ + int count; + + count = dump_options_read(fd); + do_print(SUMMARY|OPTIONS, "\t[%d options]\n", count); +} + +static void dump_latency(int fd) +{ + do_print(SUMMARY, "\t[Latency tracing data]\n"); +} + +/* + * Print the tracing clock: an 8-byte length followed by that many bytes + * of text. Nothing is read from the file when has_clock is not set. + */ +static void dump_clock(int fd) +{ + long long size; + char *clock; + + do_print((SUMMARY | CLOCK), "\t[Tracing clock]\n"); + if (!has_clock) { + do_print((SUMMARY | CLOCK), "\t\t No tracing clock saved in the file\n"); + return; + } + if (read_file_number(fd, &size, 8)) + die("cannot read clock size"); + /* The length comes from the file; a negative value is never valid */ + if (size < 0) + die("invalid clock size %lld", size); + /* One extra byte for the terminator stored at clock[size] below */ + clock = calloc(1, (size_t)size + 1); + if (!clock) + die("cannot allocate clock %lld bytes", size); + + if (read_file_bytes(fd, clock, size)) + die("cannot read clock %lld bytes", size); + clock[size] = 0; + do_print((SUMMARY | CLOCK), "\t\t%s\n", clock); + free(clock); +} + +static void 
dump_flyrecord(int fd) +{ + long long cpu_offset; + long long cpu_size; + int i; + + do_print((SUMMARY | FLYRECORD), "\t[Flyrecord tracing data]\n"); + + for (i = 0; i < trace_cpus; i++) { + if (read_file_number(fd, &cpu_offset, 8)) + die("cannot read the cpu %d offset", i); + if (read_file_number(fd, &cpu_size, 8)) + die("cannot read the cpu %d size", i); + do_print(FLYRECORD, "\t %10.lld %10.lld\t[offset, size of cpu %d]\n", + cpu_offset, cpu_size, i); + } + dump_clock(fd); +} + +static void dump_therest(int fd) +{ + char str[10]; + + for (;;) { + if (read_file_bytes(fd, str, 10)) + die("cannot read the rest of the header"); + + if (strncmp(str, HEAD_OPTIONS, 10) == 0) + dump_options(fd); + else if (strncmp(str, HEAD_LATENCY, 10) == 0) + dump_latency(fd); + else if (strncmp(str, HEAD_FLYRECORD, 10) == 0) + dump_flyrecord(fd); + else { + lseek64(fd, -10, SEEK_CUR); + break; + } + } +} + +static void dump_v6_file(int fd) +{ + dump_header_page(fd); + dump_header_event(fd); + dump_ftrace_events_format(fd); + dump_events_format(fd); + dump_kallsyms(fd); + dump_printk(fd); + dump_cmdlines(fd); + dump_cpus_count(fd); + dump_therest(fd); +} + +static int read_metadata_strings(int fd, unsigned long long size) +{ + char *str, *strings; + int psize; + int ret; + + strings = realloc(meta_strings, meta_strings_size + size); + if (!strings) + return -1; + meta_strings = strings; + + ret = read_file_bytes(fd, meta_strings + meta_strings_size, size); + if (ret < 0) + return -1; + + do_print(STRINGS, "\t[String @ offset]\n"); + psize = 0; + while (psize < size) { + str = meta_strings + meta_strings_size + psize; + do_print(STRINGS, "\t\t\"%s\" @ %d\n", str, meta_strings_size + psize); + psize += strlen(str) + 1; + } + + meta_strings_size += size; + + return 0; +} + +static void get_meta_strings(int fd) +{ + unsigned long long offset, size; + unsigned int csize, rsize; + unsigned short fl, id; + int desc_id; + + offset = lseek64(fd, 0, SEEK_CUR); + do { + if (read_file_number(fd, 
&id, 2)) + break; + if (read_file_number(fd, &fl, 2)) + die("cannot read section flags"); + if (read_file_number(fd, &desc_id, 4)) + die("cannot read section description"); + if (read_file_number(fd, &size, 8)) + die("cannot read section size"); + if (id == TRACECMD_OPTION_STRINGS) { + if ((fl & TRACECMD_SEC_FL_COMPRESS)) { + read_file_number(fd, &csize, 4); + read_file_number(fd, &rsize, 4); + lseek64(fd, -8, SEEK_CUR); + if (uncompress_block()) + break; + } else { + rsize = size; + } + read_metadata_strings(fd, rsize); + uncompress_reset(); + } else { + if (lseek64(fd, size, SEEK_CUR) == (off_t)-1) + break; + } + } while (1); + + if (lseek64(fd, offset, SEEK_SET) == (off_t)-1) + die("cannot restore the original file location"); +} + +static int walk_v7_sections(int fd) +{ + unsigned long long offset, soffset, size; + unsigned short fl; + unsigned short id; + int csize, rsize; + int count = 0; + int desc_id; + const char *desc; + + offset = lseek64(fd, 0, SEEK_CUR); + do { + soffset = lseek64(fd, 0, SEEK_CUR); + if (read_file_number(fd, &id, 2)) + break; + + if (read_file_number(fd, &fl, 2)) + die("cannot read section flags"); + + if (read_file_number(fd, &desc_id, 4)) + die("cannot read section description"); + + desc = get_metadata_string(desc_id); + if (!desc) + desc = "Unknown"; + + if (read_file_number(fd, &size, 8)) + die("cannot read section size"); + + if (id >= TRACECMD_OPTION_MAX) + do_print(SECTIONS, "Unknown section id %d: %s", id, desc); + + count++; + if (fl & TRACECMD_SEC_FL_COMPRESS) { + if (id == TRACECMD_OPTION_BUFFER || + id == TRACECMD_OPTION_BUFFER_TEXT) { + do_print(SECTIONS, + "\t[Section %2d @ %-16lld\t\"%s\", flags 0x%X, " + "%lld compressed bytes]\n", + id, soffset, desc, fl, size); + } else { + if (read_file_number(fd, &csize, 4)) + die("cannot read section size"); + + if (read_file_number(fd, &rsize, 4)) + die("cannot read section size"); + + do_print(SECTIONS, "\t[Section %2d @ %-16lld\t\"%s\", flags 0x%X, " + "%d compressed, %d 
uncompressed]\n", + id, soffset, desc, fl, csize, rsize); + size -= 8; + } + } else { + do_print(SECTIONS, "\t[Section %2d @ %-16lld\t\"%s\", flags 0x%X, %lld bytes]\n", + id, soffset, desc, fl, size); + } + + if (lseek64(fd, size, SEEK_CUR) == (off_t)-1) + break; + } while (1); + + if (lseek64(fd, offset, SEEK_SET) == (off_t)-1) + die("cannot restore the original file location"); + + return count; +} + +static void dump_v7_file(int fd) +{ + long long offset; + int sections; + + if (read_file_number(fd, &offset, 8)) + die("cannot read offset of the first option section"); + + get_meta_strings(fd); + sections = walk_v7_sections(fd); + + if (lseek64(fd, offset, SEEK_SET) == (off_t)-1) + die("cannot goto options offset %lld", offset); + + dump_options(fd); + dump_sections(fd, sections); +} + +static void free_sections(void) +{ + struct file_section *del; + + while (sections) { + del = sections; + sections = sections->next; + free(del); + } +} + +static void dump_file(const char *file) +{ + int fd; + + tep = tep_alloc(); + if (!tep) + return; + + fd = open(file, O_RDONLY); + if (fd < 0) + die("cannot open '%s'\n", file); + + do_print(SUMMARY, "\n Tracing meta data in file %s:\n", file); + + dump_initial_format(fd); + dump_compress(fd); + if (file_version < FILE_VERSION_SECTIONS) + dump_v6_file(fd); + else + dump_v7_file(fd); + free_sections(); + tep_free(tep); + tep = NULL; + close(fd); +} + +enum { + OPT_sections = 240, + OPT_strings = 241, + OPT_verbose = 242, + OPT_clock = 243, + OPT_all = 244, + OPT_summary = 245, + OPT_flyrecord = 246, + OPT_options = 247, + OPT_cmd_lines = 248, + OPT_printk = 249, + OPT_kallsyms = 250, + OPT_events = 251, + OPT_systems = 252, + OPT_ftrace = 253, + OPT_head_event = 254, + OPT_head_page = 255, +}; + +void trace_dump(int argc, char **argv) +{ + char *input_file = NULL; + bool validate = false; + int c; + + if (argc < 2) + usage(argv); + + if (strcmp(argv[1], "dump") != 0) + usage(argv); + for (;;) { + int option_index = 0; + static 
struct option long_options[] = { + {"all", no_argument, NULL, OPT_all}, + {"summary", no_argument, NULL, OPT_summary}, + {"head-page", no_argument, NULL, OPT_head_page}, + {"head-event", no_argument, NULL, OPT_head_event}, + {"ftrace-events", no_argument, NULL, OPT_ftrace}, + {"systems", no_argument, NULL, OPT_systems}, + {"events", no_argument, NULL, OPT_events}, + {"kallsyms", no_argument, NULL, OPT_kallsyms}, + {"printk", no_argument, NULL, OPT_printk}, + {"cmd-lines", no_argument, NULL, OPT_cmd_lines}, + {"options", no_argument, NULL, OPT_options}, + {"flyrecord", no_argument, NULL, OPT_flyrecord}, + {"clock", no_argument, NULL, OPT_clock}, + {"strings", no_argument, NULL, OPT_strings}, + {"sections", no_argument, NULL, OPT_sections}, + {"validate", no_argument, NULL, 'v'}, + {"help", no_argument, NULL, '?'}, + {"verbose", optional_argument, NULL, OPT_verbose}, + {NULL, 0, NULL, 0} + }; + + c = getopt_long (argc-1, argv+1, "+hvai:", + long_options, &option_index); + if (c == -1) + break; + switch (c) { + case 'h': + usage(argv); + break; + case 'i': + input_file = optarg; + break; + case 'v': + validate = true; + break; + case OPT_all: + verbosity = 0xFFFFFFFF; + break; + case OPT_summary: + verbosity |= SUMMARY; + break; + case OPT_flyrecord: + verbosity |= FLYRECORD; + break; + case OPT_options: + verbosity |= OPTIONS; + break; + case OPT_cmd_lines: + verbosity |= CMDLINES; + break; + case OPT_printk: + verbosity |= TRACE_PRINTK; + break; + case OPT_kallsyms: + verbosity |= KALLSYMS; + break; + case OPT_events: + verbosity |= EVENT_FORMAT; + break; + case OPT_systems: + verbosity |= EVENT_SYSTEMS; + break; + case OPT_ftrace: + verbosity |= FTRACE_FORMAT; + break; + case OPT_head_event: + verbosity |= HEAD_EVENT; + break; + case OPT_head_page: + verbosity |= HEAD_PAGE; + break; + case OPT_clock: + verbosity |= CLOCK; + break; + case OPT_verbose: + if (trace_set_verbose(optarg) < 0) + die("invalid verbose level %s", optarg); + break; + case OPT_strings: + 
verbosity |= STRINGS; + break; + case OPT_sections: + verbosity |= SECTIONS; + break; + default: + usage(argv); + } + } + + if ((argc - optind) >= 2) { + if (input_file) + usage(argv); + input_file = argv[optind + 1]; + } + + if (!input_file) + input_file = DEFAULT_INPUT_FILE; + + if (!verbosity && !validate) + verbosity = SUMMARY; + + dump_file(input_file); + + if (validate) + tracecmd_plog("File %s is a valid trace-cmd file\n", input_file); +} diff --git a/tracecmd/trace-hist.c b/tracecmd/trace-hist.c new file mode 100644 index 00000000..efb790ac --- /dev/null +++ b/tracecmd/trace-hist.c @@ -0,0 +1,1076 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2013 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * Several of the ideas in this file came from Arnaldo Carvalho de Melo's + * work on the perf ui. + */ +#define _LARGEFILE64_SOURCE +#include <dirent.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <signal.h> + +#include "trace-hash-local.h" +#include "trace-local.h" +#include "list.h" + +static int sched_wakeup_type; +static int sched_wakeup_new_type; +static int sched_switch_type; +static int function_type; +static int function_graph_entry_type; +static int function_graph_exit_type; +static int kernel_stack_type; + +static int long_size; + +static struct tep_format_field *common_type_hist; +static struct tep_format_field *common_pid_field; +static struct tep_format_field *sched_wakeup_comm_field; +static struct tep_format_field *sched_wakeup_new_comm_field; +static struct tep_format_field *sched_wakeup_pid_field; +static struct tep_format_field *sched_wakeup_new_pid_field; +static struct tep_format_field *sched_switch_prev_field; +static struct tep_format_field *sched_switch_next_field; +static struct tep_format_field *sched_switch_prev_pid_field; +static struct tep_format_field *sched_switch_next_pid_field; +static struct tep_format_field *function_ip_field; +static struct tep_format_field 
*function_parent_ip_field; +static struct tep_format_field *function_graph_entry_func_field; +static struct tep_format_field *function_graph_entry_depth_field; +static struct tep_format_field *function_graph_exit_func_field; +static struct tep_format_field *function_graph_exit_depth_field; +static struct tep_format_field *function_graph_exit_calltime_field; +static struct tep_format_field *function_graph_exit_rettime_field; +static struct tep_format_field *function_graph_exit_overrun_field; +static struct tep_format_field *kernel_stack_caller_field; + +static int compact; + +static void *zalloc(size_t size) +{ + return calloc(1, size); +} + +static const char **ips; +static int ips_idx; +static int func_depth; +static int current_pid = -1; + +struct stack_save { + struct stack_save *next; + const char **ips; + int ips_idx; + int func_depth; + int pid; +}; + +struct stack_save *saved_stacks; + +static void reset_stack(void) +{ + current_pid = -1; + ips_idx = 0; + func_depth = 0; + /* Don't free here, it may be saved */ + ips = NULL; +} + +static void save_stack(void) +{ + struct stack_save *stack; + + stack = zalloc(sizeof(*stack)); + if (!stack) + die("malloc"); + + stack->pid = current_pid; + stack->ips_idx = ips_idx; + stack->func_depth = func_depth; + stack->ips = ips; + + stack->next = saved_stacks; + saved_stacks = stack; + + reset_stack(); +} + +static void restore_stack(int pid) +{ + struct stack_save *last = NULL, *stack; + + for (stack = saved_stacks; stack; last = stack, stack = stack->next) { + if (stack->pid == pid) + break; + } + + if (!stack) + return; + + if (last) + last->next = stack->next; + else + saved_stacks = stack->next; + + current_pid = stack->pid; + ips_idx = stack->ips_idx; + func_depth = stack->func_depth; + free(ips); + ips = stack->ips; + free(stack); +} + +struct pid_list; + +struct chain { + struct chain *next; + struct chain *sibling; + const char *func; + struct chain *parents; + struct pid_list *pid_list; + int nr_parents; + int 
count; + int total; + int event; +}; +static struct chain *chains; +static int nr_chains; +static int total_counts; + +struct pid_list { + struct pid_list *next; + struct chain chain; + int pid; +}; +static struct pid_list *list_pids; +static struct pid_list all_pid_list; + +static void add_chain(struct chain *chain) +{ + if (chain->next) + die("chain not null?"); + chain->next = chains; + chains = chain; + nr_chains++; +} + +static void +insert_chain(struct pid_list *pid_list, struct chain *chain_list, + const char **chain_str, int size, int event) +{ + struct chain *chain; + + /* Record all counts */ + if (!chain_list->func) + total_counts++; + + chain_list->count++; + + if (!size--) + return; + + for (chain = chain_list->parents; chain; chain = chain->sibling) { + if (chain->func == chain_str[size]) { + insert_chain(pid_list, chain, chain_str, size, 0); + return; + } + } + + chain_list->nr_parents++; + chain = zalloc(sizeof(struct chain)); + if (!chain) + die("malloc"); + chain->sibling = chain_list->parents; + chain_list->parents = chain; + chain->func = chain_str[size]; + chain->pid_list = pid_list; + chain->event = event; + + /* NULL func means this is the top level of the chain. 
Store it */ + if (!chain_list->func) + add_chain(chain); + + insert_chain(pid_list, chain, chain_str, size, 0); +} + +static void save_call_chain(int pid, const char **chain, int size, int event) +{ + static struct pid_list *pid_list; + + if (compact) + pid_list = &all_pid_list; + + else if (!pid_list || pid_list->pid != pid) { + for (pid_list = list_pids; pid_list; pid_list = pid_list->next) { + if (pid_list->pid == pid) + break; + } + if (!pid_list) { + pid_list = zalloc(sizeof(*pid_list)); + if (!pid_list) + die("malloc"); + pid_list->pid = pid; + pid_list->next = list_pids; + list_pids = pid_list; + } + } + insert_chain(pid_list, &pid_list->chain, chain, size, event); +} + +static void save_stored_stacks(void) +{ + while (saved_stacks) { + restore_stack(saved_stacks->pid); + save_call_chain(current_pid, ips, ips_idx, 0); + } +} + +static void flush_stack(void) +{ + if (current_pid < 0) + return; + + save_call_chain(current_pid, ips, ips_idx, 0); + free(ips); + reset_stack(); +} + +/* + * Append @func to the current call stack (ips), growing the array by one + * slot. Dies on allocation failure, like the other allocations in this file. + */ +static void push_stack_func(const char *func) +{ + const char **new_ips; + + /* Grow through a temporary so a failed realloc() is caught before use */ + new_ips = realloc(ips, (ips_idx + 1) * sizeof(char *)); + if (!new_ips) + die("malloc"); + ips = new_ips; + ips[ips_idx++] = func; +} + +static void pop_stack_func(void) +{ + ips_idx--; + ips[ips_idx] = NULL; +} + +static void +process_function(struct tep_handle *pevent, struct tep_record *record) +{ + unsigned long long parent_ip; + unsigned long long ip; + unsigned long long val; + const char *parent; + const char *func; + int pid; + int ret; + + ret = tep_read_number_field(common_pid_field, record->data, &val); + if (ret < 0) + die("no pid field for function?"); + + ret = tep_read_number_field(function_ip_field, record->data, &ip); + if (ret < 0) + die("no ip field for function?"); + + ret = tep_read_number_field(function_parent_ip_field, record->data, &parent_ip); + if (ret < 0) + die("no parent ip field for function?"); + + pid = val; + + func = tep_find_function(pevent, ip); + parent = tep_find_function(pevent, parent_ip); + + if (current_pid >= 0 && pid != current_pid) { 
+ save_stack(); + restore_stack(pid); + } + + current_pid = pid; + + if (ips_idx) { + if (ips[ips_idx - 1] == parent) + push_stack_func(func); + else { + save_call_chain(pid, ips, ips_idx, 0); + /* Unwind until the parent is on top, or the stack is empty */ + while (ips_idx) { + pop_stack_func(); + /* Nothing left to compare: do not read ips[-1] */ + if (!ips_idx) + break; + if (ips[ips_idx - 1] == parent) { + push_stack_func(func); + break; + } + } + } + } + + /* The above check can set ips_idx to zero again */ + if (!ips_idx) { + push_stack_func(parent); + push_stack_func(func); + } +} + +static void +process_function_graph_entry(struct tep_handle *pevent, struct tep_record *record) +{ + unsigned long long depth; + unsigned long long ip; + unsigned long long val; + const char *func; + int pid; + int ret; + + ret = tep_read_number_field(common_pid_field, record->data, &val); + if (ret < 0) + die("no pid field for function graph entry?"); + + ret = tep_read_number_field(function_graph_entry_func_field, + record->data, &ip); + if (ret < 0) + die("no ip field for function graph entry?"); + + ret = tep_read_number_field(function_graph_entry_depth_field, + record->data, &depth); + if (ret < 0) + die("no parent ip field for function entry?"); + + pid = val; + + func = tep_find_function(pevent, ip); + + if (current_pid >= 0 && pid != current_pid) { + save_stack(); + restore_stack(pid); + } + + current_pid = pid; + + if (depth != ips_idx) { + save_call_chain(pid, ips, ips_idx, 0); + while (ips_idx > depth) + pop_stack_func(); + } + + func_depth = depth; + + push_stack_func(func); +} + +static void +process_function_graph_exit(struct tep_handle *pevent, struct tep_record *record) +{ + unsigned long long depth; + unsigned long long val; + int pid; + int ret; + + ret = tep_read_number_field(common_pid_field, record->data, &val); + if (ret < 0) + die("no pid field for function graph exit?"); + + ret = tep_read_number_field(function_graph_exit_depth_field, + record->data, &depth); + if (ret < 0) + die("no parent ip field for function?"); + + pid = val; + + if (current_pid >= 0 && pid != current_pid) { + save_stack(); 
+ restore_stack(pid); + } + + current_pid = pid; + + if (ips_idx != depth) { + save_call_chain(pid, ips, ips_idx, 0); + while (ips_idx > depth) + pop_stack_func(); + } + + func_depth = depth - 1; +} + +static int pending_pid = -1; +static const char **pending_ips; +static int pending_ips_idx; + +static void reset_pending_stack(void) +{ + pending_pid = -1; + pending_ips_idx = 0; + free(pending_ips); + pending_ips = NULL; +} + +static void copy_stack_to_pending(int pid) +{ + pending_pid = pid; + pending_ips = zalloc(sizeof(char *) * ips_idx); + memcpy(pending_ips, ips, sizeof(char *) * ips_idx); + pending_ips_idx = ips_idx; +} + +static void +process_kernel_stack(struct tep_handle *pevent, struct tep_record *record) +{ + struct tep_format_field *field = kernel_stack_caller_field; + unsigned long long val; + void *data = record->data; + int do_restore = 0; + int pid; + int ret; + + ret = tep_read_number_field(common_pid_field, record->data, &val); + if (ret < 0) + die("no pid field for function?"); + pid = val; + + if (pending_pid >= 0 && pid != pending_pid) { + reset_pending_stack(); + return; + } + + if (!field) + die("no caller field for kernel stack?"); + + if (pending_pid >= 0) { + if (current_pid >= 0) { + save_stack(); + do_restore = 1; + } + } else { + /* function stack trace? 
*/ + if (current_pid >= 0) { + copy_stack_to_pending(current_pid); + free(ips); + reset_stack(); + } + } + + current_pid = pid; + + /* Need to start at the end of the callers and work up */ + for (data += field->offset; data < record->data + record->size; + data += long_size) { + unsigned long long addr; + + addr = tep_read_number(pevent, data, long_size); + + if ((long_size == 8 && addr == (unsigned long long)-1) || + ((int)addr == -1)) + break; + } + + for (data -= long_size; data >= record->data + field->offset; data -= long_size) { + unsigned long long addr; + const char *func; + + addr = tep_read_number(pevent, data, long_size); + func = tep_find_function(pevent, addr); + if (func) + push_stack_func(func); + } + + if (pending_pid >= 0) { + push_stack_func(pending_ips[pending_ips_idx - 1]); + reset_pending_stack(); + } + save_call_chain(current_pid, ips, ips_idx, 1); + if (do_restore) + restore_stack(current_pid); +} + +static void +process_sched_wakeup(struct tep_handle *pevent, struct tep_record *record, int type) +{ + unsigned long long val; + const char *comm; + int pid; + int ret; + + if (type == sched_wakeup_type) { + comm = (char *)(record->data + sched_wakeup_comm_field->offset); + ret = tep_read_number_field(sched_wakeup_pid_field, record->data, &val); + if (ret < 0) + die("no pid field in sched_wakeup?"); + } else { + comm = (char *)(record->data + sched_wakeup_new_comm_field->offset); + ret = tep_read_number_field(sched_wakeup_new_pid_field, record->data, &val); + if (ret < 0) + die("no pid field in sched_wakeup_new?"); + } + + pid = val; + + tep_register_comm(pevent, comm, pid); +} + +static void +process_sched_switch(struct tep_handle *pevent, struct tep_record *record) +{ + unsigned long long val; + const char *comm; + int pid; + int ret; + + comm = (char *)(record->data + sched_switch_prev_field->offset); + ret = tep_read_number_field(sched_switch_prev_pid_field, record->data, &val); + if (ret < 0) + die("no prev_pid field in sched_switch?"); + 
pid = val; + tep_register_comm(pevent, comm, pid); + + comm = (char *)(record->data + sched_switch_next_field->offset); + ret = tep_read_number_field(sched_switch_next_pid_field, record->data, &val); + if (ret < 0) + die("no next_pid field in sched_switch?"); + pid = val; + tep_register_comm(pevent, comm, pid); +} + +static void +process_event(struct tep_handle *pevent, struct tep_record *record, int type) +{ + struct tep_event *event; + const char *event_name; + unsigned long long val; + int pid; + int ret; + + if (pending_pid >= 0) { + save_call_chain(pending_pid, pending_ips, pending_ips_idx, 1); + reset_pending_stack(); + } + + event = tep_find_event(pevent, type); + event_name = event->name; + + ret = tep_read_number_field(common_pid_field, record->data, &val); + if (ret < 0) + die("no pid field for function?"); + + pid = val; + + /* + * Even if function or function graph tracer is running, + * if the user ran with stack traces on events, we want to use + * that instead. But unfortunately, that stack doesn't come + * until after the event. Thus, we only add the event into + * the pending stack. 
+ */ + push_stack_func(event_name); + copy_stack_to_pending(pid); + pop_stack_func(); +} + +static void +process_record(struct tep_handle *pevent, struct tep_record *record) +{ + unsigned long long val; + int type; + + tep_read_number_field(common_type_hist, record->data, &val); + type = val; + + if (type == function_type) + return process_function(pevent, record); + + if (type == function_graph_entry_type) + return process_function_graph_entry(pevent, record); + + if (type == function_graph_exit_type) + return process_function_graph_exit(pevent, record); + + if (type == kernel_stack_type) + return process_kernel_stack(pevent, record); + + if (type == sched_wakeup_type || type == sched_wakeup_new_type) + process_sched_wakeup(pevent, record, type); + + else if (type == sched_switch_type) + process_sched_switch(pevent, record); + + process_event(pevent, record, type); +} + +static struct tep_event * +update_event(struct tep_handle *pevent, + const char *sys, const char *name, int *id) +{ + struct tep_event *event; + + event = tep_find_event_by_name(pevent, sys, name); + if (!event) + return NULL; + + *id = event->id; + + return event; +} + +static void update_sched_wakeup(struct tep_handle *pevent) +{ + struct tep_event *event; + + event = update_event(pevent, "sched", "sched_wakeup", &sched_wakeup_type); + if (!event) + return; + + sched_wakeup_comm_field = tep_find_field(event, "comm"); + sched_wakeup_pid_field = tep_find_field(event, "pid"); +} + +static void update_sched_wakeup_new(struct tep_handle *pevent) +{ + struct tep_event *event; + + event = update_event(pevent, "sched", "sched_wakeup_new", &sched_wakeup_new_type); + if (!event) + return; + + sched_wakeup_new_comm_field = tep_find_field(event, "comm"); + sched_wakeup_new_pid_field = tep_find_field(event, "pid"); +} + +static void update_sched_switch(struct tep_handle *pevent) +{ + struct tep_event *event; + + event = update_event(pevent, "sched", "sched_switch", &sched_switch_type); + if (!event) + 
return; + + sched_switch_prev_field = tep_find_field(event, "prev_comm"); + sched_switch_next_field = tep_find_field(event, "next_comm"); + sched_switch_prev_pid_field = tep_find_field(event, "prev_pid"); + sched_switch_next_pid_field = tep_find_field(event, "next_pid"); +} + +static void update_function(struct tep_handle *pevent) +{ + struct tep_event *event; + + event = update_event(pevent, "ftrace", "function", &function_type); + if (!event) + return; + + function_ip_field = tep_find_field(event, "ip"); + function_parent_ip_field = tep_find_field(event, "parent_ip"); +} + +static void update_function_graph_entry(struct tep_handle *pevent) +{ + struct tep_event *event; + + event = update_event(pevent, "ftrace", "funcgraph_entry", &function_graph_entry_type); + if (!event) + return; + + function_graph_entry_func_field = tep_find_field(event, "func"); + function_graph_entry_depth_field = tep_find_field(event, "depth"); +} + +static void update_function_graph_exit(struct tep_handle *pevent) +{ + struct tep_event *event; + + event = update_event(pevent, "ftrace", "funcgraph_exit", &function_graph_exit_type); + if (!event) + return; + + function_graph_exit_func_field = tep_find_field(event, "func"); + function_graph_exit_depth_field = tep_find_field(event, "depth"); + function_graph_exit_calltime_field = tep_find_field(event, "calltime"); + function_graph_exit_rettime_field = tep_find_field(event, "rettime"); + function_graph_exit_overrun_field = tep_find_field(event, "overrun"); +} + +static void update_kernel_stack(struct tep_handle *pevent) +{ + struct tep_event *event; + + event = update_event(pevent, "ftrace", "kernel_stack", &kernel_stack_type); + if (!event) + return; + + kernel_stack_caller_field = tep_find_field(event, "caller"); +} + +enum field { NEXT_PTR, SIB_PTR }; + +static struct chain *next_ptr(struct chain *chain, enum field field) +{ + if (field == NEXT_PTR) + return chain->next; + return chain->sibling; +} + +static struct chain *split_chain(struct 
chain *orig, int size, enum field field) +{ + struct chain *chain; + int i; + + if (size < 2) + return NULL; + + for (i = 1; i < (size + 1) / 2; i++, orig = next_ptr(orig, field)) + ; + + if (field == NEXT_PTR) { + chain = orig->next; + orig->next = NULL; + } else { + chain = orig->sibling; + orig->sibling = NULL; + } + + return chain; +} + +static struct chain * +merge_chains(struct chain *a, int nr_a, struct chain *b, int nr_b, enum field field) +{ + struct chain *chain; + struct chain *final; + struct chain **next = &final; + int i; + + if (!a) + return b; + if (!b) + return a; + + for (i = 0, chain = a; chain; i++, chain = next_ptr(chain, field)) + ; + if (i != nr_a) + die("WTF %d %d", i, nr_a); + + chain = split_chain(a, nr_a, field); + a = merge_chains(chain, nr_a / 2, a, (nr_a + 1) / 2, field); + + chain = split_chain(b, nr_b, field); + b = merge_chains(chain, nr_b / 2, b, (nr_b + 1) / 2, field); + + while (a && b) { + if (a->count > b->count) { + *next = a; + if (field == NEXT_PTR) + next = &a->next; + else + next = &a->sibling; + a = *next; + *next = NULL; + } else { + *next = b; + if (field == NEXT_PTR) + next = &b->next; + else + next = &b->sibling; + b = *next; + *next = NULL; + } + } + if (a) + *next = a; + else + *next = b; + + return final; +} + +static void sort_chain_parents(struct chain *chain) +{ + struct chain *parent; + + parent = split_chain(chain->parents, chain->nr_parents, SIB_PTR); + chain->parents = merge_chains(parent, chain->nr_parents / 2, + chain->parents, (chain->nr_parents + 1) / 2, + SIB_PTR); + + for (chain = chain->parents; chain; chain = chain->sibling) + sort_chain_parents(chain); +} + +static void sort_chains(void) +{ + struct chain *chain; + + chain = split_chain(chains, nr_chains, NEXT_PTR); + + /* The original always has more or equal to the split */ + chains = merge_chains(chain, nr_chains / 2, chains, (nr_chains + 1) / 2, NEXT_PTR); + + for (chain = chains; chain; chain = chain->next) + sort_chain_parents(chain); +} + 
+static double get_percent(int total, int partial) +{ + return ((double)partial / (double)total) * 100.0; +} + +static int single_chain(struct chain *chain) +{ + if (chain->nr_parents > 1) + return 0; + + if (!chain->parents) + return 1; + + return single_chain(chain->parents); +} + +#define START " |\n" +#define TICK " --- " +#define BLANK " " +#define LINE " |" +#define INDENT " " + +unsigned long long line_mask; +void make_indent(int indent) +{ + int i; + + for (i = 0; i < indent; i++) { + if (line_mask & (1 << i)) + printf(LINE); + else + printf(INDENT); + } +} + +static void +print_single_parent(struct chain *chain, int indent) +{ + make_indent(indent); + + printf(BLANK); + printf("%s\n", chain->parents->func); +} + +static void +dump_chain(struct tep_handle *pevent, struct chain *chain, int indent) +{ + if (!chain->parents) + return; + + print_single_parent(chain, indent); + dump_chain(pevent, chain->parents, indent); +} + +static void print_parents(struct tep_handle *pevent, struct chain *chain, int indent) +{ + struct chain *parent = chain->parents; + int x; + + if (single_chain(chain)) { + dump_chain(pevent, chain, indent); + return; + } + + line_mask |= 1ULL << (indent); + + for (x = 0; parent; x++, parent = parent->sibling) { + struct chain *save_parent; + + make_indent(indent + 1); + printf("\n"); + + make_indent(indent + 1); + + printf("--%%%.2f-- %s # %d\n", + get_percent(chain->count, parent->count), + parent->func, parent->count); + + if (x == chain->nr_parents - 1) + line_mask &= (1ULL << indent) - 1; + + if (single_chain(parent)) + dump_chain(pevent, parent, indent + 1); + else { + save_parent = parent; + + while (parent && parent->parents && parent->nr_parents < 2 && + parent->parents->count == parent->count) { + print_single_parent(parent, indent + 1); + parent = parent->parents; + } + if (parent) + print_parents(pevent, parent, indent + 1); + parent = save_parent; + } + } +} + +static void print_chains(struct tep_handle *pevent) +{ + struct 
chain *chain = chains; + int pid; + + for (; chain; chain = chain->next) { + pid = chain->pid_list->pid; + if (chain != chains) + printf("\n"); + if (compact) + printf(" %%%3.2f <all pids> %30s #%d\n", + get_percent(total_counts, chain->count), + chain->func, + chain->count); + else + printf(" %%%3.2f (%d) %s %30s #%d\n", + get_percent(total_counts, chain->count), + pid, + tep_data_comm_from_pid(pevent, pid), + chain->func, + chain->count); + printf(START); + if (chain->event) + printf(TICK "*%s*\n", chain->func); + else + printf(TICK "%s\n", chain->func); + print_parents(pevent, chain, 0); + } +} + +static void do_trace_hist(struct tracecmd_input *handle) +{ + struct tep_handle *pevent = tracecmd_get_tep(handle); + struct tep_record *record; + struct tep_event *event; + int cpus; + int cpu; + int ret; + + cpus = tracecmd_cpus(handle); + + /* Need to get any event */ + for (cpu = 0; cpu < cpus; cpu++) { + record = tracecmd_peek_data(handle, cpu); + if (record) + break; + } + if (!record) + die("No records found in file"); + + ret = tep_data_type(pevent, record); + event = tep_find_event(pevent, ret); + + long_size = tracecmd_long_size(handle); + + common_type_hist = tep_find_common_field(event, "common_type"); + if (!common_type_hist) + die("Can't find a 'type' field?"); + + common_pid_field = tep_find_common_field(event, "common_pid"); + if (!common_pid_field) + die("Can't find a 'pid' field?"); + + update_sched_wakeup(pevent); + update_sched_wakeup_new(pevent); + update_sched_switch(pevent); + update_function(pevent); + update_function_graph_entry(pevent); + update_function_graph_exit(pevent); + update_kernel_stack(pevent); + + for (cpu = 0; cpu < cpus; cpu++) { + for (;;) { + struct tep_record *record; + + record = tracecmd_read_data(handle, cpu); + if (!record) + break; + + /* If we missed events, just flush out the current stack */ + if (record->missed_events) + flush_stack(); + + process_record(pevent, record); + tracecmd_free_record(record); + } + } + + if 
(current_pid >= 0) + save_call_chain(current_pid, ips, ips_idx, 0); + if (pending_pid >= 0) + save_call_chain(pending_pid, pending_ips, pending_ips_idx, 1); + + save_stored_stacks(); + + sort_chains(); + print_chains(pevent); +} + +void trace_hist(int argc, char **argv) +{ + struct tracecmd_input *handle; + const char *input_file = NULL; + int instances; + int ret; + + for (;;) { + int c; + + c = getopt(argc-1, argv+1, "+hi:P"); + if (c == -1) + break; + switch (c) { + case 'h': + usage(argv); + break; + case 'i': + if (input_file) + die("Only one input for historgram"); + input_file = optarg; + break; + case 'P': + compact = 1; + break; + default: + usage(argv); + } + } + + if ((argc - optind) >= 2) { + if (input_file) + usage(argv); + input_file = argv[optind + 1]; + } + + if (!input_file) + input_file = DEFAULT_INPUT_FILE; + + handle = tracecmd_alloc(input_file, 0); + if (!handle) + die("can't open %s\n", input_file); + + ret = tracecmd_read_headers(handle, 0); + if (ret) + return; + + ret = tracecmd_init_data(handle); + if (ret < 0) + die("failed to init data"); + + if (ret > 0) + die("trace-cmd hist does not work with latency traces\n"); + + instances = tracecmd_buffer_instances(handle); + if (instances) { + struct tracecmd_input *new_handle; + int i; + + for (i = 0; i < instances; i++) { + new_handle = tracecmd_buffer_instance_handle(handle, i); + if (!new_handle) { + warning("could not retrieve handle %d", i); + continue; + } + do_trace_hist(new_handle); + tracecmd_close(new_handle); + } + } else { + do_trace_hist(handle); + } + + tracecmd_close(handle); +} diff --git a/tracecmd/trace-list.c b/tracecmd/trace-list.c new file mode 100644 index 00000000..fbf2882e --- /dev/null +++ b/tracecmd/trace-list.c @@ -0,0 +1,760 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ + +#include <stdlib.h> +#include <sys/stat.h> + +#include "tracefs.h" +#include "trace-local.h" + + +static void 
dump_file_content(const char *path) +{ + char buf[BUFSIZ]; + ssize_t n; + FILE *fp; + + fp = fopen(path, "r"); + if (!fp) + die("reading %s", path); + + do { + n = fread(buf, 1, BUFSIZ, fp); + if (n > 0) + fwrite(buf, 1, n, stdout); + } while (n > 0); + fclose(fp); +} + + + +void show_instance_file(struct buffer_instance *instance, const char *name) +{ + char *path; + + path = tracefs_instance_get_file(instance->tracefs, name); + dump_file_content(path); + tracefs_put_tracing_file(path); +} + +enum { + SHOW_EVENT_FORMAT = 1 << 0, + SHOW_EVENT_FILTER = 1 << 1, + SHOW_EVENT_TRIGGER = 1 << 2, + SHOW_EVENT_FULL = 1 << 3, +}; + + +void show_file(const char *name) +{ + char *path; + + path = tracefs_get_tracing_file(name); + dump_file_content(path); + tracefs_put_tracing_file(path); +} + +typedef int (*process_file_func)(char *buf, int len, int flags); + +static void process_file_re(process_file_func func, + const char *name, const char *re, int flags) +{ + regex_t reg; + char *path; + char *buf = NULL; + char *str; + FILE *fp; + ssize_t n; + size_t l = strlen(re); + + /* Just in case :-p */ + if (!re || l == 0) { + show_file(name); + return; + } + + /* Handle the newline at end of names for the user */ + str = malloc(l + 3); + if (!str) + die("Failed to allocate reg ex %s", re); + strcpy(str, re); + if (re[l-1] == '$') + strcpy(&str[l-1], "\n*$"); + + if (regcomp(®, str, REG_ICASE|REG_NOSUB)) + die("invalid function regex '%s'", re); + + free(str); + + path = tracefs_get_tracing_file(name); + fp = fopen(path, "r"); + if (!fp) + die("reading %s", path); + tracefs_put_tracing_file(path); + + do { + n = getline(&buf, &l, fp); + if (n > 0 && regexec(®, buf, 0, NULL, 0) == 0) + func(buf, n, flags); + } while (n > 0); + free(buf); + fclose(fp); + + regfree(®); +} + +static void show_event(process_file_func func, const char *system, + const char *event, int flags) +{ + char *buf; + int ret; + + ret = asprintf(&buf, "%s:%s", system, event); + if (ret < 0) + die("Can not 
allocate event"); + func(buf, strlen(buf), flags); + free(buf); +} + +static void show_system(process_file_func func, const char *system, int flags) +{ + char **events; + int e; + + events = tracefs_system_events(NULL, system); + if (!events) /* die? */ + return; + + for (e = 0; events[e]; e++) + show_event(func, system, events[e], flags); +} + +static void show_event_systems(process_file_func func, char **systems, int flags) +{ + int s; + + for (s = 0; systems[s]; s++) + show_system(func, systems[s], flags); +} + +static void match_system_events(process_file_func func, const char *system, + regex_t *reg, int flags) +{ + char **events; + int e; + + events = tracefs_system_events(NULL, system); + if (!events) /* die? */ + return; + for (e = 0; events[e]; e++) { + if (regexec(reg, events[e], 0, NULL, 0) == 0) + show_event(func, system, events[e], flags); + } + tracefs_list_free(events); +} + +static void process_events(process_file_func func, const char *re, int flags) +{ + const char *ftrace = "ftrace"; + regex_t system_reg; + regex_t event_reg; + char *str; + size_t l = strlen(re); + bool just_systems = true; + char **systems; + char *system; + char *event; + int s; + + systems = tracefs_event_systems(NULL); + if (!systems) + return process_file_re(func, "available_events", re, flags); + + if (!re || l == 0) { + show_event_systems(func, systems, flags); + return; + } + + str = strdup(re); + if (!str) + die("Can not allocate momory for regex"); + + system = strtok(str, ":"); + event = strtok(NULL, ""); + + if (regcomp(&system_reg, system, REG_ICASE|REG_NOSUB)) + die("invalid regex '%s'", system); + + if (event) { + if (regcomp(&event_reg, event, REG_ICASE|REG_NOSUB)) + die("invalid regex '%s'", event); + } else { + /* + * If the regex ends with ":", then event would be null, + * but we do not want to match events. 
+ */ + if (re[l-1] != ':') + just_systems = false; + } + free(str); + + /* + * See if this matches the special ftrace system, as ftrace is not included + * in the systems list, but can get events from tracefs_system_events(). + */ + if (regexec(&system_reg, ftrace, 0, NULL, 0) == 0) { + if (!event) + show_system(func, ftrace, flags); + else + match_system_events(func, ftrace, &event_reg, flags); + } else if (!just_systems) { + match_system_events(func, ftrace, &system_reg, flags); + } + + for (s = 0; systems[s]; s++) { + + if (regexec(&system_reg, systems[s], 0, NULL, 0) == 0) { + if (!event) { + show_system(func, systems[s], flags); + continue; + } + match_system_events(func, systems[s], &event_reg, flags); + continue; + } + if (just_systems) + continue; + + match_system_events(func, systems[s], &system_reg, flags); + } + tracefs_list_free(systems); + + regfree(&system_reg); + if (event) + regfree(&event_reg); +} + +static int show_file_write(char *buf, int len, int flags) +{ + return fwrite(buf, 1, len, stdout); +} + +static void show_file_re(const char *name, const char *re) +{ + process_file_re(show_file_write, name, re, 0); +} + +static char *get_event_file(const char *type, char *buf, int len) +{ + char *system; + char *event; + char *path; + char *file; + int ret; + + if (buf[len-1] == '\n') + buf[len-1] = '\0'; + + system = strtok(buf, ":"); + if (!system) + die("no system found in %s", buf); + + event = strtok(NULL, ":"); + if (!event) + die("no event found in %s\n", buf); + + path = tracefs_get_tracing_file("events"); + ret = asprintf(&file, "%s/%s/%s/%s", path, system, event, type); + if (ret < 0) + die("Failed to allocate event file %s %s", system, event); + + tracefs_put_tracing_file(path); + + return file; +} + +static int event_filter_write(char *buf, int len, int flags) +{ + char *file; + + if (buf[len-1] == '\n') + buf[len-1] = '\0'; + + printf("%s\n", buf); + + file = get_event_file("filter", buf, len); + dump_file_content(file); + free(file); + 
printf("\n"); + + return 0; +} + +static int event_trigger_write(char *buf, int len, int flags) +{ + char *file; + + if (buf[len-1] == '\n') + buf[len-1] = '\0'; + + printf("%s\n", buf); + + file = get_event_file("trigger", buf, len); + dump_file_content(file); + free(file); + printf("\n"); + + return 0; +} + +static int event_format_write(char *fbuf, int len, int flags) +{ + char *file = get_event_file("format", fbuf, len); + char *buf = NULL; + size_t l; + FILE *fp; + bool full; + int n; + + full = flags & SHOW_EVENT_FULL; + + /* The get_event_file() crops system in fbuf */ + printf("system: %s\n", fbuf); + + /* Don't print the print fmt, it's ugly */ + + fp = fopen(file, "r"); + if (!fp) + die("reading %s", file); + + do { + n = getline(&buf, &l, fp); + if (n > 0) { + if (!full && strncmp(buf, "print fmt", 9) == 0) + break; + fwrite(buf, 1, n, stdout); + } + } while (n > 0); + fclose(fp); + free(buf); + free(file); + + return 0; +} + +static int event_name(char *buf, int len, int flags) +{ + printf("%s\n", buf); + + return 0; +} + +static void show_event_filter_re(const char *re) +{ + process_events(event_filter_write, re, 0); +} + + +static void show_event_trigger_re(const char *re) +{ + process_events(event_trigger_write, re, 0); +} + + +static void show_event_format_re(const char *re, int flags) +{ + process_events(event_format_write, re, flags); +} + +static void show_event_names_re(const char *re) +{ + process_events(event_name, re, 0); +} + +static void show_events(const char *eventre, int flags) +{ + if (flags && !eventre) + die("When specifying event files, an event must be named"); + + if (eventre) { + if (flags & SHOW_EVENT_FORMAT) + show_event_format_re(eventre, flags); + + else if (flags & SHOW_EVENT_FILTER) + show_event_filter_re(eventre); + + else if (flags & SHOW_EVENT_TRIGGER) + show_event_trigger_re(eventre); + else + show_event_names_re(eventre); + } else + show_file("available_events"); +} + + +static void show_tracers(void) +{ + 
show_file("available_tracers"); +} + +void show_options(const char *prefix, struct buffer_instance *buffer) +{ + struct tracefs_instance *instance = buffer ? buffer->tracefs : NULL; + struct dirent *dent; + struct stat st; + char *path; + DIR *dir; + + if (!prefix) + prefix = ""; + + path = tracefs_instance_get_file(instance, "options"); + if (!path) + goto show_file; + if (stat(path, &st) < 0) + goto show_file; + + if ((st.st_mode & S_IFMT) != S_IFDIR) + goto show_file; + + dir = opendir(path); + if (!dir) + die("Can not read instance directory"); + + while ((dent = readdir(dir))) { + const char *name = dent->d_name; + long long val; + char *file; + int ret; + + if (strcmp(name, ".") == 0 || + strcmp(name, "..") == 0) + continue; + + ret = asprintf(&file, "options/%s", name); + if (ret < 0) + die("Failed to allocate file name"); + ret = tracefs_instance_file_read_number(instance, file, &val); + if (!ret) { + if (val) + printf("%s%s\n", prefix, name); + else + printf("%sno%s\n", prefix, name); + } + free(file); + } + closedir(dir); + tracefs_put_tracing_file(path); + return; + + show_file: + tracefs_put_tracing_file(path); + show_file("trace_options"); +} + +static void show_clocks(void) +{ + char *clocks; + int size; + + clocks = tracefs_instance_file_read(NULL, "trace_clock", &size); + if (!clocks) + die("getting clocks"); + if (clocks[size - 1] == '\n') + clocks[size - 1] = 0; + + if (trace_tsc2nsec_is_supported()) + printf("%s %s\n", clocks, TSCNSEC_CLOCK); + else + printf("%s\n", clocks); + + free(clocks); +} + + +static void show_functions(const char *funcre) +{ + if (funcre) + show_file_re("available_filter_functions", funcre); + else + show_file("available_filter_functions"); +} + + +static void show_buffers(void) +{ + struct dirent *dent; + DIR *dir; + char *path; + int printed = 0; + + path = tracefs_get_tracing_file("instances"); + dir = opendir(path); + tracefs_put_tracing_file(path); + if (!dir) + die("Can not read instance directory"); + + while 
((dent = readdir(dir))) { + const char *name = dent->d_name; + + if (strcmp(name, ".") == 0 || + strcmp(name, "..") == 0) + continue; + + printf("%s\n", name); + printed = 1; + } + closedir(dir); + + if (!printed) + printf("No buffer instances defined\n"); +} + + +static void show_systems(void) +{ + struct dirent *dent; + char *path; + DIR *dir; + + path = tracefs_get_tracing_file("events"); + dir = opendir(path); + + if (!dir) + die("Can not read events directory"); + + while ((dent = readdir(dir))) { + const char *name = dent->d_name; + struct stat st; + char *spath; + int ret; + + if (strcmp(name, ".") == 0 || + strcmp(name, "..") == 0) + continue; + + if (asprintf(&spath, "%s/%s", path, name) < 0) + continue; + ret = stat(spath, &st); + if (!ret && S_ISDIR(st.st_mode)) + printf("%s\n", name); + + free(spath); + } + + printf("\n"); + closedir(dir); + tracefs_put_tracing_file(path); +} + +static void show_plugin_options(void) +{ + struct tep_handle *pevent; + struct tep_plugin_list *list; + struct trace_seq s; + + tracecmd_ftrace_load_options(); + + pevent = tep_alloc(); + if (!pevent) + die("Can not allocate pevent\n"); + + trace_seq_init(&s); + + list = trace_load_plugins(pevent, 0); + tep_plugin_print_options(&s); + trace_seq_do_printf(&s); + tep_unload_plugins(list, pevent); + tep_free(pevent); +} + + +void trace_option(int argc, char **argv) +{ + show_plugin_options(); +} + + +static void show_plugins(void) +{ + struct tep_handle *pevent; + struct tep_plugin_list *list; + struct trace_seq s; + + pevent = tep_alloc(); + if (!pevent) + die("Can not allocate pevent\n"); + + trace_seq_init(&s); + + list = trace_load_plugins(pevent, 0); + tep_print_plugins(&s, " ", "\n", list); + + trace_seq_do_printf(&s); + tep_unload_plugins(list, pevent); + tep_free(pevent); +} + +static void show_compression(void) +{ + char **versions, **names; + int c, i; + + c = tracecmd_compress_protos_get(&names, &versions); + if (c <= 0) { + printf("No compression algorithms are 
supported\n"); + return; + } + printf("Supported compression algorithms:\n"); + for (i = 0; i < c; i++) + printf("\t%s, %s\n", names[i], versions[i]); + + free(names); + free(versions); +} + +void trace_list(int argc, char **argv) +{ + int events = 0; + int tracer = 0; + int options = 0; + int funcs = 0; + int buffers = 0; + int clocks = 0; + int plug = 0; + int plug_op = 0; + int flags = 0; + int systems = 0; + int show_all = 1; + int compression = 0; + int i; + const char *arg; + const char *funcre = NULL; + const char *eventre = NULL; + + for (i = 2; i < argc; i++) { + arg = NULL; + if (argv[i][0] == '-') { + if (i < argc - 1) { + if (argv[i+1][0] != '-') + arg = argv[i+1]; + } + switch (argv[i][1]) { + case 'h': + usage(argv); + break; + case 'e': + events = 1; + eventre = arg; + show_all = 0; + break; + case 'B': + buffers = 1; + show_all = 0; + break; + case 'C': + clocks = 1; + show_all = 0; + break; + case 'F': + flags |= SHOW_EVENT_FORMAT; + break; + case 'R': + flags |= SHOW_EVENT_TRIGGER; + break; + case 'l': + flags |= SHOW_EVENT_FILTER; + break; + case 'p': + case 't': + tracer = 1; + show_all = 0; + break; + case 'P': + plug = 1; + show_all = 0; + break; + case 'O': + plug_op = 1; + show_all = 0; + break; + case 'o': + options = 1; + show_all = 0; + break; + case 'f': + funcs = 1; + funcre = arg; + show_all = 0; + break; + case 's': + systems = 1; + show_all = 0; + break; + case 'c': + compression = 1; + show_all = 0; + break; + case '-': + if (strcmp(argv[i], "--debug") == 0) { + tracecmd_set_debug(true); + break; + } + if (strcmp(argv[i], "--full") == 0) { + flags |= SHOW_EVENT_FULL; + break; + } + fprintf(stderr, "list: invalid option -- '%s'\n", + argv[i]); + default: + fprintf(stderr, "list: invalid option -- '%c'\n", + argv[i][1]); + usage(argv); + } + } + } + + if (events) + show_events(eventre, flags); + + if (tracer) + show_tracers(); + + if (options) + show_options(NULL, NULL); + + if (plug) + show_plugins(); + + if (plug_op) + 
show_plugin_options(); + + if (funcs) + show_functions(funcre); + + if (buffers) + show_buffers(); + + if (clocks) + show_clocks(); + if (systems) + show_systems(); + if (compression) + show_compression(); + if (show_all) { + printf("event systems:\n"); + show_systems(); + printf("events:\n"); + show_events(NULL, 0); + printf("\ntracers:\n"); + show_tracers(); + printf("\noptions:\n"); + show_options(NULL, NULL); + show_compression(); + } + + return; + +} diff --git a/tracecmd/trace-listen.c b/tracecmd/trace-listen.c new file mode 100644 index 00000000..86d2b9e9 --- /dev/null +++ b/tracecmd/trace-listen.c @@ -0,0 +1,1201 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#define _LARGEFILE64_SOURCE +#include <dirent.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include <netdb.h> +#include <unistd.h> +#include <fcntl.h> +#include <signal.h> +#include <errno.h> + +#ifdef VSOCK +#include <linux/vm_sockets.h> +#endif + +#include "trace-local.h" +#include "trace-msg.h" + +#define dprint(fmt, ...) tracecmd_debug(fmt, ##__VA_ARGS__) + +#define MAX_OPTION_SIZE 4096 + +#define _VAR_DIR_Q(dir) #dir +#define VAR_DIR_Q(dir) _VAR_DIR_Q(dir) + +#define VAR_RUN_DIR VAR_DIR_Q(VAR_DIR) "/run" + +static char *default_output_dir = "."; +static char *output_dir; +static char *default_output_file = "trace"; +static char *output_file; + +static bool use_vsock; + +static int backlog = 5; + +static int do_daemon; + +/* Used for signaling INT to finish */ +static struct tracecmd_msg_handle *stop_msg_handle; +static bool done; + +#define pdie(fmt, ...) 
\ + do { \ + tracecmd_plog_error(fmt, ##__VA_ARGS__);\ + remove_pid_file(); \ + exit(-1); \ + } while (0) + +#define TEMP_FILE_STR "%s.%s:%s.cpu%d", output_file, host, port, cpu +static char *get_temp_file(const char *host, const char *port, int cpu) +{ + char *file = NULL; + int size; + + size = snprintf(file, 0, TEMP_FILE_STR); + file = malloc(size + 1); + if (!file) + return NULL; + sprintf(file, TEMP_FILE_STR); + + return file; +} + +static void put_temp_file(char *file) +{ + free(file); +} + +static void signal_setup(int sig, sighandler_t handle) +{ + struct sigaction action; + + sigaction(sig, NULL, &action); + /* Make accept return EINTR */ + action.sa_flags &= ~SA_RESTART; + action.sa_handler = handle; + sigaction(sig, &action, NULL); +} + +static void delete_temp_file(const char *host, const char *port, int cpu) +{ + char file[PATH_MAX]; + + snprintf(file, PATH_MAX, TEMP_FILE_STR); + unlink(file); +} + +static int read_string(int fd, char *buf, size_t size) +{ + size_t i; + int n; + + for (i = 0; i < size; i++) { + n = read(fd, buf+i, 1); + if (!buf[i] || n <= 0) + break; + } + + return i; +} + +static int process_option(struct tracecmd_msg_handle *msg_handle, char *option) +{ + /* currently the only option we have is to us TCP */ + if (strcmp(option, "TCP") == 0) { + msg_handle->flags |= TRACECMD_MSG_FL_USE_TCP; + return 1; + } + return 0; +} + +static void finish(int sig) +{ + if (stop_msg_handle) + tracecmd_msg_set_done(stop_msg_handle); + done = true; +} + +static void make_pid_name(int mode, char *buf) +{ + snprintf(buf, PATH_MAX, VAR_RUN_DIR "/trace-cmd-net.pid"); +} + +static void remove_pid_file(void) +{ + char buf[PATH_MAX]; + int mode = do_daemon; + + if (!do_daemon) + return; + + make_pid_name(mode, buf); + + unlink(buf); +} + +static int process_child(int sfd, const char *host, const char *port, + int cpu, int page_size, enum port_type type) +{ + struct sockaddr_storage peer_addr; +#ifdef VSOCK + struct sockaddr_vm vm_addr; +#endif + struct 
sockaddr *addr; + socklen_t addr_len; + char buf[page_size]; + char *tempfile; + int left; + int cfd; + int fd; + int r, w; + int once = 0; + + signal_setup(SIGUSR1, finish); + + tempfile = get_temp_file(host, port, cpu); + if (!tempfile) + return -ENOMEM; + + fd = open(tempfile, O_WRONLY | O_TRUNC | O_CREAT, 0644); + if (fd < 0) + pdie("creating %s", tempfile); + + if (type == USE_TCP) { + addr = (struct sockaddr *)&peer_addr; + addr_len = sizeof(peer_addr); +#ifdef VSOCK + } else if (type == USE_VSOCK) { + addr = (struct sockaddr *)&vm_addr; + addr_len = sizeof(vm_addr); +#endif + } + + if (type == USE_TCP || type == USE_VSOCK) { + if (listen(sfd, backlog) < 0) + pdie("listen"); + + cfd = accept(sfd, addr, &addr_len); + if (cfd < 0 && errno == EINTR) + goto done; + if (cfd < 0) + pdie("accept"); + close(sfd); + sfd = cfd; + } + + for (;;) { + /* TODO, make this copyless! */ + r = read(sfd, buf, page_size); + if (r < 0) { + if (errno == EINTR) + break; + pdie("reading pages from client"); + } + if (!r) + break; + /* UDP requires that we get the full size in one go */ + if (type == USE_UDP && r < page_size && !once) { + once = 1; + warning("read %d bytes, expected %d", r, page_size); + } + + left = r; + do { + w = write(fd, buf + (r - left), left); + if (w > 0) + left -= w; + } while (w >= 0 && left); + } + + done: + put_temp_file(tempfile); + exit(0); +} + +static int setup_vsock_port(int start_port, int *sfd) +{ + int sd; + + sd = trace_vsock_make(start_port); + if (sd < 0) + return -errno; + *sfd = sd; + + return start_port; +} + +int trace_net_make(int port, enum port_type type) +{ + struct addrinfo hints; + struct addrinfo *result, *rp; + char buf[BUFSIZ]; + int sd; + int s; + + snprintf(buf, BUFSIZ, "%d", port); + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_flags = AI_PASSIVE; + + switch (type) { + case USE_TCP: + hints.ai_socktype = SOCK_STREAM; + break; + case USE_UDP: + hints.ai_socktype = SOCK_DGRAM; + break; + default: + 
return -1; + } + + s = getaddrinfo(NULL, buf, &hints, &result); + if (s != 0) + pdie("getaddrinfo: error opening socket"); + + for (rp = result; rp != NULL; rp = rp->ai_next) { + sd = socket(rp->ai_family, rp->ai_socktype, + rp->ai_protocol); + if (sd < 0) + continue; + + if (bind(sd, rp->ai_addr, rp->ai_addrlen) == 0) + break; + + close(sd); + } + freeaddrinfo(result); + + if (rp == NULL) + return -1; + + dprint("Create listen port: %d fd:%d\n", port, sd); + + return sd; +} + +int trace_net_search(int start_port, int *sfd, enum port_type type) +{ + int num_port = start_port; + + if (type == USE_VSOCK) + return setup_vsock_port(start_port, sfd); + again: + *sfd = trace_net_make(num_port, type); + if (*sfd < 0) { + if (++num_port > MAX_PORT_SEARCH) + pdie("No available ports to bind"); + goto again; + } + + return num_port; +} + +static void fork_reader(int sfd, const char *node, const char *port, + int *pid, int cpu, int pagesize, enum port_type type) +{ + int ret; + + *pid = fork(); + + if (*pid < 0) + pdie("creating reader"); + + if (!*pid) { + ret = process_child(sfd, node, port, cpu, pagesize, type); + if (ret < 0) + pdie("Problem with reader %d", ret); + } + + close(sfd); +} + +static int open_port(const char *node, const char *port, int *pid, + int cpu, int pagesize, int start_port, enum port_type type) +{ + int sfd; + int num_port; + + /* + * trace_net_search() currently does not return an error, but if that + * changes in the future, we have a check for it now. 
+ */ + num_port = trace_net_search(start_port, &sfd, type); + if (num_port < 0) + return num_port; + + fork_reader(sfd, node, port, pid, cpu, pagesize, type); + + return num_port; +} + +static int communicate_with_client(struct tracecmd_msg_handle *msg_handle) +{ + char *last_proto = NULL; + char buf[BUFSIZ]; + char *option; + int pagesize = 0; + int options; + int size; + int cpus; + int n, s, t, i; + int ret = -EINVAL; + int fd = msg_handle->fd; + + /* Let the client know what we are */ + write(fd, "tracecmd", 8); + + try_again: + /* read back the CPU count */ + n = read_string(fd, buf, BUFSIZ); + if (n == BUFSIZ) + /** ERROR **/ + return -EINVAL; + + cpus = atoi(buf); + + /* Is the client using the new protocol? */ + if (cpus == -1) { + if (memcmp(buf, V3_CPU, n) != 0) { + /* If it did not send a version, then bail */ + if (memcmp(buf, "-1V", 3)) { + tracecmd_plog("Unknown string %s\n", buf); + goto out; + } + /* Skip "-1" */ + tracecmd_plog("Cannot handle the protocol %s\n", buf+2); + + /* If it returned the same command as last time, bail! */ + if (last_proto && strncmp(last_proto, buf, n) == 0) { + tracecmd_plog("Repeat of version %s sent\n", last_proto); + goto out; + } + free(last_proto); + last_proto = malloc(n + 1); + if (last_proto) { + memcpy(last_proto, buf, n); + last_proto[n] = 0; + } + /* Return the highest protocol we can use */ + write(fd, "V3", 3); + goto try_again; + } + + /* Let the client know we use v3 protocol */ + write(fd, "V3", 3); + + /* read the rest of dummy data */ + n = read(fd, buf, sizeof(V3_MAGIC)); + if (memcmp(buf, V3_MAGIC, n) != 0) + goto out; + + /* We're off! 
*/ + write(fd, "OK", 2); + + msg_handle->version = V3_PROTOCOL; + + /* read the CPU count, the page size, and options */ + if ((pagesize = tracecmd_msg_initial_setting(msg_handle)) < 0) + goto out; + } else { + /* The client is using the v1 protocol */ + + tracecmd_plog("cpus=%d\n", cpus); + if (cpus < 0) + goto out; + + msg_handle->cpu_count = cpus; + + /* next read the page size */ + n = read_string(fd, buf, BUFSIZ); + if (n == BUFSIZ) + /** ERROR **/ + goto out; + + pagesize = atoi(buf); + + tracecmd_plog("pagesize=%d\n", pagesize); + if (pagesize <= 0) + goto out; + + /* Now the number of options */ + n = read_string(fd, buf, BUFSIZ); + if (n == BUFSIZ) + /** ERROR **/ + return -EINVAL; + + options = atoi(buf); + + for (i = 0; i < options; i++) { + /* next is the size of the options */ + n = read_string(fd, buf, BUFSIZ); + if (n == BUFSIZ) + /** ERROR **/ + goto out; + size = atoi(buf); + /* prevent a client from killing us */ + if (size > MAX_OPTION_SIZE) + goto out; + + ret = -ENOMEM; + option = malloc(size); + if (!option) + goto out; + + ret = -EIO; + do { + t = size; + s = 0; + s = read(fd, option+s, t); + if (s <= 0) + goto out; + t -= s; + s = size - t; + } while (t); + + s = process_option(msg_handle, option); + free(option); + /* do we understand this option? 
*/ + ret = -EINVAL; + if (!s) + goto out; + } + } + + if (msg_handle->flags & TRACECMD_MSG_FL_USE_TCP) + tracecmd_plog("Using TCP for live connection\n"); + + ret = pagesize; + out: + free(last_proto); + + return ret; +} + +static int create_client_file(const char *node, const char *port) +{ + char buf[BUFSIZ]; + int ofd; + + snprintf(buf, BUFSIZ, "%s.%s:%s.dat", output_file, node, port); + + ofd = open(buf, O_RDWR | O_CREAT | O_TRUNC, 0644); + if (ofd < 0) + pdie("Can not create file %s", buf); + return ofd; +} + +static void destroy_all_readers(int cpus, int *pid_array, const char *node, + const char *port) +{ + int cpu; + + for (cpu = 0; cpu < cpus; cpu++) { + if (pid_array[cpu] > 0) { + kill(pid_array[cpu], SIGKILL); + waitpid(pid_array[cpu], NULL, 0); + delete_temp_file(node, port, cpu); + pid_array[cpu] = 0; + } + } + + free(pid_array); +} + +static int *create_all_readers(const char *node, const char *port, + int pagesize, struct tracecmd_msg_handle *msg_handle) +{ + enum port_type port_type = USE_UDP; + char buf[BUFSIZ]; + unsigned int *port_array; + int *pid_array; + unsigned int start_port; + unsigned int connect_port; + int cpus = msg_handle->cpu_count; + int cpu; + int pid; + + if (!pagesize) + return NULL; + + if (msg_handle->flags & TRACECMD_MSG_FL_USE_TCP) + port_type = USE_TCP; + else if (msg_handle->flags & TRACECMD_MSG_FL_USE_VSOCK) + port_type = USE_VSOCK; + + port_array = malloc(sizeof(*port_array) * cpus); + if (!port_array) + return NULL; + + pid_array = malloc(sizeof(*pid_array) * cpus); + if (!pid_array) { + free(port_array); + return NULL; + } + + memset(pid_array, 0, sizeof(int) * cpus); + + start_port = START_PORT_SEARCH; + + /* Now create a port for each CPU */ + for (cpu = 0; cpu < cpus; cpu++) { + connect_port = open_port(node, port, &pid, cpu, + pagesize, start_port, port_type); + if (connect_port < 0) + goto out_free; + port_array[cpu] = connect_port; + pid_array[cpu] = pid; + /* + * Due to some bugging finding ports, + * force 
search after last port + */ + start_port = connect_port + 1; + } + + if (msg_handle->version == V3_PROTOCOL) { + /* send set of port numbers to the client */ + if (tracecmd_msg_send_port_array(msg_handle, port_array) < 0) { + tracecmd_plog("Failed sending port array\n"); + goto out_free; + } + } else { + /* send the client a comma deliminated set of port numbers */ + for (cpu = 0; cpu < cpus; cpu++) { + snprintf(buf, BUFSIZ, "%s%d", + cpu ? "," : "", port_array[cpu]); + write(msg_handle->fd, buf, strlen(buf)); + } + /* end with null terminator */ + write(msg_handle->fd, "\0", 1); + } + + free(port_array); + return pid_array; + + out_free: + free(port_array); + destroy_all_readers(cpus, pid_array, node, port); + return NULL; +} + +static int +collect_metadata_from_client(struct tracecmd_msg_handle *msg_handle, + int ofd) +{ + char buf[BUFSIZ]; + int n, s, t; + int ifd = msg_handle->fd; + int ret = 0; + + do { + n = read(ifd, buf, BUFSIZ); + if (n < 0) { + if (errno == EINTR) + continue; + ret = -errno; + break; + } + t = n; + s = 0; + do { + s = write(ofd, buf+s, t); + if (s < 0) { + if (errno == EINTR) + break; + ret = -errno; + goto out; + } + t -= s; + s = n - t; + } while (t); + } while (n > 0 && !tracecmd_msg_done(msg_handle)); + +out: + return ret; +} + +static void stop_all_readers(int cpus, int *pid_array) +{ + int cpu; + + for (cpu = 0; cpu < cpus; cpu++) { + if (pid_array[cpu] > 0) + kill(pid_array[cpu], SIGUSR1); + } +} + +static int put_together_file(int cpus, int ofd, const char *node, + const char *port, bool write_options) +{ + struct tracecmd_output *handle = NULL; + char **temp_files; + int cpu; + int ret = -ENOMEM; + + /* Now put together the file */ + temp_files = malloc(sizeof(*temp_files) * cpus); + if (!temp_files) + return -ENOMEM; + + for (cpu = 0; cpu < cpus; cpu++) { + temp_files[cpu] = get_temp_file(node, port, cpu); + if (!temp_files[cpu]) + goto out; + } + + handle = tracecmd_get_output_handle_fd(ofd); + if (!handle) { + ret = -1; + goto 
out; + } + + if (write_options) { + ret = tracecmd_write_cpus(handle, cpus); + if (ret) + goto out; + ret = tracecmd_write_buffer_info(handle); + if (ret) + goto out; + ret = tracecmd_write_options(handle); + if (ret) + goto out; + } + ret = tracecmd_write_cpu_data(handle, cpus, temp_files, NULL); + +out: + tracecmd_output_close(handle); + for (cpu--; cpu >= 0; cpu--) { + put_temp_file(temp_files[cpu]); + } + free(temp_files); + return ret; +} + +static int process_client(struct tracecmd_msg_handle *msg_handle, + const char *node, const char *port) +{ + int *pid_array; + int pagesize; + int cpus; + int ofd; + int ret; + + pagesize = communicate_with_client(msg_handle); + if (pagesize < 0) + return pagesize; + + ofd = create_client_file(node, port); + + pid_array = create_all_readers(node, port, pagesize, msg_handle); + if (!pid_array) + return -ENOMEM; + + /* on signal stop this msg */ + stop_msg_handle = msg_handle; + + /* Now we are ready to start reading data from the client */ + if (msg_handle->version == V3_PROTOCOL) + ret = tracecmd_msg_collect_data(msg_handle, ofd); + else + ret = collect_metadata_from_client(msg_handle, ofd); + stop_msg_handle = NULL; + + /* wait a little to let our readers finish reading */ + sleep(1); + + cpus = msg_handle->cpu_count; + + /* stop our readers */ + stop_all_readers(cpus, pid_array); + + /* wait a little to have the readers clean up */ + sleep(1); + + if (!ret) + ret = put_together_file(cpus, ofd, node, port, + msg_handle->version < V3_PROTOCOL); + + destroy_all_readers(cpus, pid_array, node, port); + + return ret; +} + +static int do_fork(int cfd) +{ + pid_t pid; + + /* in debug mode, we do not fork off children */ + if (tracecmd_get_debug()) + return 0; + + pid = fork(); + if (pid < 0) { + warning("failed to create child"); + return -1; + } + + if (pid > 0) { + close(cfd); + return pid; + } + + signal_setup(SIGINT, finish); + + return 0; +} + +bool trace_net_cmp_connection(struct sockaddr_storage *addr, const char *name) 
+{ + char host[NI_MAXHOST], nhost[NI_MAXHOST]; + char service[NI_MAXSERV]; + socklen_t addr_len = sizeof(*addr); + struct addrinfo *result, *rp; + struct addrinfo hints; + bool found = false; + int s; + + if (getnameinfo((struct sockaddr *)addr, addr_len, + host, NI_MAXHOST, + service, NI_MAXSERV, NI_NUMERICSERV)) + return -1; + + if (strcmp(host, name) == 0) + return true; + + /* Check other IPs that name could be for */ + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + + /* Check other IPs that name could be for */ + s = getaddrinfo(name, NULL, &hints, &result); + if (s != 0) + return false; + + for (rp = result; rp != NULL; rp = rp->ai_next) { + if (getnameinfo(rp->ai_addr, rp->ai_addrlen, + nhost, NI_MAXHOST, + service, NI_MAXSERV, NI_NUMERICSERV)) + continue; + if (strcmp(host, nhost) == 0) { + found = 1; + break; + } + } + + freeaddrinfo(result); + return found; +} + +bool trace_net_cmp_connection_fd(int fd, const char *name) +{ + struct sockaddr_storage addr; + socklen_t addr_len = sizeof(addr); + + if (getpeername(fd, (struct sockaddr *)&addr, &addr_len)) + return false; + + return trace_net_cmp_connection(&addr, name); +}; + +int trace_net_print_connection(int fd) +{ + char host[NI_MAXHOST], service[NI_MAXSERV]; + struct sockaddr_storage net_addr; + socklen_t addr_len; + + addr_len = sizeof(net_addr); + if (getpeername(fd, (struct sockaddr *)&net_addr, &addr_len)) + return -1; + + if (getnameinfo((struct sockaddr *)&net_addr, addr_len, + host, NI_MAXHOST, + service, NI_MAXSERV, NI_NUMERICSERV)) + return -1; + + if (tracecmd_get_debug()) + tracecmd_debug("Connected to %s:%s fd:%d\n", host, service, fd); + else + tracecmd_plog("Connected to %s:%s\n", host, service); + return 0; +} + +static int do_connection(int cfd, struct sockaddr *addr, + socklen_t addr_len) +{ + struct tracecmd_msg_handle *msg_handle; + char host[NI_MAXHOST], service[NI_MAXSERV]; + int s; + int ret; + + ret = do_fork(cfd); + if 
(ret) + return ret; + + msg_handle = tracecmd_msg_handle_alloc(cfd, 0); + + if (use_vsock) { +#ifdef VSOCK + struct sockaddr_vm *vm_addr = (struct sockaddr_vm *)addr; + snprintf(host, NI_MAXHOST, "V%d", vm_addr->svm_cid); + snprintf(service, NI_MAXSERV, "%d", vm_addr->svm_port); +#endif + } else { + s = getnameinfo((struct sockaddr *)addr, addr_len, + host, NI_MAXHOST, + service, NI_MAXSERV, NI_NUMERICSERV); + + if (s == 0) + tracecmd_plog("Connected with %s:%s\n", host, service); + else { + tracecmd_plog("Error with getnameinfo: %s\n", gai_strerror(s)); + close(cfd); + tracecmd_msg_handle_close(msg_handle); + return -1; + } + } + + process_client(msg_handle, host, service); + + tracecmd_msg_handle_close(msg_handle); + + if (!tracecmd_get_debug()) + exit(0); + + return 0; +} + +static int *client_pids; +static int free_pids; +static int saved_pids; + +static void add_process(int pid) +{ + int *client = NULL; + int i; + + if (free_pids) { + for (i = 0; i < saved_pids; i++) { + if (!client_pids[i]) { + client = &client_pids[i]; + break; + } + } + free_pids--; + if (!client) + warning("Could not find free pid"); + } + if (!client) { + client_pids = realloc(client_pids, + sizeof(*client_pids) * (saved_pids + 1)); + if (!client_pids) + pdie("allocating pids"); + client = &client_pids[saved_pids++]; + } + *client = pid; +} + +static void remove_process(int pid) +{ + int i; + + for (i = 0; i < saved_pids; i++) { + if (client_pids[i] == pid) + break; + } + + if (i == saved_pids) + return; + + client_pids[i] = 0; + free_pids++; +} + +static void kill_clients(void) +{ + int status; + int i; + + for (i = 0; i < saved_pids; i++) { + if (!client_pids[i]) + continue; + /* Only kill the clients if we received SIGINT or SIGTERM */ + if (done) + kill(client_pids[i], SIGINT); + waitpid(client_pids[i], &status, 0); + } + + saved_pids = 0; +} + +static void clean_up(void) +{ + int status; + int ret; + + /* Clean up any children that has started before */ + do { + ret = waitpid(0, 
&status, WNOHANG); + if (ret > 0) + remove_process(ret); + } while (ret > 0); +} + +static void do_accept_loop(int sfd) +{ + struct sockaddr_storage peer_addr; +#ifdef VSOCK + struct sockaddr_vm vm_addr; +#endif + struct sockaddr *addr; + socklen_t addr_len; + int cfd, pid; + + if (use_vsock) { +#ifdef VSOCK + addr = (struct sockaddr *)&vm_addr; + addr_len = sizeof(vm_addr); +#endif + } else { + addr = (struct sockaddr *)&peer_addr; + addr_len = sizeof(peer_addr); + } + + do { + cfd = accept(sfd, addr, &addr_len); + if (cfd < 0 && errno == EINTR) { + clean_up(); + continue; + } + if (cfd < 0) + pdie("connecting"); + + pid = do_connection(cfd, addr, addr_len); + if (pid > 0) + add_process(pid); + + } while (!done); + /* Get any final stragglers */ + clean_up(); +} + +static void make_pid_file(void) +{ + char buf[PATH_MAX]; + int mode = do_daemon; + int fd; + + if (!do_daemon) + return; + + make_pid_name(mode, buf); + + fd = open(buf, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) { + perror(buf); + return; + } + + sprintf(buf, "%d\n", getpid()); + write(fd, buf, strlen(buf)); + close(fd); +} + +static void sigstub(int sig) +{ +} + +static int get_vsock(const char *port) +{ + unsigned int cid; + int sd; + + sd = trace_vsock_make(atoi(port)); + if (sd < 0) + return sd; + + cid = trace_vsock_local_cid(); + if (cid >= 0) + printf("listening on @%u:%s\n", cid, port); + + return sd; +} + +static int get_network(char *port) +{ + struct addrinfo hints; + struct addrinfo *result, *rp; + int sfd, s; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_PASSIVE; + + s = getaddrinfo(NULL, port, &hints, &result); + if (s != 0) + pdie("getaddrinfo: error opening %s", port); + + for (rp = result; rp != NULL; rp = rp->ai_next) { + sfd = socket(rp->ai_family, rp->ai_socktype, + rp->ai_protocol); + if (sfd < 0) + continue; + + if (bind(sfd, rp->ai_addr, rp->ai_addrlen) == 0) + break; + + close(sfd); + } + + 
if (rp == NULL) + pdie("Could not bind"); + + freeaddrinfo(result); + + return sfd; +} + +static void do_listen(char *port) +{ + int sfd; + + if (!tracecmd_get_debug()) + signal_setup(SIGCHLD, sigstub); + + make_pid_file(); + + if (use_vsock) + sfd = get_vsock(port); + else + sfd = get_network(port); + + + if (listen(sfd, backlog) < 0) + pdie("listen"); + + do_accept_loop(sfd); + + kill_clients(); + + remove_pid_file(); +} + +static void start_daemon(void) +{ + do_daemon = 1; + + if (daemon(1, 0) < 0) + die("starting daemon"); +} + +enum { + OPT_verbose = 254, + OPT_debug = 255, +}; + +void trace_listen(int argc, char **argv) +{ + char *logfile = NULL; + char *port = NULL; + int daemon = 0; + int c; + + if (argc < 2) + usage(argv); + + if (strcmp(argv[1], "listen") != 0) + usage(argv); + + for (;;) { + int option_index = 0; + static struct option long_options[] = { + {"port", required_argument, NULL, 'p'}, + {"help", no_argument, NULL, '?'}, + {"debug", no_argument, NULL, OPT_debug}, + {"verbose", optional_argument, NULL, OPT_verbose}, + {NULL, 0, NULL, 0} + }; + + c = getopt_long (argc-1, argv+1, "+hp:Vo:d:l:D", + long_options, &option_index); + if (c == -1) + break; + switch (c) { + case 'h': + usage(argv); + break; + case 'p': + port = optarg; + break; + case 'd': + output_dir = optarg; + break; + case 'V': + use_vsock = true; + break; + case 'o': + output_file = optarg; + break; + case 'l': + logfile = optarg; + break; + case 'D': + daemon = 1; + break; + case OPT_debug: + tracecmd_set_debug(true); + break; + case OPT_verbose: + if (trace_set_verbose(optarg) < 0) + die("invalid verbose level %s", optarg); + break; + default: + usage(argv); + } + } + + if (!port) + usage(argv); + + if ((argc - optind) >= 2) + usage(argv); + + if (!output_file) + output_file = default_output_file; + + if (!output_dir) + output_dir = default_output_dir; + + if (logfile) { + /* set the writes to a logfile instead */ + if (tracecmd_set_logfile(logfile) < 0) + die("creating log file 
%s", logfile); + } + + if (chdir(output_dir) < 0) + die("Can't access directory %s", output_dir); + + if (daemon) + start_daemon(); + + signal_setup(SIGINT, finish); + signal_setup(SIGTERM, finish); + + do_listen(port); + + return; +} diff --git a/tracecmd/trace-mem.c b/tracecmd/trace-mem.c new file mode 100644 index 00000000..25eb0861 --- /dev/null +++ b/tracecmd/trace-mem.c @@ -0,0 +1,564 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2013 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * + * This code was inspired by Ezequiel Garcia's trace_analyze program: + * git://github.com/ezequielgarcia/trace_analyze.git + * + * Unfortuntately, I hate working with Python, and I also had trouble + * getting it to work, as I had an old python on my Fedora 13, and it + * was written for the newer version. I decided to do some of it here + * in C. + */ +#define _LARGEFILE64_SOURCE +#include <dirent.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <signal.h> + +#include "trace-local.h" +#include "trace-hash-local.h" +#include "list.h" + +static int kmalloc_type; +static int kmalloc_node_type; +static int kfree_type; +static int kmem_cache_alloc_type; +static int kmem_cache_alloc_node_type; +static int kmem_cache_free_type; + +static struct tep_format_field *common_type_mem; + +static struct tep_format_field *kmalloc_callsite_field; +static struct tep_format_field *kmalloc_bytes_req_field; +static struct tep_format_field *kmalloc_bytes_alloc_field; +static struct tep_format_field *kmalloc_ptr_field; + +static struct tep_format_field *kmalloc_node_callsite_field; +static struct tep_format_field *kmalloc_node_bytes_req_field; +static struct tep_format_field *kmalloc_node_bytes_alloc_field; +static struct tep_format_field *kmalloc_node_ptr_field; + +static struct tep_format_field *kfree_ptr_field; + +static struct tep_format_field *kmem_cache_callsite_field; +static struct tep_format_field 
*kmem_cache_bytes_req_field; +static struct tep_format_field *kmem_cache_bytes_alloc_field; +static struct tep_format_field *kmem_cache_ptr_field; + +static struct tep_format_field *kmem_cache_node_callsite_field; +static struct tep_format_field *kmem_cache_node_bytes_req_field; +static struct tep_format_field *kmem_cache_node_bytes_alloc_field; +static struct tep_format_field *kmem_cache_node_ptr_field; + +static struct tep_format_field *kmem_cache_free_ptr_field; + +static void *zalloc(size_t size) +{ + return calloc(1, size); +} + +static struct tep_event * +update_event(struct tep_handle *pevent, + const char *sys, const char *name, int *id) +{ + struct tep_event *event; + + event = tep_find_event_by_name(pevent, sys, name); + if (!event) + return NULL; + + *id = event->id; + + return event; +} + +static void update_kmalloc(struct tep_handle *pevent) +{ + struct tep_event *event; + + event = update_event(pevent, "kmem", "kmalloc", &kmalloc_type); + if (!event) + return; + + kmalloc_callsite_field = tep_find_field(event, "call_site"); + kmalloc_bytes_req_field = tep_find_field(event, "bytes_req"); + kmalloc_bytes_alloc_field = tep_find_field(event, "bytes_alloc"); + kmalloc_ptr_field = tep_find_field(event, "ptr"); +} + +static void update_kmalloc_node(struct tep_handle *pevent) +{ + struct tep_event *event; + + event = update_event(pevent, "kmem", "kmalloc_node", &kmalloc_node_type); + if (!event) + return; + + kmalloc_node_callsite_field = tep_find_field(event, "call_site"); + kmalloc_node_bytes_req_field = tep_find_field(event, "bytes_req"); + kmalloc_node_bytes_alloc_field = tep_find_field(event, "bytes_alloc"); + kmalloc_node_ptr_field = tep_find_field(event, "ptr"); +} + +static void update_kfree(struct tep_handle *pevent) +{ + struct tep_event *event; + + event = update_event(pevent, "kmem", "kfree", &kfree_type); + if (!event) + return; + + kfree_ptr_field = tep_find_field(event, "ptr"); +} + +static void update_kmem_cache_alloc(struct tep_handle 
*pevent) +{ + struct tep_event *event; + + event = update_event(pevent, "kmem", "kmem_cache_alloc", &kmem_cache_alloc_type); + if (!event) + return; + + kmem_cache_callsite_field = tep_find_field(event, "call_site"); + kmem_cache_bytes_req_field = tep_find_field(event, "bytes_req"); + kmem_cache_bytes_alloc_field = tep_find_field(event, "bytes_alloc"); + kmem_cache_ptr_field = tep_find_field(event, "ptr"); +} + +static void update_kmem_cache_alloc_node(struct tep_handle *pevent) +{ + struct tep_event *event; + + event = update_event(pevent, "kmem", "kmem_cache_alloc_node", + &kmem_cache_alloc_node_type); + if (!event) + return; + + kmem_cache_node_callsite_field = tep_find_field(event, "call_site"); + kmem_cache_node_bytes_req_field = tep_find_field(event, "bytes_req"); + kmem_cache_node_bytes_alloc_field = tep_find_field(event, "bytes_alloc"); + kmem_cache_node_ptr_field = tep_find_field(event, "ptr"); +} + +static void update_kmem_cache_free(struct tep_handle *pevent) +{ + struct tep_event *event; + + event = update_event(pevent, "kmem", "kmem_cache_free", &kmem_cache_free_type); + if (!event) + return; + + kmem_cache_free_ptr_field = tep_find_field(event, "ptr"); +} + +struct func_descr { + struct func_descr *next; + const char *func; + unsigned long total_alloc; + unsigned long total_req; + unsigned long current_alloc; + unsigned long current_req; + unsigned long max_alloc; + unsigned long max_req; + unsigned long waste; + unsigned long max_waste; +}; + +struct ptr_descr { + struct ptr_descr *next; + struct func_descr *func; + unsigned long long ptr; + unsigned long alloc; + unsigned long req; +}; + +#define HASH_BITS 12 +#define HASH_SIZE (1 << HASH_BITS) +#define HASH_MASK (HASH_SIZE - 1); + +static struct func_descr *func_hash[HASH_SIZE]; +static struct ptr_descr *ptr_hash[HASH_SIZE]; +static struct func_descr **func_list; + +static unsigned func_count; + +static int make_key(const void *ptr, int size) +{ + int key = 0; + int i; + char *kp = (char *)&key; + 
const char *indx = ptr; + + for (i = 0; i < size; i++) + kp[i & 3] ^= indx[i]; + + return trace_hash(key); +} + +static struct func_descr *find_func(const char *func) +{ + struct func_descr *funcd; + int key = make_key(func, strlen(func)) & HASH_MASK; + + for (funcd = func_hash[key]; funcd; funcd = funcd->next) { + /* + * As func is always a constant to one pointer, + * we can use a direct compare instead of strcmp. + */ + if (funcd->func == func) + return funcd; + } + + return NULL; +} + +static struct func_descr *create_func(const char *func) +{ + struct func_descr *funcd; + int key = make_key(func, strlen(func)) & HASH_MASK; + + funcd = zalloc(sizeof(*funcd)); + if (!funcd) + die("malloc"); + + funcd->func = func; + funcd->next = func_hash[key]; + func_hash[key] = funcd; + + func_count++; + + return funcd; +} + +static struct ptr_descr *find_ptr(unsigned long long ptr) +{ + struct ptr_descr *ptrd; + int key = make_key(&ptr, sizeof(ptr)) & HASH_MASK; + + for (ptrd = ptr_hash[key]; ptrd; ptrd = ptrd->next) { + if (ptrd->ptr == ptr) + return ptrd; + } + + return NULL; +} + +static struct ptr_descr *create_ptr(unsigned long long ptr) +{ + struct ptr_descr *ptrd; + int key = make_key(&ptr, sizeof(ptr)) & HASH_MASK; + + ptrd = zalloc(sizeof(*ptrd)); + if (!ptrd) + die("malloc"); + + ptrd->ptr = ptr; + ptrd->next = ptr_hash[key]; + ptr_hash[key] = ptrd; + + return ptrd; +} + +static void remove_ptr(unsigned long long ptr) +{ + struct ptr_descr *ptrd, **last; + int key = make_key(&ptr, sizeof(ptr)) & HASH_MASK; + + last = &ptr_hash[key]; + for (ptrd = ptr_hash[key]; ptrd; ptrd = ptrd->next) { + if (ptrd->ptr == ptr) + break; + last = &ptrd->next; + } + + if (!ptrd) + return; + + *last = ptrd->next; + free(ptrd); +} + +static void add_kmalloc(const char *func, unsigned long long ptr, + unsigned int req, int alloc) +{ + struct func_descr *funcd; + struct ptr_descr *ptrd; + + funcd = find_func(func); + if (!funcd) + funcd = create_func(func); + + funcd->total_alloc += 
alloc; + funcd->total_req += req; + funcd->current_alloc += alloc; + funcd->current_req += req; + if (funcd->current_alloc > funcd->max_alloc) + funcd->max_alloc = funcd->current_alloc; + if (funcd->current_req > funcd->max_req) + funcd->max_req = funcd->current_req; + + ptrd = find_ptr(ptr); + if (!ptrd) + ptrd = create_ptr(ptr); + + ptrd->alloc = alloc; + ptrd->req = req; + ptrd->func = funcd; +} + +static void remove_kmalloc(unsigned long long ptr) +{ + struct func_descr *funcd; + struct ptr_descr *ptrd; + + ptrd = find_ptr(ptr); + if (!ptrd) + return; + + funcd = ptrd->func; + funcd->current_alloc -= ptrd->alloc; + funcd->current_req -= ptrd->req; + + remove_ptr(ptr); +} + +static void +process_kmalloc(struct tep_handle *pevent, struct tep_record *record, + struct tep_format_field *callsite_field, + struct tep_format_field *bytes_req_field, + struct tep_format_field *bytes_alloc_field, + struct tep_format_field *ptr_field) +{ + unsigned long long callsite; + unsigned long long val; + unsigned long long ptr; + unsigned int req; + int alloc; + const char *func; + + tep_read_number_field(callsite_field, record->data, &callsite); + tep_read_number_field(bytes_req_field, record->data, &val); + req = val; + tep_read_number_field(bytes_alloc_field, record->data, &val); + alloc = val; + tep_read_number_field(ptr_field, record->data, &ptr); + + func = tep_find_function(pevent, callsite); + + add_kmalloc(func, ptr, req, alloc); +} + +static void +process_kfree(struct tep_handle *pevent, struct tep_record *record, + struct tep_format_field *ptr_field) +{ + unsigned long long ptr; + + tep_read_number_field(ptr_field, record->data, &ptr); + + remove_kmalloc(ptr); +} + +static void +process_record(struct tep_handle *pevent, struct tep_record *record) +{ + unsigned long long val; + int type; + + tep_read_number_field(common_type_mem, record->data, &val); + type = val; + + if (type == kmalloc_type) + return process_kmalloc(pevent, record, + kmalloc_callsite_field, + 
kmalloc_bytes_req_field, + kmalloc_bytes_alloc_field, + kmalloc_ptr_field); + if (type == kmalloc_node_type) + return process_kmalloc(pevent, record, + kmalloc_node_callsite_field, + kmalloc_node_bytes_req_field, + kmalloc_node_bytes_alloc_field, + kmalloc_node_ptr_field); + if (type == kfree_type) + return process_kfree(pevent, record, kfree_ptr_field); + + if (type == kmem_cache_alloc_type) + return process_kmalloc(pevent, record, + kmem_cache_callsite_field, + kmem_cache_bytes_req_field, + kmem_cache_bytes_alloc_field, + kmem_cache_ptr_field); + if (type == kmem_cache_alloc_node_type) + return process_kmalloc(pevent, record, + kmem_cache_node_callsite_field, + kmem_cache_node_bytes_req_field, + kmem_cache_node_bytes_alloc_field, + kmem_cache_node_ptr_field); + if (type == kmem_cache_free_type) + return process_kfree(pevent, record, kmem_cache_free_ptr_field); +} + +static int func_cmp(const void *a, const void *b) +{ + const struct func_descr *fa = *(const struct func_descr **)a; + const struct func_descr *fb = *(const struct func_descr **)b; + + if (fa->waste > fb->waste) + return -1; + if (fa->waste < fb->waste) + return 1; + return 0; +} + +static void sort_list(void) +{ + struct func_descr *funcd; + int h; + int i = 0; + + func_list = zalloc(sizeof(*func_list) * func_count); + + for (h = 0; h < HASH_SIZE; h++) { + for (funcd = func_hash[h]; funcd; funcd = funcd->next) { + funcd->waste = funcd->current_alloc - funcd->current_req; + funcd->max_waste = funcd->max_alloc - funcd->max_req; + if (i == func_count) + die("more funcs than expected\n"); + func_list[i++] = funcd; + } + } + + qsort(func_list, func_count, sizeof(*func_list), func_cmp); +} + +static void print_list(void) +{ + struct func_descr *funcd; + int i; + + printf(" Function \t"); + printf("Waste\tAlloc\treq\t\tTotAlloc TotReq\t\tMaxAlloc MaxReq\t"); + printf("MaxWaste\n"); + printf(" -------- \t"); + printf("-----\t-----\t---\t\t-------- ------\t\t-------- ------\t"); + printf("--------\n"); + + 
for (i = 0; i < func_count; i++) { + funcd = func_list[i]; + + printf("%32s\t%ld\t%ld\t%ld\t\t%8ld %8ld\t\t%8ld %8ld\t%ld\n", + funcd->func, funcd->waste, + funcd->current_alloc, funcd->current_req, + funcd->total_alloc, funcd->total_req, + funcd->max_alloc, funcd->max_req, funcd->max_waste); + } +} + +static void do_trace_mem(struct tracecmd_input *handle) +{ + struct tep_handle *pevent = tracecmd_get_tep(handle); + struct tep_record *record; + struct tep_event *event; + int missed_events = 0; + int cpus; + int cpu; + int ret; + + ret = tracecmd_init_data(handle); + if (ret < 0) + die("failed to init data"); + + if (ret > 0) + die("trace-cmd mem does not work with latency traces\n"); + + cpus = tracecmd_cpus(handle); + + /* Need to get any event */ + for (cpu = 0; cpu < cpus; cpu++) { + record = tracecmd_peek_data(handle, cpu); + if (record) + break; + } + if (!record) + die("No records found in file"); + + ret = tep_data_type(pevent, record); + event = tep_find_event(pevent, ret); + + common_type_mem = tep_find_common_field(event, "common_type"); + if (!common_type_mem) + die("Can't find a 'type' field?"); + + update_kmalloc(pevent); + update_kmalloc_node(pevent); + update_kfree(pevent); + update_kmem_cache_alloc(pevent); + update_kmem_cache_alloc_node(pevent); + update_kmem_cache_free(pevent); + + while ((record = tracecmd_read_next_data(handle, &cpu))) { + + /* record missed event */ + if (!missed_events && record->missed_events) + missed_events = 1; + + process_record(pevent, record); + tracecmd_free_record(record); + } + + sort_list(); + print_list(); +} + +void trace_mem(int argc, char **argv) +{ + struct tracecmd_input *handle; + const char *input_file = NULL; + int ret; + + for (;;) { + int c; + + c = getopt(argc-1, argv+1, "+hi:"); + if (c == -1) + break; + switch (c) { + case 'h': + usage(argv); + break; + case 'i': + if (input_file) + die("Only one input for mem"); + input_file = optarg; + break; + default: + usage(argv); + } + } + + if ((argc - optind) 
>= 2) { + if (input_file) + usage(argv); + input_file = argv[optind + 1]; + } + + if (!input_file) + input_file = DEFAULT_INPUT_FILE; + + handle = tracecmd_alloc(input_file, 0); + if (!handle) + die("can't open %s\n", input_file); + + ret = tracecmd_read_headers(handle, 0); + if (ret) + return; + + do_trace_mem(handle); + + tracecmd_close(handle); +} diff --git a/tracecmd/trace-profile.c b/tracecmd/trace-profile.c new file mode 100644 index 00000000..6a2cc3d0 --- /dev/null +++ b/tracecmd/trace-profile.c @@ -0,0 +1,2455 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2014 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ + +/** FIXME: Convert numbers based on machine and file */ +#define _LARGEFILE64_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#ifndef NO_AUDIT +#include <libaudit.h> +#endif +#include "trace-local.h" +#include "trace-hash.h" +#include "trace-hash-local.h" +#include "list.h" + +#include <linux/time64.h> + +#ifdef WARN_NO_AUDIT +# warning "lib audit not found, using raw syscalls " \ + "(install audit-libs-devel(for fedora) or libaudit-dev(for debian/ubuntu) and try again)" +#endif + +#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP" +#define TASK_STATE_MAX 1024 + +#define task_from_item(item) container_of(item, struct task_data, hash) +#define start_from_item(item) container_of(item, struct start_data, hash) +#define event_from_item(item) container_of(item, struct event_hash, hash) +#define stack_from_item(item) container_of(item, struct stack_data, hash) +#define group_from_item(item) container_of(item, struct group_data, hash) +#define event_data_from_item(item) container_of(item, struct event_data, hash) + +static unsigned long long nsecs_per_sec(unsigned long long ts) +{ + return ts / NSEC_PER_SEC; +} + +static unsigned long long mod_to_usec(unsigned long long ts) +{ + return ((ts % NSEC_PER_SEC) + NSEC_PER_USEC / 2) / NSEC_PER_USEC; +} + +struct handle_data; +struct event_hash; +struct event_data; + 
+typedef void (*event_data_print)(struct trace_seq *s, struct event_hash *hash); +typedef int (*handle_event_func)(struct handle_data *h, unsigned long long pid, + struct event_data *data, + struct tep_record *record, int cpu); + +enum event_data_type { + EVENT_TYPE_UNDEFINED, + EVENT_TYPE_STACK, + EVENT_TYPE_SCHED_SWITCH, + EVENT_TYPE_WAKEUP, + EVENT_TYPE_FUNC, + EVENT_TYPE_SYSCALL, + EVENT_TYPE_IRQ, + EVENT_TYPE_SOFTIRQ, + EVENT_TYPE_SOFTIRQ_RAISE, + EVENT_TYPE_PROCESS_EXEC, + EVENT_TYPE_USER_MATE, +}; + +struct event_data { + struct trace_hash_item hash; + int id; + int trace; + struct tep_event *event; + + struct event_data *end; + struct event_data *start; + + struct tep_format_field *pid_field; + struct tep_format_field *start_match_field; /* match with start */ + struct tep_format_field *end_match_field; /* match with end */ + struct tep_format_field *data_field; /* optional */ + + event_data_print print_func; + handle_event_func handle_event; + void *private; + int migrate; /* start/end pairs can migrate cpus */ + int global; /* use global tasks */ + enum event_data_type type; +}; + +struct stack_data { + struct trace_hash_item hash; + unsigned long long count; + unsigned long long time; + unsigned long long time_min; + unsigned long long ts_min; + unsigned long long time_max; + unsigned long long ts_max; + unsigned long long time_avg; + unsigned long size; + char caller[]; +}; + +struct stack_holder { + unsigned long size; + void *caller; + struct tep_record *record; +}; + +struct start_data { + struct trace_hash_item hash; + struct event_data *event_data; + struct list_head list; + struct task_data *task; + unsigned long long timestamp; + unsigned long long search_val; + unsigned long long val; + int cpu; + + struct stack_holder stack; +}; + +struct event_hash { + struct trace_hash_item hash; + struct event_data *event_data; + unsigned long long search_val; + unsigned long long val; + unsigned long long count; + unsigned long long time_total; + unsigned 
long long time_avg; + unsigned long long time_max; + unsigned long long ts_max; + unsigned long long time_min; + unsigned long long ts_min; + unsigned long long time_std; + unsigned long long last_time; + + struct trace_hash stacks; +}; + +struct group_data { + struct trace_hash_item hash; + char *comm; + struct trace_hash event_hash; +}; + +struct task_data { + struct trace_hash_item hash; + int pid; + int sleeping; + + char *comm; + + struct trace_hash start_hash; + struct trace_hash event_hash; + + struct task_data *proxy; + struct start_data *last_start; + struct event_hash *last_event; + struct tep_record *last_stack; + struct handle_data *handle; + struct group_data *group; +}; + +struct cpu_info { + int current; +}; + +struct sched_switch_data { + struct tep_format_field *prev_state; + int match_state; +}; + +struct handle_data { + struct handle_data *next; + struct tracecmd_input *handle; + struct tep_handle *pevent; + + struct trace_hash events; + struct trace_hash group_hash; + + struct cpu_info **cpu_data; + + struct tep_format_field *common_pid; + struct tep_format_field *wakeup_comm; + struct tep_format_field *switch_prev_comm; + struct tep_format_field *switch_next_comm; + + struct sched_switch_data sched_switch_blocked; + struct sched_switch_data sched_switch_preempt; + + struct trace_hash task_hash; + struct list_head *cpu_starts; + struct list_head migrate_starts; + + struct task_data *global_task; + struct task_data *global_percpu_tasks; + + int cpus; +}; + +static struct handle_data *handles; +static struct event_data *stacktrace_event; +static bool merge_like_comms = false; + +void trace_profile_set_merge_like_comms(void) +{ + merge_like_comms = true; +} + +static struct start_data * +add_start(struct task_data *task, + struct event_data *event_data, struct tep_record *record, + unsigned long long search_val, unsigned long long val) +{ + struct start_data *start; + + start = malloc(sizeof(*start)); + if (!start) + return NULL; + memset(start, 0, 
sizeof(*start)); + start->hash.key = trace_hash(search_val); + start->search_val = search_val; + start->val = val; + start->timestamp = record->ts; + start->event_data = event_data; + start->cpu = record->cpu; + start->task = task; + trace_hash_add(&task->start_hash, &start->hash); + if (event_data->migrate) + list_add(&start->list, &task->handle->migrate_starts); + else + list_add(&start->list, &task->handle->cpu_starts[record->cpu]); + return start; +} + +struct event_data_match { + struct event_data *event_data; + unsigned long long search_val; + unsigned long long val; +}; + +static int match_start(struct trace_hash_item *item, void *data) +{ + struct start_data *start = start_from_item(item); + struct event_data_match *edata = data; + + return start->event_data == edata->event_data && + start->search_val == edata->search_val; +} + +static int match_event(struct trace_hash_item *item, void *data) +{ + struct event_data_match *edata = data; + struct event_hash *event = event_from_item(item); + + return event->event_data == edata->event_data && + event->search_val == edata->search_val && + event->val == edata->val; +} + +static struct event_hash * +find_event_hash(struct task_data *task, struct event_data_match *edata) +{ + struct event_hash *event_hash; + struct trace_hash_item *item; + unsigned long long key; + + key = (unsigned long)edata->event_data + + (unsigned long)edata->search_val + + (unsigned long)edata->val; + key = trace_hash(key); + item = trace_hash_find(&task->event_hash, key, match_event, edata); + if (item) + return event_from_item(item); + + event_hash = malloc(sizeof(*event_hash)); + if (!event_hash) + return NULL; + memset(event_hash, 0, sizeof(*event_hash)); + + event_hash->event_data = edata->event_data; + event_hash->search_val = edata->search_val; + event_hash->val = edata->val; + event_hash->hash.key = key; + trace_hash_init(&event_hash->stacks, 32); + + trace_hash_add(&task->event_hash, &event_hash->hash); + + return event_hash; +} + 
+static struct event_hash * +find_start_event_hash(struct task_data *task, struct event_data *event_data, + struct start_data *start) +{ + struct event_data_match edata; + + edata.event_data = event_data; + edata.search_val = start->search_val; + edata.val = start->val; + + return find_event_hash(task, &edata); +} + +static struct start_data * +find_start(struct task_data *task, struct event_data *event_data, + unsigned long long search_val) +{ + unsigned long long key = trace_hash(search_val); + struct event_data_match edata; + void *data = &edata; + struct trace_hash_item *item; + struct start_data *start; + + edata.event_data = event_data; + edata.search_val = search_val; + + item = trace_hash_find(&task->start_hash, key, match_start, data); + if (!item) + return NULL; + + start = start_from_item(item); + return start; +} + +struct stack_match { + void *caller; + unsigned long size; +}; + +static int match_stack(struct trace_hash_item *item, void *data) +{ + struct stack_data *stack = stack_from_item(item); + struct stack_match *match = data; + + if (match->size != stack->size) + return 0; + + return memcmp(stack->caller, match->caller, stack->size) == 0; +} + + +static void add_event_stack(struct event_hash *event_hash, + void *caller, unsigned long size, + unsigned long long time, unsigned long long ts) +{ + unsigned long long key; + struct stack_data *stack; + struct stack_match match; + struct trace_hash_item *item; + int i; + + match.caller = caller; + match.size = size; + + if (size < sizeof(int)) + die("Stack size of less than sizeof(int)??"); + + for (key = 0, i = 0; i <= size - sizeof(int); i += sizeof(int)) + key += trace_hash(*(int *)(caller + i)); + + item = trace_hash_find(&event_hash->stacks, key, match_stack, &match); + if (!item) { + stack = malloc(sizeof(*stack) + size); + if (!stack) { + warning("Could not allocate stack"); + return; + } + memset(stack, 0, sizeof(*stack)); + memcpy(&stack->caller, caller, size); + stack->size = size; + 
stack->hash.key = key; + trace_hash_add(&event_hash->stacks, &stack->hash); + } else + stack = stack_from_item(item); + + stack->count++; + stack->time += time; + if (stack->count == 1 || time < stack->time_min) { + stack->time_min = time; + stack->ts_min = ts; + } + if (time > stack->time_max) { + stack->time_max = time; + stack->ts_max = ts; + } +} + +static void free_start(struct start_data *start) +{ + if (start->task->last_start == start) + start->task->last_start = NULL; + if (start->stack.record) + tracecmd_free_record(start->stack.record); + trace_hash_del(&start->hash); + list_del(&start->list); + free(start); +} + +static struct event_hash * +add_and_free_start(struct task_data *task, struct start_data *start, + struct event_data *event_data, unsigned long long ts) +{ + struct event_hash *event_hash; + long long delta; + + delta = ts - start->timestamp; + + /* + * It's possible on a live trace, because of timestamps being + * different on different CPUs, we can go back in time. When + * that happens, just zero out the delta. 
+ */ + if (delta < 0) + delta = 0; + + event_hash = find_start_event_hash(task, event_data, start); + if (!event_hash) + return NULL; + event_hash->count++; + event_hash->time_total += delta; + event_hash->last_time = delta; + + if (delta > event_hash->time_max) { + event_hash->time_max = delta; + event_hash->ts_max = ts; + } + + if (event_hash->count == 1 || delta < event_hash->time_min) { + event_hash->time_min = delta; + event_hash->ts_min = ts; + } + + if (start->stack.record) { + unsigned long size; + void *caller; + + size = start->stack.size; + caller = start->stack.caller; + + add_event_stack(event_hash, caller, size, delta, + start->stack.record->ts); + tracecmd_free_record(start->stack.record); + start->stack.record = NULL; + } + + free_start(start); + + return event_hash; +} + +static struct event_hash * +find_and_update_start(struct task_data *task, struct event_data *event_data, + unsigned long long ts, unsigned long long search_val) +{ + struct start_data *start; + + start = find_start(task, event_data, search_val); + if (!start) + return NULL; + return add_and_free_start(task, start, event_data, ts); +} + +static int match_task(struct trace_hash_item *item, void *data) +{ + struct task_data *task = task_from_item(item); + int pid = *(unsigned long *)data; + + return task->pid == pid; +} + +static void init_task(struct handle_data *h, struct task_data *task) +{ + task->handle = h; + + trace_hash_init(&task->start_hash, 16); + trace_hash_init(&task->event_hash, 32); +} + +static struct task_data * +add_task(struct handle_data *h, int pid) +{ + unsigned long long key = trace_hash(pid); + struct task_data *task; + + task = malloc(sizeof(*task)); + if (!task) { + warning("Could not allocate task"); + return NULL; + } + memset(task, 0, sizeof(*task)); + + task->pid = pid; + task->hash.key = key; + trace_hash_add(&h->task_hash, &task->hash); + + init_task(h, task); + + return task; +} + +static struct task_data * +find_task(struct handle_data *h, int pid) 
+{ + unsigned long long key = trace_hash(pid); + struct trace_hash_item *item; + static struct task_data *last_task; + void *data = (unsigned long *)&pid; + + if (last_task && last_task->pid == pid) + return last_task; + + item = trace_hash_find(&h->task_hash, key, match_task, data); + + if (item) + last_task = task_from_item(item); + else + last_task = add_task(h, pid); + + return last_task; +} + +static int match_group(struct trace_hash_item *item, void *data) +{ + struct group_data *group = group_from_item(item); + + return strcmp(group->comm, (char *)data) == 0; +} + + +static void +add_task_comm(struct task_data *task, struct tep_format_field *field, + struct tep_record *record) +{ + const char *comm; + + task->comm = malloc(field->size + 1); + if (!task->comm) { + warning("Could not allocate task comm"); + return; + } + comm = record->data + field->offset; + memcpy(task->comm, comm, field->size); + task->comm[field->size] = 0; +} + +/* Account for tasks that don't have starts */ +static void account_task(struct task_data *task, struct event_data *event_data, + struct tep_record *record) +{ + struct event_data_match edata; + struct event_hash *event_hash; + struct task_data *proxy = NULL; + unsigned long long search_val = 0; + unsigned long long val = 0; + unsigned long long pid; + + /* + * If an event has the pid_field set, then find that task for + * this event instead. Let this task proxy for it to handle + * stack traces on this event. + */ + if (event_data->pid_field) { + tep_read_number_field(event_data->pid_field, + record->data, &pid); + proxy = task; + task = find_task(task->handle, pid); + if (!task) + return; + proxy->proxy = task; + } + + /* + * If data_field is defined, use that for val, + * if the start_field is defined, use that for search_val. 
+ */ + if (event_data->data_field) { + tep_read_number_field(event_data->data_field, + record->data, &val); + } + if (event_data->start_match_field) { + tep_read_number_field(event_data->start_match_field, + record->data, &search_val); + } + + edata.event_data = event_data; + edata.search_val = val; + edata.val = val; + + event_hash = find_event_hash(task, &edata); + if (!event_hash) { + warning("failed to allocate event_hash"); + return; + } + + event_hash->count++; + task->last_event = event_hash; +} + +static struct task_data * +find_event_task(struct handle_data *h, struct event_data *event_data, + struct tep_record *record, unsigned long long pid) +{ + if (event_data->global) { + if (event_data->migrate) + return h->global_task; + else + return &h->global_percpu_tasks[record->cpu]; + } + + /* If pid_field is defined, use that to find the task */ + if (event_data->pid_field) + tep_read_number_field(event_data->pid_field, + record->data, &pid); + return find_task(h, pid); +} + +static struct task_data * +handle_end_event(struct handle_data *h, struct event_data *event_data, + struct tep_record *record, int pid) +{ + struct event_hash *event_hash; + struct task_data *task; + unsigned long long val; + + task = find_event_task(h, event_data, record, pid); + if (!task) + return NULL; + + tep_read_number_field(event_data->start_match_field, record->data, + &val); + event_hash = find_and_update_start(task, event_data->start, record->ts, val); + task->last_start = NULL; + task->last_event = event_hash; + + return task; +} + +static struct task_data * +handle_start_event(struct handle_data *h, struct event_data *event_data, + struct tep_record *record, unsigned long long pid) +{ + struct start_data *start; + struct task_data *task; + unsigned long long val; + + task = find_event_task(h, event_data, record, pid); + if (!task) + return NULL; + + tep_read_number_field(event_data->end_match_field, record->data, + &val); + start = add_start(task, event_data, record, val, 
val); + if (!start) { + warning("Failed to allocate start of task"); + return NULL; + } + + task->last_start = start; + task->last_event = NULL; + + return task; +} + +static int handle_event_data(struct handle_data *h, + unsigned long long pid, + struct event_data *event_data, + struct tep_record *record, int cpu) +{ + struct task_data *task = NULL; + + /* If this is the end of a event pair (start is set) */ + if (event_data->start) + task = handle_end_event(h, event_data, record, pid); + + /* If this is the start of a event pair (end is set) */ + if (event_data->end) { + task = handle_start_event(h, event_data, record, pid); + /* handle_start_event only returns NULL on error */ + if (!task) + return -1; + } + + if (!task) { + task = find_task(h, pid); + if (!task) + return -1; + task->proxy = NULL; + task->last_start = NULL; + task->last_event = NULL; + account_task(task, event_data, record); + } + + return 0; +} + +static void handle_missed_events(struct handle_data *h, int cpu) +{ + struct start_data *start; + struct start_data *n; + + /* Clear all starts on this CPU */ + list_for_each_entry_safe(start, n, &h->cpu_starts[cpu], list) { + free_start(start); + } + + /* Now clear all starts whose events can migrate */ + list_for_each_entry_safe(start, n, &h->migrate_starts, list) { + free_start(start); + } +} + +static int match_event_data(struct trace_hash_item *item, void *data) +{ + struct event_data *event_data = event_data_from_item(item); + int id = (int)(unsigned long)data; + + return event_data->id == id; +} + +static struct event_data * +find_event_data(struct handle_data *h, int id) +{ + struct trace_hash_item *item; + unsigned long long key = trace_hash(id); + void *data = (void *)(unsigned long)id; + + item = trace_hash_find(&h->events, key, match_event_data, data); + if (item) + return event_data_from_item(item); + return NULL; +} + +static void trace_profile_record(struct tracecmd_input *handle, + struct tep_record *record) +{ + static struct 
handle_data *last_handle; + struct tep_record *stack_record; + struct event_data *event_data; + struct task_data *task; + struct handle_data *h; + struct tep_handle *pevent; + unsigned long long pid; + int cpu = record->cpu; + int id; + + if (last_handle && last_handle->handle == handle) + h = last_handle; + else { + for (h = handles; h; h = h->next) { + if (h->handle == handle) + break; + } + if (!h) + die("Handle not found?"); + last_handle = h; + } + + if (record->missed_events) + handle_missed_events(h, cpu); + + pevent = h->pevent; + + id = tep_data_type(pevent, record); + + event_data = find_event_data(h, id); + + if (!event_data) + return; + + + /* Get this current PID */ + tep_read_number_field(h->common_pid, record->data, &pid); + + task = find_task(h, pid); + if (!task) + return; + stack_record = task->last_stack; + + if (event_data->handle_event) + event_data->handle_event(h, pid, event_data, record, cpu); + else + handle_event_data(h, pid, event_data, record, cpu); + + /* If the last stack hasn't changed, free it */ + if (stack_record && task->last_stack == stack_record) { + tracecmd_free_record(stack_record); + task->last_stack = NULL; + } +} + +static struct event_data * +add_event(struct handle_data *h, const char *system, const char *event_name, + enum event_data_type type) +{ + struct event_data *event_data; + struct tep_event *event; + + event = tep_find_event_by_name(h->pevent, system, event_name); + if (!event) + return NULL; + + if (!h->common_pid) { + h->common_pid = tep_find_common_field(event, "common_pid"); + if (!h->common_pid) + die("No 'common_pid' found in event"); + } + + event_data = malloc(sizeof(*event_data)); + if (!event_data) { + warning("Could not allocate event_data"); + return NULL; + } + memset(event_data, 0, sizeof(*event_data)); + event_data->id = event->id; + event_data->event = event; + event_data->type = type; + event_data->hash.key = trace_hash(event_data->event->id); + + trace_hash_add(&h->events, &event_data->hash); + 
+ return event_data; +} + +static void +mate_events(struct handle_data *h, struct event_data *start, + const char *pid_field, const char *end_match_field, + struct event_data *end, const char *start_match_field, + int migrate, int global) +{ + start->end = end; + end->start = start; + + if (pid_field) { + start->pid_field = tep_find_field(start->event, pid_field); + if (!start->pid_field) + die("Event: %s does not have field %s", + start->event->name, pid_field); + } + + /* Field to match with end */ + start->end_match_field = tep_find_field(start->event, end_match_field); + if (!start->end_match_field) + die("Event: %s does not have field %s", + start->event->name, end_match_field); + + /* Field to match with start */ + end->start_match_field = tep_find_field(end->event, start_match_field); + if (!end->start_match_field) + die("Event: %s does not have field %s", + end->event->name, start_match_field); + + start->migrate = migrate; + start->global = global; + end->migrate = migrate; + end->global = global; +} + +/** + * tracecmd_mate_events - match events to profile against + * @handle: The input handle where the events exist. + * @start_event: The event that starts the transaction + * @pid_field: Use this over common_pid (may be NULL to use common_pid) + * @end_match_field: The field that matches the end events @start_match_field + * @end_event: The event that ends the transaction + * @start_match_field: The end event field that matches start's @end_match_field + * @migrate: Can the transaction switch CPUs? 
1 for yes, 0 for no + * @global: The events are global and not per task + */ +void tracecmd_mate_events(struct tracecmd_input *handle, + struct tep_event *start_event, + const char *pid_field, const char *end_match_field, + struct tep_event *end_event, + const char *start_match_field, + int migrate, int global) +{ + struct handle_data *h; + struct event_data *start; + struct event_data *end; + + for (h = handles; h; h = h->next) { + if (h->handle == handle) + break; + } + if (!h) + die("Handle not found for trace profile"); + + start = add_event(h, start_event->system, start_event->name, + EVENT_TYPE_USER_MATE); + + end = add_event(h, end_event->system, end_event->name, + EVENT_TYPE_USER_MATE); + + if (!start || !end) + return; + + mate_events(h, start, pid_field, end_match_field, end, start_match_field, + migrate, global); +} + +static void func_print(struct trace_seq *s, struct event_hash *event_hash) +{ + const char *func; + + func = tep_find_function(event_hash->event_data->event->tep, + event_hash->val); + if (func) + trace_seq_printf(s, "func: %s()", func); + else + trace_seq_printf(s, "func: 0x%llx", event_hash->val); +} + +static void syscall_print(struct trace_seq *s, struct event_hash *event_hash) +{ +#ifndef NO_AUDIT + const char *name = NULL; + int machine; + + machine = audit_detect_machine(); + if (machine < 0) + goto fail; + name = audit_syscall_to_name(event_hash->val, machine); + if (!name) + goto fail; + trace_seq_printf(s, "syscall:%s", name); + return; +fail: +#endif + trace_seq_printf(s, "%s:%d", event_hash->event_data->event->name, + (int)event_hash->val); +} + +/* From Linux include/linux/interrupt.h */ +#define SOFTIRQS \ + C(HI), \ + C(TIMER), \ + C(NET_TX), \ + C(NET_RX), \ + C(BLOCK), \ + C(BLOCK_IOPOLL), \ + C(TASKLET), \ + C(SCHED), \ + C(HRTIMER), \ + C(RCU), \ + C(NR), + +#undef C +#define C(a) a##_SOFTIRQ + +enum { SOFTIRQS }; + +#undef C +#define C(a) #a + +static const char *softirq_map[] = { SOFTIRQS }; + +static void 
softirq_print(struct trace_seq *s, struct event_hash *event_hash) +{ + int softirq = (int)event_hash->val; + + if (softirq < NR_SOFTIRQ) + trace_seq_printf(s, "%s:%s", event_hash->event_data->event->name, + softirq_map[softirq]); + else + trace_seq_printf(s, "%s:%d", event_hash->event_data->event->name, + softirq); +} + +static void sched_switch_print(struct trace_seq *s, struct event_hash *event_hash) +{ + const char states[] = TASK_STATE_TO_CHAR_STR; + int i; + + trace_seq_printf(s, "%s:", event_hash->event_data->event->name); + + if (event_hash->val) { + int val = event_hash->val; + + for (i = 0; val && i < sizeof(states) - 1; i++, val >>= 1) { + if (val & 1) + trace_seq_putc(s, states[i+1]); + } + } else + trace_seq_putc(s, 'R'); +} + +static int handle_sched_switch_event(struct handle_data *h, + unsigned long long pid, + struct event_data *event_data, + struct tep_record *record, int cpu) +{ + struct task_data *task; + unsigned long long prev_pid; + unsigned long long prev_state; + unsigned long long next_pid; + struct start_data *start; + + /* pid_field holds prev_pid, data_field holds prev_state */ + tep_read_number_field(event_data->pid_field, + record->data, &prev_pid); + + tep_read_number_field(event_data->data_field, + record->data, &prev_state); + + /* only care about real states */ + prev_state &= TASK_STATE_MAX - 1; + + /* end_match_field holds next_pid */ + tep_read_number_field(event_data->end_match_field, + record->data, &next_pid); + + task = find_task(h, prev_pid); + if (!task) + return -1; + if (!task->comm) + add_task_comm(task, h->switch_prev_comm, record); + + if (prev_state) + task->sleeping = 1; + else + task->sleeping = 0; + + /* task is being scheduled out. 
prev_state tells why */ + start = add_start(task, event_data, record, prev_pid, prev_state); + task->last_start = start; + task->last_event = NULL; + + task = find_task(h, next_pid); + if (!task) + return -1; + + if (!task->comm) + add_task_comm(task, h->switch_next_comm, record); + + /* + * If the next task was blocked, it required a wakeup to + * restart, and there should be one. + * But if it was preempted, we look for the previous sched switch. + * Unfortunately, we have to look for both types of events as + * we do not know why next_pid scheduled out. + * + * event_data->start holds the sched_wakeup event data. + */ + find_and_update_start(task, event_data->start, record->ts, next_pid); + + /* Look for this task if it was preempted (no wakeup found). */ + find_and_update_start(task, event_data, record->ts, next_pid); + + return 0; +} + +static int handle_stacktrace_event(struct handle_data *h, + unsigned long long pid, + struct event_data *event_data, + struct tep_record *record, int cpu) +{ + struct task_data *orig_task; + struct task_data *proxy; + struct task_data *task; + unsigned long long size; + struct event_hash *event_hash; + struct start_data *start; + void *caller; + + task = find_task(h, pid); + if (!task) + return -1; + + if (task->last_stack) { + tracecmd_free_record(task->last_stack); + task->last_stack = NULL; + } + + if ((proxy = task->proxy)) { + task->proxy = NULL; + orig_task = task; + task = proxy; + } + + if (!task->last_start && !task->last_event) { + /* + * Save this stack in case function graph needs it. + * Need the original task, not a proxy. + */ + if (proxy) + task = orig_task; + tracecmd_record_ref(record); + task->last_stack = record; + return 0; + } + + /* + * start_match_field holds the size. + * data_field holds the caller location. 
+ */ + size = record->size - event_data->data_field->offset; + caller = record->data + event_data->data_field->offset; + + /* + * If there's a "start" then don't add the stack until + * it finds a matching "end". + */ + if ((start = task->last_start)) { + tracecmd_record_ref(record); + start->stack.record = record; + start->stack.size = size; + start->stack.caller = caller; + task->last_start = NULL; + task->last_event = NULL; + return 0; + } + + event_hash = task->last_event; + task->last_event = NULL; + + add_event_stack(event_hash, caller, size, event_hash->last_time, + record->ts); + + return 0; +} + +static int handle_fgraph_entry_event(struct handle_data *h, + unsigned long long pid, + struct event_data *event_data, + struct tep_record *record, int cpu) +{ + unsigned long long size; + struct start_data *start; + struct task_data *task; + void *caller; + + task = handle_start_event(h, event_data, record, pid); + if (!task) + return -1; + + /* + * If a stack trace hasn't been used for a previous task, + * then it could be a function trace that we can use for + * the function graph. But stack traces come before the function + * graph events (unfortunately). So we need to attach the previous + * stack trace (if there is one) to this start event. 
+ */ + if (task->last_stack) { + start = task->last_start; + record = task->last_stack; + size = record->size - stacktrace_event->data_field->offset; + caller = record->data + stacktrace_event->data_field->offset; + start->stack.record = record; + start->stack.size = size; + start->stack.caller = caller; + task->last_stack = NULL; + task->last_event = NULL; + } + + /* Do not map stacks after this event to this event */ + task->last_start = NULL; + + return 0; +} + +static int handle_fgraph_exit_event(struct handle_data *h, + unsigned long long pid, + struct event_data *event_data, + struct tep_record *record, int cpu) +{ + struct task_data *task; + + task = handle_end_event(h, event_data, record, pid); + if (!task) + return -1; + /* Do not match stacks with function graph exit events */ + task->last_event = NULL; + + return 0; +} + +static int handle_process_exec(struct handle_data *h, + unsigned long long pid, + struct event_data *event_data, + struct tep_record *record, int cpu) +{ + struct task_data *task; + unsigned long long val; + + /* Task has execed, remove the comm for it */ + if (event_data->data_field) { + tep_read_number_field(event_data->data_field, + record->data, &val); + pid = val; + } + + task = find_task(h, pid); + if (!task) + return -1; + + free(task->comm); + task->comm = NULL; + + return 0; +} + +static int handle_sched_wakeup_event(struct handle_data *h, + unsigned long long pid, + struct event_data *event_data, + struct tep_record *record, int cpu) +{ + struct task_data *proxy; + struct task_data *task = NULL; + struct start_data *start; + unsigned long long success; + + proxy = find_task(h, pid); + if (!proxy) + return -1; + + /* If present, data_field holds "success" */ + if (event_data->data_field) { + tep_read_number_field(event_data->data_field, + record->data, &success); + + /* If not a successful wakeup, ignore this */ + if (!success) + return 0; + } + + tep_read_number_field(event_data->pid_field, + record->data, &pid); + + task = 
find_task(h, pid); + if (!task) + return -1; + + if (!task->comm) + add_task_comm(task, h->wakeup_comm, record); + + /* if the task isn't sleeping, then ignore the wake up */ + if (!task->sleeping) { + /* Ignore any following stack traces */ + proxy->proxy = NULL; + proxy->last_start = NULL; + proxy->last_event = NULL; + return 0; + } + + /* It's being woken up */ + task->sleeping = 0; + + /* + * We need the stack trace to be hooked to the woken up + * task, not the waker. + */ + proxy->proxy = task; + + /* There should be a blocked schedule out of this task */ + find_and_update_start(task, event_data->start, record->ts, pid); + + /* Set this up for timing how long the wakeup takes */ + start = add_start(task, event_data, record, pid, pid); + task->last_event = NULL; + task->last_start = start; + + return 0; +} + +void trace_init_profile(struct tracecmd_input *handle, struct hook_list *hook, + int global) +{ + struct tep_handle *pevent = tracecmd_get_tep(handle); + struct tep_format_field **fields; + struct handle_data *h; + struct event_data *event_data; + struct event_data *sched_switch; + struct event_data *sched_wakeup; + struct event_data *irq_entry; + struct event_data *irq_exit; + struct event_data *softirq_entry; + struct event_data *softirq_exit; + struct event_data *softirq_raise; + struct event_data *fgraph_entry; + struct event_data *fgraph_exit; + struct event_data *syscall_enter; + struct event_data *syscall_exit; + struct event_data *process_exec; + struct event_data *start_event; + struct event_data *end_event; + struct tep_event **events; + int ret; + int i; + + tracecmd_set_show_data_func(handle, trace_profile_record); + h = malloc(sizeof(*h)); + if (!h) { + warning("Could not allocate handle"); + return; + }; + memset(h, 0, sizeof(*h)); + h->next = handles; + handles = h; + + trace_hash_init(&h->task_hash, 1024); + trace_hash_init(&h->events, 1024); + trace_hash_init(&h->group_hash, 512); + + h->handle = handle; + h->pevent = pevent; + + h->cpus 
= tracecmd_cpus(handle); + + /* + * For streaming profiling, cpus will not be set up yet. + * In this case, we simply use the number of cpus on the + * system. + */ + if (!h->cpus) + h->cpus = tracecmd_count_cpus(); + + list_head_init(&h->migrate_starts); + h->cpu_starts = malloc(sizeof(*h->cpu_starts) * h->cpus); + if (!h->cpu_starts) + goto free_handle; + + for (i = 0; i < h->cpus; i++) + list_head_init(&h->cpu_starts[i]); + + h->cpu_data = malloc(h->cpus * sizeof(*h->cpu_data)); + if (!h->cpu_data) + goto free_starts; + + memset(h->cpu_data, 0, h->cpus * sizeof(h->cpu_data)); + + h->global_task = malloc(sizeof(struct task_data)); + if (!h->global_task) + goto free_data; + + memset(h->global_task, 0, sizeof(struct task_data)); + init_task(h, h->global_task); + h->global_task->comm = strdup("Global Events"); + if (!h->global_task->comm) + die("malloc"); + h->global_task->pid = -1; + + h->global_percpu_tasks = calloc(h->cpus, sizeof(struct task_data)); + if (!h->global_percpu_tasks) + die("malloc"); + for (i = 0; i < h->cpus; i++) { + init_task(h, &h->global_percpu_tasks[i]); + ret = asprintf(&h->global_percpu_tasks[i].comm, + "Global CPU[%d] Events", i); + if (ret < 0) + die("malloc"); + h->global_percpu_tasks[i].pid = -1 - i; + } + + irq_entry = add_event(h, "irq", "irq_handler_entry", EVENT_TYPE_IRQ); + irq_exit = add_event(h, "irq", "irq_handler_exit", EVENT_TYPE_IRQ); + softirq_entry = add_event(h, "irq", "softirq_entry", EVENT_TYPE_SOFTIRQ); + softirq_exit = add_event(h, "irq", "softirq_exit", EVENT_TYPE_SOFTIRQ); + softirq_raise = add_event(h, "irq", "softirq_raise", EVENT_TYPE_SOFTIRQ_RAISE); + sched_wakeup = add_event(h, "sched", "sched_wakeup", EVENT_TYPE_WAKEUP); + sched_switch = add_event(h, "sched", "sched_switch", EVENT_TYPE_SCHED_SWITCH); + fgraph_entry = add_event(h, "ftrace", "funcgraph_entry", EVENT_TYPE_FUNC); + fgraph_exit = add_event(h, "ftrace", "funcgraph_exit", EVENT_TYPE_FUNC); + syscall_enter = add_event(h, "raw_syscalls", "sys_enter", 
EVENT_TYPE_SYSCALL); + syscall_exit = add_event(h, "raw_syscalls", "sys_exit", EVENT_TYPE_SYSCALL); + + process_exec = add_event(h, "sched", "sched_process_exec", + EVENT_TYPE_PROCESS_EXEC); + + stacktrace_event = add_event(h, "ftrace", "kernel_stack", EVENT_TYPE_STACK); + if (stacktrace_event) { + stacktrace_event->handle_event = handle_stacktrace_event; + + stacktrace_event->data_field = tep_find_field(stacktrace_event->event, + "caller"); + if (!stacktrace_event->data_field) + die("Event: %s does not have field caller", + stacktrace_event->event->name); + } + + if (process_exec) { + process_exec->handle_event = handle_process_exec; + process_exec->data_field = tep_find_field(process_exec->event, + "old_pid"); + } + + if (sched_switch) { + sched_switch->handle_event = handle_sched_switch_event; + sched_switch->data_field = tep_find_field(sched_switch->event, + "prev_state"); + if (!sched_switch->data_field) + die("Event: %s does not have field prev_state", + sched_switch->event->name); + + h->switch_prev_comm = tep_find_field(sched_switch->event, + "prev_comm"); + if (!h->switch_prev_comm) + die("Event: %s does not have field prev_comm", + sched_switch->event->name); + + h->switch_next_comm = tep_find_field(sched_switch->event, + "next_comm"); + if (!h->switch_next_comm) + die("Event: %s does not have field next_comm", + sched_switch->event->name); + + sched_switch->print_func = sched_switch_print; + } + + if (sched_switch && sched_wakeup) { + mate_events(h, sched_switch, "prev_pid", "next_pid", + sched_wakeup, "pid", 1, 0); + mate_events(h, sched_wakeup, "pid", "pid", + sched_switch, "prev_pid", 1, 0); + sched_wakeup->handle_event = handle_sched_wakeup_event; + + /* The 'success' field may or may not be present */ + sched_wakeup->data_field = tep_find_field(sched_wakeup->event, + "success"); + + h->wakeup_comm = tep_find_field(sched_wakeup->event, "comm"); + if (!h->wakeup_comm) + die("Event: %s does not have field comm", + sched_wakeup->event->name); + } + + if 
(irq_entry && irq_exit) + mate_events(h, irq_entry, NULL, "irq", irq_exit, "irq", 0, global); + + if (softirq_entry) + softirq_entry->print_func = softirq_print; + + if (softirq_exit) + softirq_exit->print_func = softirq_print; + + if (softirq_raise) + softirq_raise->print_func = softirq_print; + + if (softirq_entry && softirq_exit) + mate_events(h, softirq_entry, NULL, "vec", softirq_exit, "vec", + 0, global); + + if (softirq_entry && softirq_raise) + mate_events(h, softirq_raise, NULL, "vec", softirq_entry, "vec", + 0, global); + + if (fgraph_entry && fgraph_exit) { + mate_events(h, fgraph_entry, NULL, "func", fgraph_exit, "func", 1, 0); + fgraph_entry->handle_event = handle_fgraph_entry_event; + fgraph_exit->handle_event = handle_fgraph_exit_event; + fgraph_entry->print_func = func_print; + } + + if (syscall_enter && syscall_exit) { + mate_events(h, syscall_enter, NULL, "id", syscall_exit, "id", 1, 0); + syscall_enter->print_func = syscall_print; + syscall_exit->print_func = syscall_print; + } + + events = tep_list_events(pevent, TEP_EVENT_SORT_ID); + if (!events) + die("malloc"); + + /* Add some other events */ + event_data = add_event(h, "ftrace", "function", EVENT_TYPE_FUNC); + if (event_data) { + event_data->data_field = + tep_find_field(event_data->event, "ip"); + } + + /* Add any user defined hooks */ + for (; hook; hook = hook->next) { + start_event = add_event(h, hook->start_system, hook->start_event, + EVENT_TYPE_USER_MATE); + end_event = add_event(h, hook->end_system, hook->end_event, + EVENT_TYPE_USER_MATE); + if (!start_event) { + warning("Event %s not found", hook->start_event); + continue; + } + if (!end_event) { + warning("Event %s not found", hook->end_event); + continue; + } + mate_events(h, start_event, hook->pid, hook->start_match, + end_event, hook->end_match, hook->migrate, + hook->global); + } + + /* Now add any defined event that we haven't processed */ + for (i = 0; events[i]; i++) { + event_data = find_event_data(h, events[i]->id); + if 
(event_data) + continue; + + event_data = add_event(h, events[i]->system, events[i]->name, + EVENT_TYPE_UNDEFINED); + + fields = tep_event_fields(events[i]); + if (!fields) + die("malloc"); + + if (fields[0]) + event_data->data_field = fields[0]; + + free(fields); + } + return; + + free_data: + free(h->cpu_data); + free_starts: + free(h->cpu_starts); + free_handle: + handles = h->next; + free(h); + warning("Failed handle allocations"); +} + +static void output_event_stack(struct tep_handle *pevent, struct stack_data *stack) +{ + int longsize = tep_get_long_size(pevent); + unsigned long long val; + const char *func; + unsigned long long stop = -1ULL; + void *ptr; + int i; + + if (longsize < 8) + stop &= (1ULL << (longsize * 8)) - 1; + + if (stack->count) + stack->time_avg = stack->time / stack->count; + + printf(" <stack> %lld total:%lld min:%lld(ts:%lld.%06lld) max:%lld(ts:%lld.%06lld) avg=%lld\n", + stack->count, stack->time, stack->time_min, + nsecs_per_sec(stack->ts_min), mod_to_usec(stack->ts_min), + stack->time_max, + nsecs_per_sec(stack->ts_max), mod_to_usec(stack->ts_max), + stack->time_avg); + + for (i = 0; i < stack->size; i += longsize) { + ptr = stack->caller + i; + switch (longsize) { + case 4: + /* todo, read value from pevent */ + val = *(unsigned int *)ptr; + break; + case 8: + val = *(unsigned long long *)ptr; + break; + default: + die("Strange long size %d", longsize); + } + if (val == stop) + break; + func = tep_find_function(pevent, val); + if (func) + printf(" => %s (0x%llx)\n", func, val); + else + printf(" => 0x%llx\n", val); + } +} + +struct stack_chain { + struct stack_chain *children; + unsigned long long val; + unsigned long long time; + unsigned long long time_min; + unsigned long long ts_min; + unsigned long long time_max; + unsigned long long ts_max; + unsigned long long time_avg; + unsigned long long count; + int percent; + int nr_children; +}; + +static int compare_chains(const void *a, const void *b) +{ + const struct stack_chain * A 
= a; + const struct stack_chain * B = b; + + if (A->time > B->time) + return -1; + if (A->time < B->time) + return 1; + /* If stacks don't use time, then use count */ + if (A->count > B->count) + return -1; + if (A->count < B->count) + return 1; + return 0; +} + +static int calc_percent(unsigned long long val, unsigned long long total) +{ + return (val * 100 + total / 2) / total; +} + +static int stack_overflows(struct stack_data *stack, int longsize, int level) +{ + return longsize * level > stack->size - longsize; +} + +static unsigned long long +stack_value(struct stack_data *stack, int longsize, int level) +{ + void *ptr; + + ptr = &stack->caller[longsize * level]; + return longsize == 8 ? *(u64 *)ptr : *(unsigned *)ptr; +} + +static struct stack_chain * +make_stack_chain(struct stack_data **stacks, int cnt, int longsize, int level, + int *nr_children) +{ + struct stack_chain *chain; + unsigned long long total_time = 0; + unsigned long long total_count = 0; + unsigned long long time; + unsigned long long time_min; + unsigned long long ts_min; + unsigned long long time_max; + unsigned long long ts_max; + unsigned long long count; + unsigned long long stop = -1ULL; + int nr_chains = 0; + u64 last = 0; + u64 val; + int start; + int i; + int x; + + if (longsize < 8) + stop &= (1ULL << (longsize * 8)) - 1; + + /* First find out how many diffs there are */ + for (i = 0; i < cnt; i++) { + if (stack_overflows(stacks[i], longsize, level)) + continue; + + val = stack_value(stacks[i], longsize, level); + + if (val == stop) + continue; + + if (!nr_chains || val != last) + nr_chains++; + last = val; + } + + if (!nr_chains) { + *nr_children = 0; + return NULL; + } + + chain = malloc(sizeof(*chain) * nr_chains); + if (!chain) { + warning("Could not allocate chain"); + return NULL; + } + memset(chain, 0, sizeof(*chain) * nr_chains); + + x = 0; + count = 0; + start = 0; + time = 0; + time_min = 0; + time_max = 0; + + for (i = 0; i < cnt; i++) { + if (stack_overflows(stacks[i], 
longsize, level)) {
			start = i+1;
			continue;
		}

		val = stack_value(stacks[i], longsize, level);

		if (val == stop) {
			start = i+1;
			continue;
		}

		/* Accumulate this stack into the chain currently being built */
		count += stacks[i]->count;
		time += stacks[i]->time;
		if (stacks[i]->time_max > time_max) {
			time_max = stacks[i]->time_max;
			ts_max = stacks[i]->ts_max;
		}
		/* The first stack of a chain (i == start) seeds the minimum */
		if (i == start || stacks[i]->time_min < time_min) {
			time_min = stacks[i]->time_min;
			ts_min = stacks[i]->ts_min;
		}
		/* Close the chain when the next stack differs at this level */
		if (i == cnt - 1 ||
		    stack_overflows(stacks[i+1], longsize, level) ||
		    val != stack_value(stacks[i+1], longsize, level)) {

			total_time += time;
			total_count += count;
			chain[x].val = val;
			chain[x].time_avg = time / count;
			chain[x].count = count;
			chain[x].time = time;
			chain[x].time_min = time_min;
			chain[x].ts_min = ts_min;
			chain[x].time_max = time_max;
			chain[x].ts_max = ts_max;
			/* Recurse over just the stacks that formed this chain */
			chain[x].children =
				make_stack_chain(&stacks[start], (i - start) + 1,
						 longsize, level+1,
						 &chain[x].nr_children);
			x++;
			start = i + 1;
			count = 0;
			time = 0;
			time_min = 0;
			time_max = 0;
		}
	}

	qsort(chain, nr_chains, sizeof(*chain), compare_chains);

	*nr_children = nr_chains;

	/* Should never happen */
	if (!total_time && !total_count)
		return chain;


	/* Now calculate percentage */
	time = 0;
	for (i = 0; i < nr_chains; i++) {
		if (total_time)
			chain[i].percent = calc_percent(chain[i].time, total_time);
		/* In case stacks don't have time */
		else if (total_count)
			chain[i].percent = calc_percent(chain[i].count, total_count);
	}

	return chain;
}

/*
 * Recursively free a chain array of @nr_chains entries together with
 * all of its children. A NULL @chain is ignored.
 */
static void free_chain(struct stack_chain *chain, int nr_chains)
{
	int i;

	if (!chain)
		return;

	for (i = 0; i < nr_chains; i++)
		free_chain(chain[i].children, chain[i].nr_children);

	free(chain);
}

/* Field width used for each indentation column of the chain output */
#define INDENT 5

/*
 * Print level + 1 indentation columns. Column p shows a '|' when bit p
 * of @mask is set and a blank otherwise.
 */
static void print_indent(int level, unsigned long long mask)
{
	char line;
	int p;

	for (p = 0; p < level + 1; p++) {
		if (mask & (1ULL << p))
			line = '|';
		else
			line = ' ';
		printf("%*c ", INDENT,
line);
	}
}

/*
 * Print the address held by @chain on its own line, prefixed with the
 * function name when tep_find_function() can resolve it.
 */
static void print_chain_func(struct tep_handle *pevent, struct stack_chain *chain)
{
	unsigned long long val = chain->val;
	const char *func;

	func = tep_find_function(pevent, val);
	if (func)
		printf("%s (0x%llx)\n", func, val);
	else
		printf("0x%llx\n", val);
}

/*
 * Print @nr_chains sibling chains at depth @level as an ASCII tree and
 * recurse into their children.
 *
 * @mask records which indentation columns currently need a '|'
 * connector. Bit (level + 1) is set while siblings remain at this level
 * and cleared once the last one has been reached.
 */
static void output_chain(struct tep_handle *pevent, struct stack_chain *chain, int level,
			 int nr_chains, unsigned long long *mask)
{
	struct stack_chain *child;
	int nr_children;
	int i;
	char line = '|';

	if (!nr_chains)
		return;

	*mask |= (1ULL << (level + 1));
	print_indent(level + 1, *mask);
	printf("\n");

	for (i = 0; i < nr_chains; i++) {

		print_indent(level, *mask);

		printf("%*c ", INDENT, '+');

		/* Last sibling: stop drawing the connector for this column */
		if (i == nr_chains - 1) {
			*mask &= ~(1ULL << (level + 1));
			line = ' ';
		}

		print_chain_func(pevent, &chain[i]);

		print_indent(level, *mask);

		printf("%*c ", INDENT, line);
		printf(" %d%% (%lld)", chain[i].percent, chain[i].count);
		if (chain[i].time)
			printf(" time:%lld max:%lld(ts:%lld.%06lld) min:%lld(ts:%lld.%06lld) avg:%lld",
			       chain[i].time, chain[i].time_max,
			       nsecs_per_sec(chain[i].ts_max),
			       mod_to_usec(chain[i].ts_max),
			       chain[i].time_min,
			       nsecs_per_sec(chain[i].ts_min),
			       mod_to_usec(chain[i].ts_min),
			       chain[i].time_avg);
		printf("\n");

		/*
		 * While a node has exactly one child, print that child
		 * inline at this depth rather than opening a new level.
		 */
		for (child = chain[i].children, nr_children = chain[i].nr_children;
		     child && nr_children == 1;
		     nr_children = child->nr_children, child = child->children) {
			print_indent(level, *mask);
			printf("%*c ", INDENT, line);
			printf(" ");
			print_chain_func(pevent, child);
		}

		if (child)
			output_chain(pevent, child, level+1, nr_children, mask);

		print_indent(level + 1, *mask);
		printf("\n");
	}
	*mask &= ~(1ULL << (level + 1));
	print_indent(level, *mask);
	printf("\n");
}

/*
 * qsort() comparator for an array of struct stack_data pointers.
 * Compares the caller data in unsigned-int sized steps over the common
 * length; when that part is equal the larger stack sorts last.
 */
static int compare_stacks(const void *a, const void *b)
{
	struct stack_data * const *A = a;
	struct stack_data * const *B = b;
	unsigned int sa, sb;
	int size;
	int i;

	/* only compare
up to the smaller size of the two */
	if ((*A)->size > (*B)->size)
		size = (*B)->size;
	else
		size = (*A)->size;

	for (i = 0; i < size; i += sizeof(sa)) {
		sa = *(unsigned *)&(*A)->caller[i];
		sb = *(unsigned *)&(*B)->caller[i];
		if (sa > sb)
			return 1;
		if (sa < sb)
			return -1;
	}

	/* They are the same up to size. Then bigger size wins */
	if ((*A)->size > (*B)->size)
		return 1;
	if ((*A)->size < (*B)->size)
		return -1;
	return 0;
}

/*
 * Print every stack stored in @stack_hash as a call-chain tree.
 *
 * The stacks are collected into a temporary array and sorted with
 * compare_stacks(), which is the ordering make_stack_chain() relies on
 * to fold equal entries. The tree is then built, printed and freed.
 */
static void output_stacks(struct tep_handle *pevent, struct trace_hash *stack_hash)
{
	struct trace_hash_item **bucket;
	struct trace_hash_item *item;
	struct stack_data **stacks;
	struct stack_chain *chain;
	unsigned long long mask = 0;
	int nr_chains;
	int longsize = tep_get_long_size(pevent);
	int nr_stacks;
	int i;

	/* First pass: count the stacks so the array can be sized */
	nr_stacks = 0;
	trace_hash_for_each_bucket(bucket, stack_hash) {
		trace_hash_for_each_item(item, bucket) {
			nr_stacks++;
		}
	}

	stacks = malloc(sizeof(*stacks) * nr_stacks);
	if (!stacks) {
		warning("Could not allocate stacks");
		return;
	}

	/* Second pass: fill the array */
	nr_stacks = 0;
	trace_hash_for_each_bucket(bucket, stack_hash) {
		trace_hash_for_each_item(item, bucket) {
			stacks[nr_stacks++] = stack_from_item(item);
		}
	}

	qsort(stacks, nr_stacks, sizeof(*stacks), compare_stacks);

	chain = make_stack_chain(stacks, nr_stacks, longsize, 0, &nr_chains);

	output_chain(pevent, chain, 0, nr_chains, &mask);

	/* Flat per-stack dump, compiled in but never executed */
	if (0)
		for (i = 0; i < nr_stacks; i++)
			output_event_stack(pevent, stacks[i]);

	free(stacks);
	free_chain(chain, nr_chains);
}

/*
 * Print one event entry: its description, hit count, timing statistics
 * when time_total is non-zero, and then its stacks.
 */
static void output_event(struct event_hash *event_hash)
{
	struct event_data *event_data = event_hash->event_data;
	struct tep_handle *pevent = event_data->event->tep;
	struct trace_seq s;

	trace_seq_init(&s);

	/* An event-specific printer takes precedence over the defaults */
	if (event_data->print_func)
		event_data->print_func(&s, event_hash);
	else if (event_data->type == EVENT_TYPE_FUNC)
		func_print(&s, event_hash);
	else
		trace_seq_printf(&s, "%s:0x%llx",
				 event_data->event->name,
				 event_hash->val);
	trace_seq_terminate(&s);

	printf(" Event: %s (%lld)",
	       s.buffer, event_hash->count);

	trace_seq_destroy(&s);

	if (event_hash->time_total) {
		event_hash->time_avg = event_hash->time_total / event_hash->count;
		printf(" Total: %lld Avg: %lld Max: %lld(ts:%lld.%06lld) Min:%lld(ts:%lld.%06lld)",
		       event_hash->time_total, event_hash->time_avg,
		       event_hash->time_max,
		       nsecs_per_sec(event_hash->ts_max),
		       mod_to_usec(event_hash->ts_max),
		       event_hash->time_min,
		       nsecs_per_sec(event_hash->ts_min),
		       mod_to_usec(event_hash->ts_min));
	}
	printf("\n");

	output_stacks(pevent, &event_hash->stacks);
}

/*
 * qsort() comparator for an array of struct event_hash pointers.
 *
 * Order: sched-switch events first (ascending val), then wakeup events,
 * then everything else by ascending event id with larger time_total
 * first among equal ids.
 */
static int compare_events(const void *a, const void *b)
{
	struct event_hash * const *A = a;
	struct event_hash * const *B = b;
	const struct event_data *event_data_a = (*A)->event_data;
	const struct event_data *event_data_b = (*B)->event_data;

	/* Schedule switch goes first */
	if (event_data_a->type == EVENT_TYPE_SCHED_SWITCH) {
		if (event_data_b->type != EVENT_TYPE_SCHED_SWITCH)
			return -1;
		/* lower the state the better */
		if ((*A)->val > (*B)->val)
			return 1;
		if ((*A)->val < (*B)->val)
			return -1;
		return 0;
	} else if (event_data_b->type == EVENT_TYPE_SCHED_SWITCH)
		return 1;

	/* Wakeups are next */
	if (event_data_a->type == EVENT_TYPE_WAKEUP) {
		if (event_data_b->type != EVENT_TYPE_WAKEUP)
			return -1;
		return 0;
	} else if (event_data_b->type == EVENT_TYPE_WAKEUP)
		return 1;

	if (event_data_a->id > event_data_b->id)
		return 1;
	if (event_data_a->id < event_data_b->id)
		return -1;
	if ((*A)->time_total > (*B)->time_total)
		return -1;
	if ((*A)->time_total < (*B)->time_total)
		return 1;
	return 0;
}

/*
 * Print the header for @task and then all of its events, sorted with
 * compare_events(). Tasks that have been assigned to a group are skipped.
 */
static void output_task(struct handle_data *h, struct task_data *task)
{
	struct trace_hash_item **bucket;
	struct trace_hash_item *item;
	struct event_hash **events;
	const char *comm;
	int nr_events = 0;
	int i;

	if (task->group)
		return;

	if (task->comm)
		comm = task->comm;
+ else + comm = tep_data_comm_from_pid(h->pevent, task->pid); + + if (task->pid < 0) + printf("%s\n", task->comm); + else + printf("\ntask: %s-%d\n", comm, task->pid); + + trace_hash_for_each_bucket(bucket, &task->event_hash) { + trace_hash_for_each_item(item, bucket) { + nr_events++; + } + } + + events = malloc(sizeof(*events) * nr_events); + if (!events) { + warning("Could not allocate events"); + return; + } + + i = 0; + trace_hash_for_each_bucket(bucket, &task->event_hash) { + trace_hash_for_each_item(item, bucket) { + events[i++] = event_from_item(item); + } + } + + qsort(events, nr_events, sizeof(*events), compare_events); + + for (i = 0; i < nr_events; i++) + output_event(events[i]); + + free(events); +} + +static void output_group(struct handle_data *h, struct group_data *group) +{ + struct trace_hash_item **bucket; + struct trace_hash_item *item; + struct event_hash **events; + int nr_events = 0; + int i; + + printf("\ngroup: %s\n", group->comm); + + trace_hash_for_each_bucket(bucket, &group->event_hash) { + trace_hash_for_each_item(item, bucket) { + nr_events++; + } + } + + events = malloc(sizeof(*events) * nr_events); + if (!events) { + warning("Could not allocate events"); + return; + } + + i = 0; + trace_hash_for_each_bucket(bucket, &group->event_hash) { + trace_hash_for_each_item(item, bucket) { + events[i++] = event_from_item(item); + } + } + + qsort(events, nr_events, sizeof(*events), compare_events); + + for (i = 0; i < nr_events; i++) + output_event(events[i]); + + free(events); +} + +static int compare_tasks(const void *a, const void *b) +{ + struct task_data * const *A = a; + struct task_data * const *B = b; + + if ((*A)->pid > (*B)->pid) + return 1; + else if ((*A)->pid < (*B)->pid) + return -1; + return 0; +} + +static int compare_groups(const void *a, const void *b) +{ + const char *A = a; + const char *B = b; + + return strcmp(A, B); +} + +static void free_event_hash(struct event_hash *event_hash) +{ + struct trace_hash_item **bucket; + 
	struct trace_hash_item *item;
	struct stack_data *stack;

	/* Unhook and free every stack before releasing the hash itself */
	trace_hash_for_each_bucket(bucket, &event_hash->stacks) {
		trace_hash_while_item(item, bucket) {
			stack = stack_from_item(item);
			trace_hash_del(&stack->hash);
			free(stack);
		}
	}
	trace_hash_free(&event_hash->stacks);
	free(event_hash);
}

/*
 * Release everything owned by @task: its comm, pending start entries
 * (with any stack record they hold), event hashes and the last stack
 * record. The task structure itself is not freed.
 */
static void __free_task(struct task_data *task)
{
	struct trace_hash_item **bucket;
	struct trace_hash_item *item;
	struct start_data *start;
	struct event_hash *event_hash;

	free(task->comm);

	trace_hash_for_each_bucket(bucket, &task->start_hash) {
		trace_hash_while_item(item, bucket) {
			start = start_from_item(item);
			if (start->stack.record)
				tracecmd_free_record(start->stack.record);
			list_del(&start->list);
			trace_hash_del(item);
			free(start);
		}
	}
	trace_hash_free(&task->start_hash);

	trace_hash_for_each_bucket(bucket, &task->event_hash) {
		trace_hash_while_item(item, bucket) {
			event_hash = event_from_item(item);
			trace_hash_del(item);
			free_event_hash(event_hash);
		}
	}
	trace_hash_free(&task->event_hash);

	if (task->last_stack)
		tracecmd_free_record(task->last_stack);
}

/* Release the contents of @task and then the task structure itself */
static void free_task(struct task_data *task)
{
	__free_task(task);
	free(task);
}

/* Release @group: its comm, every merged event hash and the group itself */
static void free_group(struct group_data *group)
{
	struct trace_hash_item **bucket;
	struct trace_hash_item *item;
	struct event_hash *event_hash;

	free(group->comm);

	trace_hash_for_each_bucket(bucket, &group->event_hash) {
		trace_hash_while_item(item, bucket) {
			event_hash = event_from_item(item);
			trace_hash_del(item);
			free_event_hash(event_hash);
		}
	}
	trace_hash_free(&group->event_hash);
	free(group);
}

/* Print a global task only when it actually recorded some events */
static void show_global_task(struct handle_data *h,
			     struct task_data *task)
{
	if (trace_hash_empty(&task->event_hash))
		return;

	output_task(h, task);
}

/*
 * Print every task of @h ordered by pid. Tasks are removed from the
 * task hash as they are collected and freed once printed.
 */
static void output_tasks(struct handle_data *h)
{
	struct trace_hash_item **bucket;
	struct trace_hash_item *item;
	struct task_data **tasks;
	int nr_tasks =
0;
	int i;

	trace_hash_for_each_bucket(bucket, &h->task_hash) {
		trace_hash_for_each_item(item, bucket) {
			nr_tasks++;
		}
	}

	tasks = malloc(sizeof(*tasks) * nr_tasks);
	if (!tasks) {
		warning("Could not allocate tasks");
		return;
	}

	nr_tasks = 0;

	/* Move the tasks out of the hash; the array owns them from here on */
	trace_hash_for_each_bucket(bucket, &h->task_hash) {
		trace_hash_while_item(item, bucket) {
			tasks[nr_tasks++] = task_from_item(item);
			trace_hash_del(item);
		}
	}

	qsort(tasks, nr_tasks, sizeof(*tasks), compare_tasks);

	for (i = 0; i < nr_tasks; i++) {
		output_task(h, tasks[i]);
		free_task(tasks[i]);
	}

	free(tasks);
}

/*
 * Print every group of @h in the order given by compare_groups().
 * Groups are removed from the group hash as they are collected and
 * freed once printed. Does nothing when there are no groups.
 */
static void output_groups(struct handle_data *h)
{
	struct trace_hash_item **bucket;
	struct trace_hash_item *item;
	struct group_data **groups;
	int nr_groups = 0;
	int i;

	trace_hash_for_each_bucket(bucket, &h->group_hash) {
		trace_hash_for_each_item(item, bucket) {
			nr_groups++;
		}
	}

	if (nr_groups == 0)
		return;

	groups = malloc(sizeof(*groups) * nr_groups);
	if (!groups) {
		warning("Could not allocate groups");
		return;
	}

	nr_groups = 0;

	/* Move the groups out of the hash; the array owns them from here on */
	trace_hash_for_each_bucket(bucket, &h->group_hash) {
		trace_hash_while_item(item, bucket) {
			groups[nr_groups++] = group_from_item(item);
			trace_hash_del(item);
		}
	}

	qsort(groups, nr_groups, sizeof(*groups), compare_groups);

	for (i = 0; i < nr_groups; i++) {
		output_group(h, groups[i]);
		free_group(groups[i]);
	}

	free(groups);
}

/*
 * Print the whole report for one handle: the global task, the per-CPU
 * global tasks, then the groups and finally the individual tasks.
 */
static void output_handle(struct handle_data *h)
{
	int i;

	show_global_task(h, h->global_task);
	for (i = 0; i < h->cpus; i++)
		show_global_task(h, &h->global_percpu_tasks[i]);

	output_groups(h);
	output_tasks(h);
}

/*
 * Merge @stack into @event's stack hash. When no matching stack exists,
 * @stack itself is inserted; otherwise its statistics are folded into
 * the existing entry and @stack is freed.
 */
static void merge_event_stack(struct event_hash *event,
			      struct stack_data *stack)
{
	struct stack_data *exist;
	struct trace_hash_item *item;
	struct stack_match match;

	match.caller = stack->caller;
	match.size = stack->size;
	item = trace_hash_find(&event->stacks, stack->hash.key, match_stack,
			       &match);
	if (!item) {
		trace_hash_add(&event->stacks, &stack->hash);
		return;
	}
	exist = stack_from_item(item);
	exist->count += stack->count;
	exist->time += stack->time;

	if (exist->time_max < stack->time_max) {
		exist->time_max = stack->time_max;
		exist->ts_max = stack->ts_max;
	}
	if (exist->time_min > stack->time_min) {
		exist->time_min = stack->time_min;
		exist->ts_min = stack->ts_min;
	}
	free(stack);
}

/*
 * Move every stack of @event into @exist. Each stack is unhooked from
 * @event's hash and handed to merge_event_stack(), which either keeps
 * it or frees it.
 */
static void merge_stacks(struct event_hash *exist, struct event_hash *event)
{
	struct stack_data *stack;
	struct trace_hash_item *item;
	struct trace_hash_item **bucket;

	trace_hash_for_each_bucket(bucket, &event->stacks) {
		trace_hash_while_item(item, bucket) {
			stack = stack_from_item(item);
			trace_hash_del(&stack->hash);
			merge_event_stack(exist, stack);
		}
	}
}

/*
 * Merge @event into @group's event hash.
 *
 * Wakeup and sched-switch events get a hash key recomputed from the
 * event data (plus val for sched-switch); other events keep their
 * existing key. When the group has no matching event, @event is
 * inserted as is. Otherwise its statistics and stacks are folded into
 * the existing entry and @event is freed.
 */
static void merge_event_into_group(struct group_data *group,
				   struct event_hash *event)
{
	struct event_hash *exist;
	struct trace_hash_item *item;
	struct event_data_match edata;
	unsigned long long key;

	if (event->event_data->type == EVENT_TYPE_WAKEUP) {
		edata.event_data = event->event_data;
		event->search_val = 0;
		event->val = 0;
		key = trace_hash((unsigned long)event->event_data);
	} else if (event->event_data->type == EVENT_TYPE_SCHED_SWITCH) {
		edata.event_data = event->event_data;
		event->search_val = event->val;
		key = (unsigned long)event->event_data +
			((unsigned long)event->val * 2);
		key = trace_hash(key);
	} else {
		key = event->hash.key;
	}

	edata.event_data = event->event_data;
	edata.search_val = event->search_val;
	edata.val = event->val;

	item = trace_hash_find(&group->event_hash, key, match_event, &edata);
	if (!item) {
		event->hash.key = key;
		trace_hash_add(&group->event_hash, &event->hash);
		return;
	}

	exist = event_from_item(item);
	exist->count += event->count;
	exist->time_total += event->time_total;

	if (exist->time_max < event->time_max) {
		exist->time_max = event->time_max;
		exist->ts_max =
event->ts_max;
	}
	if (exist->time_min > event->time_min) {
		exist->time_min = event->time_min;
		exist->ts_min = event->ts_min;
	}

	merge_stacks(exist, event);
	free_event_hash(event);
}

/*
 * Attach @task to the group named after its comm, creating the group on
 * first use, and move all of the task's events into that group. Tasks
 * without a comm are left alone.
 */
static void add_group(struct handle_data *h, struct task_data *task)
{
	unsigned long long key;
	struct trace_hash_item *item;
	struct group_data *grp;
	struct trace_hash_item **bucket;
	void *data = task->comm;

	if (!task->comm)
		return;

	key = trace_hash_str(task->comm);

	item = trace_hash_find(&h->group_hash, key, match_group, data);
	if (item) {
		grp = group_from_item(item);
	} else {
		grp = malloc(sizeof(*grp));
		if (!grp) {
			warning("Could not allocate group");
			return;
		}
		memset(grp, 0, sizeof(*grp));

		grp->comm = strdup(task->comm);
		if (!grp->comm)
			die("strdup");
		grp->hash.key = key;
		trace_hash_add(&h->group_hash, &grp->hash);
		trace_hash_init(&grp->event_hash, 32);
	}
	task->group = grp;

	/* Hand every event of the task over to the group */
	trace_hash_for_each_bucket(bucket, &task->event_hash) {
		trace_hash_while_item(item, bucket) {
			struct event_hash *event_hash;

			event_hash = event_from_item(item);
			trace_hash_del(&event_hash->hash);
			merge_event_into_group(grp, event_hash);
		}
	}
}

/* Group every task of @h by comm; a no-op unless merge_like_comms is set */
static void merge_tasks(struct handle_data *h)
{
	struct trace_hash_item **bucket;
	struct trace_hash_item *item;

	if (!merge_like_comms)
		return;

	trace_hash_for_each_bucket(bucket, &h->task_hash) {
		trace_hash_for_each_item(item, bucket)
			add_group(h, task_from_item(item));
	}
}

/*
 * Print the profile report for every handle on the global handles list,
 * merging tasks by comm first when merge_like_comms is set. Always
 * returns 0.
 */
int do_trace_profile(void)
{
	struct handle_data *h;

	for (h = handles; h; h = h->next) {
		if (merge_like_comms)
			merge_tasks(h);
		output_handle(h);
		trace_hash_free(&h->task_hash);
	}

	return 0;
}
diff --git a/tracecmd/trace-read.c b/tracecmd/trace-read.c
new file mode 100644
index 00000000..df559d2a
--- /dev/null
+++ b/tracecmd/trace-read.c
@@ -0,0 +1,1984 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt
<srostedt@redhat.com>
 *
 */
#define _LARGEFILE64_SOURCE
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <getopt.h>
#include <stdarg.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>
#include <ctype.h>
#include <errno.h>

#include "trace-local.h"
#include "trace-hash.h"
#include "trace-hash-local.h"
#include "kbuffer.h"
#include "list.h"

/*
 * tep_func_repeat_format is defined as a weak variable in the
 * libtraceevent library function plugin, to allow applications
 * to override the format of the timestamp it prints for the
 * last function that repeated.
 */
const char *tep_func_repeat_format;

/*
 * Filter strings in command-line order. filter_next always points at
 * the link where the next entry is appended.
 */
static struct filter_str {
	struct filter_str	*next;
	char			*filter;	/* strdup()ed by add_filter() */
	int			neg;
} *filter_strings;
static struct filter_str **filter_next = &filter_strings;

/* Singly linked list node holding one tep event filter */
struct filter {
	struct filter		*next;
	struct tep_event_filter	*filter;
};

struct event_str {
	struct event_str	*next;
	const char		*event;
};

/* One opened trace input, linked on the global handle_list */
struct handle_list {
	struct list_head	list;
	struct tracecmd_input	*handle;
	const char		*file;	/* base name within the path given to add_handle() */
	int			cpus;
	int			done;
	struct tep_record	*record;
	struct filter		*event_filters;
	struct filter		*event_filter_out;
	unsigned long long	*last_timestamp;
};
static struct list_head handle_list;

/* One input file name, linked on the global input_files list */
struct input_files {
	struct list_head	list;
	const char		*file;
	long long		tsoffset;
	unsigned long long	ts2secs;
};
static struct list_head input_files;
static struct input_files *last_input_file;

/*
 * One comma-separated token of a pid/comm filter argument. Only nodes
 * with free set own the buffer that pid points into.
 */
struct pid_list {
	struct pid_list		*next;
	char			*pid;
	int			free;
} *pid_list;

struct pid_list *comm_list;

static unsigned int page_size;
static int input_fd;
static const char *default_input_file = DEFAULT_INPUT_FILE;
static const char *input_file;
static int multi_inputs;
static int max_file_size;	/* longest base name seen by add_handle() */

static int instances;

static int *filter_cpus;
static int
nr_filter_cpus; +static int test_filters_mode; + +static int show_wakeup; +static int wakeup_id; +static int wakeup_new_id; +static int sched_id; +static int stacktrace_id; + +static int profile; + +static int buffer_breaks = 0; + +static int no_irqs; +static int no_softirqs; + +static int tsdiff; +static int tscheck; + +static int latency_format; +static bool raw_format; +static const char *format_type = TEP_PRINT_INFO; + +static struct tep_format_field *wakeup_task; +static struct tep_format_field *wakeup_success; +static struct tep_format_field *wakeup_new_task; +static struct tep_format_field *wakeup_new_success; +static struct tep_format_field *sched_task; +static struct tep_format_field *sched_prio; + +static unsigned long long total_wakeup_lat; +static unsigned long wakeup_lat_count; + +static unsigned long long total_wakeup_rt_lat; +static unsigned long wakeup_rt_lat_count; + +struct wakeup_info { + struct trace_hash_item hash; + unsigned long long start; + int pid; +}; + +static struct hook_list *hooks; +static struct hook_list *last_hook; + +#define WAKEUP_HASH_SIZE 1024 +static struct trace_hash wakeup_hash; + +static void print_event_name(struct trace_seq *s, struct tep_event *event) +{ + static const char *spaces = " "; /* 20 spaces */ + const char *name; + int len; + + name = event ? event->name : "(NULL)"; + + trace_seq_printf(s, " %s: ", name); + + /* Space out the event names evenly. 
*/
	len = strlen(name);
	if (len < 20)
		trace_seq_printf(s, "%.*s", 20 - len, spaces);
}

/* Selects which format string time_format() returns */
enum time_fmt {
	TIME_FMT_LAT		= 1,
	TIME_FMT_NORMAL		= 2,
};

/*
 * Return the format string to pass to tep_print_event() for the
 * requested part of the output line. TIME_FMT_LAT selects the
 * comm/pid/cpu format, which depends on latency_format. Any other value
 * selects the timestamp format, which depends on the handle's
 * TRACECMD_FL_IN_USECS flag and the tep TEP_NSEC_OUTPUT flag.
 */
static const char *time_format(struct tracecmd_input *handle, enum time_fmt tf)
{
	struct tep_handle *tep = tracecmd_get_tep(handle);

	switch (tf) {
	case TIME_FMT_LAT:
		if (latency_format)
			return "%8.8s-%-5d %3d";
		return "%16s-%-5d [%03d]";
	default:
		if (tracecmd_get_flags(handle) & TRACECMD_FL_IN_USECS) {
			if (tep_test_flag(tep, TEP_NSEC_OUTPUT))
				return " %9.1d:";
			else
				return " %6.1000d:";
		} else
			return "%12d:";
	}
}

/*
 * Format one record into @s: comm/pid/cpu, timestamp, padded event name
 * and finally the event data as selected by format_type.
 */
static void print_event(struct trace_seq *s, struct tracecmd_input *handle,
			struct tep_record *record)
{
	struct tep_handle *tep = tracecmd_get_tep(handle);
	struct tep_event *event;
	const char *lfmt = time_format(handle, TIME_FMT_LAT);
	const char *tfmt = time_format(handle, TIME_FMT_NORMAL);

	event = tep_find_event_by_record(tep, record);
	tep_print_event(tep, s, record, lfmt, TEP_PRINT_COMM,
			TEP_PRINT_PID, TEP_PRINT_CPU);
	tep_print_event(tep, s, record, tfmt, TEP_PRINT_TIME);
	print_event_name(s, event);
	tep_print_event(tep, s, record, "%s", format_type);
}

/* Debug variables for testing tracecmd_read_at */
#define TEST_READ_AT 0
#if TEST_READ_AT
#define DO_TEST
static off64_t test_read_at_offset;
static int test_read_at_copy = 100;
static int test_read_at_index;
/* Re-read and print the record whose offset test_save() remembered */
static void show_test(struct tracecmd_input *handle)
{
	struct tep_record *record;
	struct trace_seq s;

	if (!test_read_at_offset) {
		printf("\nNO RECORD COPIED\n");
		return;
	}

	record = tracecmd_read_at(handle, test_read_at_offset, NULL);
	printf("\nHERE'S THE COPY RECORD\n");
	trace_seq_init(&s);
	print_event(&s, handle, record);
	trace_seq_do_printf(&s);
	trace_seq_destroy(&s);
	printf("\n");

	tracecmd_free_record(record);
}

/* Remember the offset of the record seen when the index equals test_read_at_copy */
static void test_save(struct tep_record *record, int cpu)
{
	if (test_read_at_index++ == test_read_at_copy) {
		test_read_at_offset = record->offset;
		printf("\nUSING THIS RECORD\n");
	}
}
#endif /* TEST_READ_AT */

/* Debug variables for testing tracecmd_set_cpu_at_timestamp */
#define TEST_AT_TIMESTAMP 0
#if TEST_AT_TIMESTAMP
#define DO_TEST
static unsigned long long test_at_timestamp_ts;
static int test_at_timestamp_copy = 100;
static int test_at_timestamp_cpu = -1;
static int test_at_timestamp_index;
/*
 * Seek the saved CPU to the saved timestamp and print the record read
 * from that position.
 */
static void show_test(struct tracecmd_input *handle)
{
	struct tep_record *record;
	struct trace_seq s;
	int cpu = test_at_timestamp_cpu;

	if (!test_at_timestamp_ts) {
		printf("\nNO RECORD COPIED\n");
		return;
	}

	if (tracecmd_set_cpu_to_timestamp(handle, cpu, test_at_timestamp_ts))
		return;

	record = tracecmd_read_data(handle, cpu);
	printf("\nHERE'S THE COPY RECORD with page %p offset=%p\n",
	       (void *)(record->offset & ~(page_size - 1)),
	       (void *)record->offset);
	trace_seq_init(&s);
	print_event(&s, handle, record);
	trace_seq_do_printf(&s);
	trace_seq_destroy(&s);
	printf("\n");

	tracecmd_free_record(record);
}

/* Remember the timestamp and CPU of the record seen when the index equals test_at_timestamp_copy */
static void test_save(struct tep_record *record, int cpu)
{
	if (test_at_timestamp_index++ == test_at_timestamp_copy) {
		test_at_timestamp_ts = record->ts;
		test_at_timestamp_cpu = cpu;
		printf("\nUSING THIS RECORD page=%p offset=%p\n",
		       (void *)(record->offset & ~(page_size - 1)),
		       (void *)record->offset);
	}
}
#endif /* TEST_AT_TIMESTAMP */

#define TEST_FIRST_LAST 0
#if TEST_FIRST_LAST
#define DO_TEST
/* Print the first and the last record of CPU 0 */
static void show_test(struct tracecmd_input *handle)
{
	struct tep_record *record;
	struct trace_seq s;
	int cpu = 0;

	record = tracecmd_read_cpu_first(handle, cpu);
	if (!record) {
		printf("No first record?\n");
		return;
	}

	printf("\nHERE'S THE FIRST RECORD with offset %p\n",
	       (void *)record->offset);
	trace_seq_init(&s);
	print_event(&s, handle, record);
	trace_seq_do_printf(&s);
	trace_seq_destroy(&s);
	printf("\n");

	tracecmd_free_record(record);

	record =
tracecmd_read_cpu_last(handle, cpu); + if (!record) { + printf("No last record?\n"); + return; + } + + printf("\nHERE'S THE LAST RECORD with offset %p\n", + (void *)record->offset); + trace_seq_init(&s); + print_event(&s, handle, record); + trace_seq_do_printf(&s); + trace_seq_destroy(&s); + printf("\n"); + + tracecmd_free_record(record); +} +static void test_save(struct tep_record *record, int cpu) +{ +} +#endif /* TEST_FIRST_LAST */ + +#ifndef DO_TEST +static void show_test(struct tracecmd_input *handle) +{ + /* quiet the compiler */ + if (0) + print_event(NULL, NULL, NULL); +} +static void test_save(struct tep_record *record, int cpu) +{ +} +#endif + +static void add_input(const char *file) +{ + struct input_files *item; + + item = malloc(sizeof(*item)); + if (!item) + die("Failed to allocate for %s", file); + memset(item, 0, sizeof(*item)); + item->file = file; + list_add_tail(&item->list, &input_files); + last_input_file = item; +} + +static void add_handle(struct tracecmd_input *handle, const char *file) +{ + struct handle_list *item; + + item = malloc(sizeof(*item)); + if (!item) + die("Failed ot allocate for %s", file); + memset(item, 0, sizeof(*item)); + item->handle = handle; + if (file) { + item->file = file + strlen(file); + /* we want just the base name */ + while (item->file >= file && *item->file != '/') + item->file--; + item->file++; + if (strlen(item->file) > max_file_size) + max_file_size = strlen(item->file); + } + list_add_tail(&item->list, &handle_list); +} + +static void free_inputs(void) +{ + struct input_files *item; + + while (!list_empty(&input_files)) { + item = container_of(input_files.next, struct input_files, list); + list_del(&item->list); + free(item); + } +} + +static void free_handles(void) +{ + struct handle_list *item; + + while (!list_empty(&handle_list)) { + item = container_of(handle_list.next, struct handle_list, list); + list_del(&item->list); + free(item); + } +} + +static void add_filter(const char *filter, int neg) +{ + 
struct filter_str *ftr; + + ftr = malloc(sizeof(*ftr)); + if (!ftr) + die("Failed to allocate for filter %s", filter); + ftr->filter = strdup(filter); + if (!ftr->filter) + die("malloc"); + ftr->next = NULL; + ftr->neg = neg; + + /* must maintain order of command line */ + *filter_next = ftr; + filter_next = &ftr->next; +} + +static void __add_filter(struct pid_list **head, const char *arg) +{ + struct pid_list *list; + char *pids = strdup(arg); + char *pid; + char *sav; + int free = 1; + + if (!pids) + die("malloc"); + + pid = strtok_r(pids, ",", &sav); + while (pid) { + list = malloc(sizeof(*list)); + if (!list) + die("Failed to allocate for arg %s", arg); + list->pid = pid; + list->free = free; + list->next = *head; + *head = list; + /* The first pid needs to be freed */ + free = 0; + pid = strtok_r(NULL, ",", &sav); + } +} + +static void add_comm_filter(const char *arg) +{ + __add_filter(&comm_list, arg); +} + +static void add_pid_filter(const char *arg) +{ + __add_filter(&pid_list, arg); +} + +static char *append_pid_filter(char *curr_filter, char *pid) +{ + char *filter; + int len, curr_len; + +#define FILTER_FMT "(common_pid==" __STR ")||(pid==" __STR ")||(next_pid==" __STR ")" + +#undef __STR +#define __STR "" + + /* strlen(".*:") > strlen("||") */ + len = strlen(".*:" FILTER_FMT) + strlen(pid) * 3 + 1; + +#undef __STR +#define __STR "%s" + + if (!curr_filter) { + filter = malloc(len); + if (!filter) + die("Failed to allocate for filter %s", curr_filter); + sprintf(filter, ".*:" FILTER_FMT, pid, pid, pid); + } else { + curr_len = strlen(curr_filter); + len += curr_len; + + filter = realloc(curr_filter, len); + if (!filter) + die("realloc"); + sprintf(filter + curr_len, "||" FILTER_FMT, pid, pid, pid); + } + + return filter; +} + +static void convert_comm_filter(struct tracecmd_input *handle) +{ + struct tep_cmdline *cmdline; + struct tep_handle *pevent; + struct pid_list *list; + + char pidstr[100]; + + if (!comm_list) + return; + + pevent = 
tracecmd_get_tep(handle); + + /* Seach for comm names and get their pids */ + for (list = comm_list; list; list = list->next) { + cmdline = tep_data_pid_from_comm(pevent, list->pid, NULL); + if (!cmdline) { + warning("comm: %s not in cmdline list", list->pid); + continue; + } + do { + sprintf(pidstr, "%d", tep_cmdline_pid(pevent, cmdline)); + add_pid_filter(pidstr); + cmdline = tep_data_pid_from_comm(pevent, list->pid, + cmdline); + } while (cmdline); + } + + while (comm_list) { + list = comm_list; + comm_list = comm_list->next; + if (list->free) + free(list->pid); + free(list); + } +} + +static void make_pid_filter(struct tracecmd_input *handle) +{ + struct pid_list *list; + char *str = NULL; + + convert_comm_filter(handle); + + if (!pid_list) + return; + + /* First do all common pids */ + for (list = pid_list; list; list = list->next) { + str = append_pid_filter(str, list->pid); + } + + add_filter(str, 0); + free(str); + + while (pid_list) { + list = pid_list; + pid_list = pid_list->next; + if (list->free) + free(list->pid); + free(list); + } +} + +static void process_filters(struct handle_list *handles) +{ + struct filter **filter_next = &handles->event_filters; + struct filter **filter_out_next = &handles->event_filter_out; + struct filter *event_filter; + struct filter_str *filter; + struct tep_handle *pevent; + char errstr[200]; + int filters = 0; + int ret; + + pevent = tracecmd_get_tep(handles->handle); + + make_pid_filter(handles->handle); + + while (filter_strings) { + filter = filter_strings; + filter_strings = filter->next; + + event_filter = malloc(sizeof(*event_filter)); + if (!event_filter) + die("Failed to allocate for event filter"); + event_filter->next = NULL; + event_filter->filter = tep_filter_alloc(pevent); + if (!event_filter->filter) + die("malloc"); + + ret = tep_filter_add_filter_str(event_filter->filter, + filter->filter); + if (ret < 0) { + tep_strerror(pevent, ret, errstr, sizeof(errstr)); + die("Error filtering: %s\n%s", + 
filter->filter, errstr); + } + + if (filter->neg) { + *filter_out_next = event_filter; + filter_out_next = &event_filter->next; + } else { + *filter_next = event_filter; + filter_next = &event_filter->next; + } + filters++; + free(filter->filter); + free(filter); + } + if (filters && test_filters_mode) + exit(0); +} + +static void init_wakeup(struct tracecmd_input *handle) +{ + struct tep_handle *pevent; + struct tep_event *event; + + if (!show_wakeup) + return; + + pevent = tracecmd_get_tep(handle); + + trace_hash_init(&wakeup_hash, WAKEUP_HASH_SIZE); + + event = tep_find_event_by_name(pevent, "sched", "sched_wakeup"); + if (!event) + goto fail; + wakeup_id = event->id; + wakeup_task = tep_find_field(event, "pid"); + if (!wakeup_task) + goto fail; + wakeup_success = tep_find_field(event, "success"); + + event = tep_find_event_by_name(pevent, "sched", "sched_switch"); + if (!event) + goto fail; + sched_id = event->id; + sched_task = tep_find_field(event, "next_pid"); + if (!sched_task) + goto fail; + + sched_prio = tep_find_field(event, "next_prio"); + if (!sched_prio) + goto fail; + + + wakeup_new_id = -1; + + event = tep_find_event_by_name(pevent, "sched", "sched_wakeup_new"); + if (!event) + goto skip; + wakeup_new_id = event->id; + wakeup_new_task = tep_find_field(event, "pid"); + if (!wakeup_new_task) + goto fail; + wakeup_new_success = tep_find_field(event, "success"); + + skip: + return; + + fail: + show_wakeup = 0; +} + +static void add_wakeup(unsigned int val, unsigned long long start) +{ + unsigned int key = trace_hash(val); + struct wakeup_info *info; + struct trace_hash_item *item; + + item = trace_hash_find(&wakeup_hash, key, NULL, NULL); + if (item) { + info = container_of(item, struct wakeup_info, hash); + /* Hmm, double wakeup? 
*/ + info->start = start; + return; + } + + info = malloc(sizeof(*info)); + if (!info) + die("Failed to allocate wakeup info"); + info->hash.key = key; + info->start = start; + trace_hash_add(&wakeup_hash, &info->hash); +} + +static unsigned long long max_lat = 0; +static unsigned long long max_time; +static unsigned long long min_lat = -1; +static unsigned long long min_time; + +static unsigned long long max_rt_lat = 0; +static unsigned long long max_rt_time; +static unsigned long long min_rt_lat = -1; +static unsigned long long min_rt_time; + +static void add_sched(unsigned int val, unsigned long long end, int rt) +{ + struct trace_hash_item *item; + unsigned int key = trace_hash(val); + struct wakeup_info *info; + unsigned long long cal; + + item = trace_hash_find(&wakeup_hash, key, NULL, NULL); + if (!item) + return; + + info = container_of(item, struct wakeup_info, hash); + + cal = end - info->start; + + if (cal > max_lat) { + max_lat = cal; + max_time = end; + } + if (cal < min_lat) { + min_lat = cal; + min_time = end; + } + + if (rt) { + if (cal > max_rt_lat) { + max_rt_lat = cal; + max_rt_time = end; + } + if (cal < min_rt_lat) { + min_rt_lat = cal; + min_rt_time = end; + } + } + + printf(" Latency: %llu.%03llu usecs", cal / 1000, cal % 1000); + + total_wakeup_lat += cal; + wakeup_lat_count++; + + if (rt) { + total_wakeup_rt_lat += cal; + wakeup_rt_lat_count++; + } + + trace_hash_del(item); + free(info); +} + +static void process_wakeup(struct tep_handle *pevent, struct tep_record *record) +{ + unsigned long long val; + int id; + + if (!show_wakeup) + return; + + id = tep_data_type(pevent, record); + if (id == wakeup_id) { + if (tep_read_number_field(wakeup_success, record->data, &val) == 0) { + if (!val) + return; + } + if (tep_read_number_field(wakeup_task, record->data, &val)) + return; + add_wakeup(val, record->ts); + } else if (id == wakeup_new_id) { + if (tep_read_number_field(wakeup_new_success, record->data, &val) == 0) { + if (!val) + return; + } + 
if (tep_read_number_field(wakeup_new_task, record->data, &val)) + return; + add_wakeup(val, record->ts); + } else if (id == sched_id) { + int rt = 1; + if (tep_read_number_field(sched_prio, record->data, &val)) + return; + if (val > 99) + rt = 0; + if (tep_read_number_field(sched_task, record->data, &val)) + return; + add_sched(val, record->ts, rt); + } +} + +static void +show_wakeup_timings(unsigned long long total, unsigned long count, + unsigned long long lat_max, unsigned long long time_max, + unsigned long long lat_min, unsigned long long time_min) +{ + + total /= count; + + printf("\nAverage wakeup latency: %llu.%03llu usecs\n", + total / 1000, + total % 1000); + printf("Maximum Latency: %llu.%03llu usecs at ", lat_max / 1000, lat_max % 1000); + printf("timestamp: %llu.%06llu\n", + time_max / 1000000000, ((time_max + 500) % 1000000000) / 1000); + printf("Minimum Latency: %llu.%03llu usecs at ", lat_min / 1000, lat_min % 1000); + printf("timestamp: %llu.%06llu\n\n", time_min / 1000000000, + ((time_min + 500) % 1000000000) / 1000); +} + +static void finish_wakeup(void) +{ + struct wakeup_info *info; + struct trace_hash_item **bucket; + struct trace_hash_item *item; + + if (!show_wakeup || !wakeup_lat_count) + return; + + show_wakeup_timings(total_wakeup_lat, wakeup_lat_count, + max_lat, max_time, + min_lat, min_time); + + + if (wakeup_rt_lat_count) { + printf("RT task timings:\n"); + show_wakeup_timings(total_wakeup_rt_lat, wakeup_rt_lat_count, + max_rt_lat, max_rt_time, + min_rt_lat, min_rt_time); + } + + trace_hash_for_each_bucket(bucket, &wakeup_hash) { + trace_hash_while_item(item, bucket) { + trace_hash_del(item); + info = container_of(item, struct wakeup_info, hash); + free(info); + } + } + + trace_hash_free(&wakeup_hash); +} + +void trace_show_data(struct tracecmd_input *handle, struct tep_record *record) +{ + tracecmd_show_data_func func = tracecmd_get_show_data_func(handle); + const char *tfmt = time_format(handle, TIME_FMT_NORMAL); + const char *cfmt 
= latency_format ? "%8.8s-%-5d %3d" : "%16s-%-5d [%03d]"; + struct tep_handle *pevent; + struct tep_event *event; + struct trace_seq s; + int cpu = record->cpu; + bool use_trace_clock; + static unsigned long long last_ts; + unsigned long long diff_ts; + unsigned long page_size; + char buf[50]; + + page_size = tracecmd_page_size(handle); + + test_save(record, cpu); + + if (func) { + func(handle, record); + return; + } + + pevent = tracecmd_get_tep(handle); + event = tep_find_event_by_record(pevent, record); + use_trace_clock = tracecmd_get_use_trace_clock(handle); + + trace_seq_init(&s); + if (record->missed_events > 0) + trace_seq_printf(&s, "CPU:%d [%lld EVENTS DROPPED]\n", + cpu, record->missed_events); + else if (record->missed_events < 0) + trace_seq_printf(&s, "CPU:%d [EVENTS DROPPED]\n", cpu); + if (buffer_breaks || tracecmd_get_debug()) { + if (tracecmd_record_at_buffer_start(handle, record)) { + trace_seq_printf(&s, "CPU:%d [SUBBUFFER START]", cpu); + if (tracecmd_get_debug()) + trace_seq_printf(&s, " [%lld:0x%llx]", + tracecmd_page_ts(handle, record), + record->offset & ~(page_size - 1)); + trace_seq_putc(&s, '\n'); + } + } + + tep_print_event(pevent, &s, record, cfmt, + TEP_PRINT_COMM, + TEP_PRINT_PID, + TEP_PRINT_CPU); + + if (latency_format) { + if (raw_format) + trace_seq_printf(&s, "-0x%x", + tep_data_flags(pevent, record)); + else + tep_print_event(pevent, &s, record, "%s", + TEP_PRINT_LATENCY); + } + + tep_print_event(pevent, &s, record, tfmt, TEP_PRINT_TIME); + + if (tsdiff) { + unsigned long long rec_ts = record->ts; + + buf[0] = 0; + if (use_trace_clock && !tep_test_flag(pevent, TEP_NSEC_OUTPUT)) + rec_ts = (rec_ts + 500) / 1000; + if (last_ts) { + diff_ts = rec_ts - last_ts; + snprintf(buf, 50, "(+%lld)", diff_ts); + buf[49] = 0; + } + last_ts = rec_ts; + trace_seq_printf(&s, " %-8s", buf); + } + + print_event_name(&s, event); + tep_print_event(pevent, &s, record, "%s", format_type); + + if (s.len && *(s.buffer + s.len - 1) == '\n') + s.len--; + 
if (tracecmd_get_debug()) { + struct kbuffer *kbuf; + struct kbuffer_raw_info info; + void *page; + void *offset; + + trace_seq_printf(&s, " [%d:0x%llx:%d]", + tracecmd_record_ts_delta(handle, record), + record->offset & (page_size - 1), record->size); + kbuf = tracecmd_record_kbuf(handle, record); + page = tracecmd_record_page(handle, record); + offset = tracecmd_record_offset(handle, record); + + if (kbuf && page && offset) { + struct kbuffer_raw_info *pi = &info; + + /* We need to get the record raw data to get next */ + pi->next = offset; + pi = kbuffer_raw_get(kbuf, page, pi); + while ((pi = kbuffer_raw_get(kbuf, page, pi))) { + if (pi->type < KBUFFER_TYPE_PADDING) + break; + switch (pi->type) { + case KBUFFER_TYPE_PADDING: + trace_seq_printf(&s, "\n PADDING: "); + break; + case KBUFFER_TYPE_TIME_EXTEND: + trace_seq_printf(&s, "\n TIME EXTEND: "); + break; + case KBUFFER_TYPE_TIME_STAMP: + trace_seq_printf(&s, "\n TIME STAMP: "); + break; + } + if (pi->type == KBUFFER_TYPE_TIME_STAMP) + trace_seq_printf(&s, "timestamp:%lld length:%d", + pi->delta, + pi->length); + else + trace_seq_printf(&s, "delta:%lld length:%d", + pi->delta, + pi->length); + } + } + } + + trace_seq_do_printf(&s); + trace_seq_destroy(&s); + + process_wakeup(pevent, record); + + printf("\n"); +} + +static void read_latency(struct tracecmd_input *handle) +{ + char *buf = NULL; + size_t size = 0; + int r; + + do { + r = tracecmd_latency_data_read(handle, &buf, &size); + if (r > 0) + printf("%.*s", r, buf); + } while (r > 0); + + printf("\n"); + free(buf); +} + +static int +test_filters(struct tep_handle *pevent, struct filter *event_filters, + struct tep_record *record, int neg) +{ + int found = 0; + int ret = FILTER_NONE; + int flags; + + if (no_irqs || no_softirqs) { + flags = tep_data_flags(pevent, record); + if (no_irqs && (flags & TRACE_FLAG_HARDIRQ)) + return FILTER_MISS; + if (no_softirqs && (flags & TRACE_FLAG_SOFTIRQ)) + return FILTER_MISS; + } + + while (event_filters) { + ret = 
tep_filter_match(event_filters->filter, record); + switch (ret) { + case FILTER_NONE: + case FILTER_MATCH: + found = 1; + } + /* We need to test all negative filters */ + if (!neg && found) + break; + event_filters = event_filters->next; + } + + return ret; +} + +struct stack_info_cpu { + int cpu; + int last_printed; +}; + +struct stack_info { + struct stack_info *next; + struct handle_list *handles; + struct stack_info_cpu *cpus; + int stacktrace_id; + int nr_cpus; +}; + +static int +test_stacktrace(struct handle_list *handles, struct tep_record *record, + int last_printed) +{ + static struct stack_info *infos; + struct stack_info *info; + struct stack_info_cpu *cpu_info; + struct handle_list *h; + struct tracecmd_input *handle; + struct tep_handle *pevent; + struct tep_event *event; + static int init; + int ret; + int id; + + if (!init) { + init = 1; + + list_for_each_entry(h, &handle_list, list) { + info = malloc(sizeof(*info)); + if (!info) + die("Failed to allocate handle"); + info->handles = h; + info->nr_cpus = tracecmd_cpus(h->handle); + + info->cpus = malloc(sizeof(*info->cpus) * info->nr_cpus); + if (!info->cpus) + die("Failed to allocate for %d cpus", info->nr_cpus); + memset(info->cpus, 0, sizeof(*info->cpus)); + + pevent = tracecmd_get_tep(h->handle); + event = tep_find_event_by_name(pevent, "ftrace", + "kernel_stack"); + if (event) + info->stacktrace_id = event->id; + else + info->stacktrace_id = 0; + + info->next = infos; + infos = info; + } + + + } + + handle = handles->handle; + pevent = tracecmd_get_tep(handle); + + for (info = infos; info; info = info->next) + if (info->handles == handles) + break; + + if (!info->stacktrace_id) + return 0; + + cpu_info = &info->cpus[record->cpu]; + + id = tep_data_type(pevent, record); + + /* + * Print the stack trace if the previous event was printed. + * But do not print the stack trace if it is explicitly + * being filtered out. 
+ */ + if (id == info->stacktrace_id) { + ret = test_filters(pevent, handles->event_filter_out, record, 1); + if (ret != FILTER_MATCH) + return cpu_info->last_printed; + return 0; + } + + cpu_info->last_printed = last_printed; + return 0; +} + +static struct tep_record *get_next_record(struct handle_list *handles) +{ + struct tep_record *record; + struct tep_handle *pevent; + int found = 0; + int cpu; + int ret; + + if (handles->record) + return handles->record; + + if (handles->done) + return NULL; + + pevent = tracecmd_get_tep(handles->handle); + + do { + if (filter_cpus) { + long long last_stamp = -1; + struct tep_record *precord; + int first_record = 1; + int next_cpu = -1; + int i; + + for (i = 0; (cpu = filter_cpus[i]) >= 0; i++) { + precord = tracecmd_peek_data(handles->handle, cpu); + if (precord && + (first_record || precord->ts < last_stamp)) { + next_cpu = cpu; + last_stamp = precord->ts; + first_record = 0; + } + } + if (!first_record) + record = tracecmd_read_data(handles->handle, next_cpu); + else + record = NULL; + } else + record = tracecmd_read_next_data(handles->handle, &cpu); + + if (record) { + ret = test_filters(pevent, handles->event_filters, record, 0); + switch (ret) { + case FILTER_NOEXIST: + /* Stack traces may still filter this */ + if (stacktrace_id && + test_stacktrace(handles, record, 0)) + found = 1; + else + tracecmd_free_record(record); + break; + case FILTER_NONE: + case FILTER_MATCH: + /* Test the negative filters (-v) */ + ret = test_filters(pevent, handles->event_filter_out, + record, 1); + if (ret != FILTER_MATCH) { + found = 1; + break; + } + /* fall through */ + default: + tracecmd_free_record(record); + } + } + } while (record && !found); + + if (record && stacktrace_id) + test_stacktrace(handles, record, 1); + + handles->record = record; + if (!record) + handles->done = 1; + + return record; +} + +static void free_handle_record(struct handle_list *handles) +{ + if (!handles->record) + return; + + 
tracecmd_free_record(handles->record); + handles->record = NULL; +} + +static void print_handle_file(struct handle_list *handles) +{ + /* Only print file names if more than one file is read */ + if (!multi_inputs && !instances) + return; + if (handles->file && *handles->file != '\0') + printf("%*s: ", max_file_size, handles->file); + else + printf("%*s ", max_file_size, ""); +} + +static void free_filters(struct filter *event_filter) +{ + struct filter *filter; + + while (event_filter) { + filter = event_filter; + event_filter = filter->next; + + tep_filter_free(filter->filter); + free(filter); + } +} + +enum output_type { + OUTPUT_NORMAL, + OUTPUT_STAT_ONLY, + OUTPUT_UNAME_ONLY, + OUTPUT_VERSION_ONLY, +}; + +static void read_data_info(struct list_head *handle_list, enum output_type otype, + int global, int align_ts) +{ + unsigned long long ts, first_ts; + struct handle_list *handles; + struct handle_list *last_handle; + struct tep_record *record; + struct tep_record *last_record; + struct tep_handle *pevent; + struct tep_event *event; + int first = 1; + int ret; + + list_for_each_entry(handles, handle_list, list) { + int cpus; + + if (!tracecmd_is_buffer_instance(handles->handle)) { + ret = tracecmd_init_data(handles->handle); + if (ret < 0) + die("failed to init data"); + } + cpus = tracecmd_cpus(handles->handle); + handles->cpus = cpus; + handles->last_timestamp = calloc(cpus, sizeof(*handles->last_timestamp)); + if (!handles->last_timestamp) + die("allocating timestamps"); + + /* Don't process instances that we added here */ + if (tracecmd_is_buffer_instance(handles->handle)) + continue; + + if (align_ts) { + ts = tracecmd_get_first_ts(handles->handle); + if (first || first_ts > ts) + first_ts = ts; + first = 0; + } + print_handle_file(handles); + printf("cpus=%d\n", cpus); + + /* Latency trace is just all ASCII */ + if (ret > 0) { + if (multi_inputs) + die("latency traces do not work with multiple inputs"); + read_latency(handles->handle); + return; + } + + 
switch (otype) { + case OUTPUT_NORMAL: + break; + case OUTPUT_STAT_ONLY: + printf("\nKernel buffer statistics:\n" + " Note: \"entries\" are the entries left in the kernel ring buffer and are not\n" + " recorded in the trace data. They should all be zero.\n\n"); + tracecmd_print_stats(handles->handle); + continue; + case OUTPUT_UNAME_ONLY: + tracecmd_print_uname(handles->handle); + case OUTPUT_VERSION_ONLY: + tracecmd_print_version(handles->handle); + continue; + } + + /* Find the kernel_stacktrace if available */ + pevent = tracecmd_get_tep(handles->handle); + event = tep_find_event_by_name(pevent, "ftrace", "kernel_stack"); + if (event) + stacktrace_id = event->id; + + init_wakeup(handles->handle); + if (last_hook) + last_hook->next = tracecmd_hooks(handles->handle); + else + hooks = tracecmd_hooks(handles->handle); + if (profile) + trace_init_profile(handles->handle, hooks, global); + + process_filters(handles); + + /* If this file has buffer instances, get the handles for them */ + instances = tracecmd_buffer_instances(handles->handle); + if (instances) { + struct tracecmd_input *new_handle; + const char *name; + int i; + + for (i = 0; i < instances; i++) { + name = tracecmd_buffer_instance_name(handles->handle, i); + if (!name) + die("error in reading buffer instance"); + new_handle = tracecmd_buffer_instance_handle(handles->handle, i); + if (!new_handle) { + warning("could not retrieve handle %s", name); + continue; + } + add_handle(new_handle, name); + } + } + } + + if (otype != OUTPUT_NORMAL) + return; + + if (align_ts) { + list_for_each_entry(handles, handle_list, list) { + tracecmd_add_ts_offset(handles->handle, -first_ts); + } + } + + do { + last_handle = NULL; + last_record = NULL; + + list_for_each_entry(handles, handle_list, list) { + record = get_next_record(handles); + if (!record) + continue; + if (!last_record || + (record && record->ts < last_record->ts)) { + last_record = record; + last_handle = handles; + } + } + if (last_record) { + int cpu = 
last_record->cpu; + if (cpu >= last_handle->cpus) + die("cpu %d greater than %d\n", cpu, last_handle->cpus); + if (tscheck && + last_handle->last_timestamp[cpu] > last_record->ts) { + errno = 0; + warning("WARNING: Record on cpu %d went backwards: %lld to %lld delta: -%lld\n", + cpu, last_handle->last_timestamp[cpu], + last_record->ts, + last_handle->last_timestamp[cpu] - last_record->ts); + } + last_handle->last_timestamp[cpu] = last_record->ts; + print_handle_file(last_handle); + trace_show_data(last_handle->handle, last_record); + free_handle_record(last_handle); + } + } while (last_record); + + if (profile) + do_trace_profile(); + + list_for_each_entry(handles, handle_list, list) { + free_filters(handles->event_filters); + free_filters(handles->event_filter_out); + free(handles->last_timestamp); + + show_test(handles->handle); + } +} + +struct tracecmd_input *read_trace_header(const char *file, int flags) +{ + input_fd = open(file, O_RDONLY); + if (input_fd < 0) + die("opening '%s'\n", file); + + return tracecmd_alloc_fd(input_fd, flags); +} + +static void sig_end(int sig) +{ + struct handle_list *handles; + + fprintf(stderr, "trace-cmd: Received SIGINT\n"); + + list_for_each_entry(handles, &handle_list, list) { + tracecmd_close(handles->handle); + } + + exit(0); +} + +static const char *skip_space_and_test_digit(const char *p, const char *cpu_str) +{ + while (isspace(*p)) + p++; + if (!isdigit(*p)) + die("invalid character '%c' in cpu string '%s'", + *p, cpu_str); + return p; +} + +static void __add_cpu(int cpu) +{ + filter_cpus = tracecmd_add_id(filter_cpus, cpu, nr_filter_cpus++); +} + +static void parse_cpulist(const char *cpu_str) +{ + unsigned a, b; + const char *s = cpu_str; + + do { + s = skip_space_and_test_digit(s, cpu_str); + b = a = strtoul(s, (char **)&s, 10); + if (*s == '-') { + s = skip_space_and_test_digit(s + 1, cpu_str); + b = strtoul(s, (char **)&s, 10); + } + if (!(a <= b)) + die("range of cpu numbers must be lower to greater"); + while (a 
<= b) { + __add_cpu(a); + a++; + } + if (*s == ',' || *s == ':') + s++; + } while (*s != '\0'); +} + +static void read_file_fd(int fd, char *dst, int len) +{ + size_t size = 0; + int r; + + do { + r = read(fd, dst+size, len); + if (r > 0) { + size += r; + len -= r; + } + } while (r > 0); +} + +static void add_functions(struct tep_handle *pevent, const char *file) +{ + struct stat st; + char *buf; + int ret; + int fd; + + fd = open(file, O_RDONLY); + if (fd < 0) + die("Can't read file %s", file); + + ret = fstat(fd, &st); + if (ret < 0) + die("Can't stat file %s", file); + + buf = malloc(st.st_size + 1); + if (!buf) + die("Failed to allocate for function buffer"); + read_file_fd(fd, buf, st.st_size); + buf[st.st_size] = '\0'; + close(fd); + tep_parse_kallsyms(pevent, buf); + free(buf); +} + +static void process_plugin_option(char *option) +{ + char *name = option; + char *val = NULL; + char *p; + + if ((p = strstr(name, "="))) { + *p = '\0'; + val = p+1; + } + tep_plugin_add_option(name, val); +} + +static void set_event_flags(struct tep_handle *pevent, struct event_str *list, + unsigned int flag) +{ + struct tep_event **events; + struct tep_event *event; + struct event_str *str; + regex_t regex; + int ret; + int i; + + if (!list) + return; + + events = tep_list_events(pevent, 0); + + for (str = list; str; str = str->next) { + char *match; + + match = malloc(strlen(str->event) + 3); + if (!match) + die("Failed to allocate for match string '%s'", str->event); + sprintf(match, "^%s$", str->event); + + ret = regcomp(®ex, match, REG_ICASE|REG_NOSUB); + if (ret < 0) + die("Can't parse '%s'", str->event); + free(match); + for (i = 0; events[i]; i++) { + event = events[i]; + if (!regexec(®ex, event->name, 0, NULL, 0) || + !regexec(®ex, event->system, 0, NULL, 0)) + event->flags |= flag; + } + } +} + +static void add_hook(const char *arg) +{ + struct hook_list *hook; + + hook = tracecmd_create_event_hook(arg); + + hook->next = hooks; + hooks = hook; + if (!last_hook) + 
last_hook = hook; +} + +enum { + OPT_verbose = 234, + OPT_align_ts = 235, + OPT_raw_ts = 236, + OPT_version = 237, + OPT_tscheck = 238, + OPT_tsdiff = 239, + OPT_ts2secs = 240, + OPT_tsoffset = 241, + OPT_bycomm = 242, + OPT_debug = 243, + OPT_uname = 244, + OPT_profile = 245, + OPT_event = 246, + OPT_comm = 247, + OPT_boundary = 248, + OPT_stat = 249, + OPT_pid = 250, + OPT_nodate = 251, + OPT_check_event_parsing = 252, + OPT_kallsyms = 253, + OPT_events = 254, + OPT_cpu = 255, + OPT_cpus = 256, +}; + +void trace_report (int argc, char **argv) +{ + struct tracecmd_input *handle; + struct tep_handle *pevent; + struct event_str *raw_events = NULL; + struct event_str *nohandler_events = NULL; + struct event_str **raw_ptr = &raw_events; + struct event_str **nohandler_ptr = &nohandler_events; + const char *functions = NULL; + const char *print_event = NULL; + struct input_files *inputs; + struct handle_list *handles; + enum output_type otype; + long long tsoffset = 0; + unsigned long long ts2secs = 0; + unsigned long long ts2sc; + int open_flags = 0; + int show_stat = 0; + int show_funcs = 0; + int show_endian = 0; + int show_page_size = 0; + int show_printk = 0; + int show_uname = 0; + int show_version = 0; + int show_events = 0; + int show_cpus = 0; + int print_events = 0; + int nanosec = 0; + int no_date = 0; + int raw_ts = 0; + int align_ts = 0; + int global = 0; + int neg = 0; + int ret = 0; + int check_event_parsing = 0; + int c; + + list_head_init(&handle_list); + list_head_init(&input_files); + + if (argc < 2) + usage(argv); + + if (strcmp(argv[1], "report") != 0) + usage(argv); + + signal(SIGINT, sig_end); + + for (;;) { + int option_index = 0; + static struct option long_options[] = { + {"cpu", required_argument, NULL, OPT_cpu}, + {"cpus", no_argument, NULL, OPT_cpus}, + {"events", no_argument, NULL, OPT_events}, + {"event", required_argument, NULL, OPT_event}, + {"filter-test", no_argument, NULL, 'T'}, + {"kallsyms", required_argument, NULL, OPT_kallsyms}, + 
{"pid", required_argument, NULL, OPT_pid}, + {"comm", required_argument, NULL, OPT_comm}, + {"check-events", no_argument, NULL, + OPT_check_event_parsing}, + {"nodate", no_argument, NULL, OPT_nodate}, + {"stat", no_argument, NULL, OPT_stat}, + {"boundary", no_argument, NULL, OPT_boundary}, + {"debug", no_argument, NULL, OPT_debug}, + {"profile", no_argument, NULL, OPT_profile}, + {"uname", no_argument, NULL, OPT_uname}, + {"version", no_argument, NULL, OPT_version}, + {"by-comm", no_argument, NULL, OPT_bycomm}, + {"ts-offset", required_argument, NULL, OPT_tsoffset}, + {"ts2secs", required_argument, NULL, OPT_ts2secs}, + {"ts-diff", no_argument, NULL, OPT_tsdiff}, + {"ts-check", no_argument, NULL, OPT_tscheck}, + {"raw-ts", no_argument, NULL, OPT_raw_ts}, + {"align-ts", no_argument, NULL, OPT_align_ts}, + {"verbose", optional_argument, NULL, OPT_verbose}, + {"help", no_argument, NULL, '?'}, + {NULL, 0, NULL, 0} + }; + + c = getopt_long (argc-1, argv+1, "+hSIi:H:feGpRr:tPNn:LlEwF:V::vTqO:", + long_options, &option_index); + if (c == -1) + break; + switch (c) { + case 'h': + usage(argv); + break; + case 'i': + if (input_file) { + if (!multi_inputs) { + add_input(input_file); + if (tsoffset) + last_input_file->tsoffset = tsoffset; + } + multi_inputs++; + add_input(optarg); + } else + input_file = optarg; + break; + case 'F': + add_filter(optarg, neg); + break; + case 'H': + add_hook(optarg); + break; + case 'T': + test_filters_mode = 1; + break; + case 'f': + show_funcs = 1; + break; + case 'I': + no_irqs = 1; + break; + case 'S': + no_softirqs = 1; + break; + case 'P': + show_printk = 1; + break; + case 'L': + open_flags |= TRACECMD_FL_LOAD_NO_SYSTEM_PLUGINS; + break; + case 'N': + open_flags |= TRACECMD_FL_LOAD_NO_PLUGINS; + break; + case 'n': + *nohandler_ptr = malloc(sizeof(struct event_str)); + if (!*nohandler_ptr) + die("Failed to allocate for '-n %s'", optarg); + (*nohandler_ptr)->event = optarg; + (*nohandler_ptr)->next = NULL; + nohandler_ptr = 
&(*nohandler_ptr)->next; + break; + case 'e': + show_endian = 1; + break; + case 'p': + show_page_size = 1; + break; + case 'E': + show_events = 1; + break; + case 'G': + global = 1; + break; + case 'R': + raw_format = true; + break; + case 'r': + *raw_ptr = malloc(sizeof(struct event_str)); + if (!*raw_ptr) + die("Failed to allocate '-r %s'", optarg); + (*raw_ptr)->event = optarg; + (*raw_ptr)->next = NULL; + raw_ptr = &(*raw_ptr)->next; + break; + case 't': + nanosec = 1; + break; + case 'w': + show_wakeup = 1; + break; + case 'l': + latency_format = 1; + break; + case 'O': + process_plugin_option(optarg); + break; + case 'v': + if (neg) + die("Only 1 -v can be used"); + neg = 1; + break; + case 'q': + silence_warnings = 1; + tracecmd_set_loglevel(TEP_LOG_NONE); + break; + case OPT_cpu: + parse_cpulist(optarg); + break; + case OPT_cpus: + show_cpus = 1; + break; + case OPT_events: + print_events = 1; + break; + case OPT_event: + print_event = optarg; + break; + case OPT_kallsyms: + functions = optarg; + break; + case OPT_pid: + add_pid_filter(optarg); + break; + case OPT_comm: + add_comm_filter(optarg); + break; + case OPT_check_event_parsing: + check_event_parsing = 1; + break; + case OPT_nodate: + no_date = 1; + break; + case OPT_stat: + show_stat = 1; + break; + case OPT_boundary: + /* Debug to look at buffer breaks */ + buffer_breaks = 1; + break; + case OPT_debug: + buffer_breaks = 1; + tracecmd_set_debug(true); + break; + case OPT_profile: + profile = 1; + break; + case OPT_uname: + show_uname = 1; + break; + case OPT_version: + show_version = 1; + break; + case OPT_bycomm: + trace_profile_set_merge_like_comms(); + break; + case OPT_ts2secs: + ts2sc = atoll(optarg); + if (multi_inputs) + last_input_file->ts2secs = ts2sc; + else + ts2secs = ts2sc; + break; + case OPT_tsoffset: + tsoffset = atoll(optarg); + if (multi_inputs) + last_input_file->tsoffset = tsoffset; + if (!input_file) + die("--ts-offset must come after -i"); + break; + case OPT_tsdiff: + tsdiff 
= 1; + break; + case OPT_tscheck: + tscheck = 1; + break; + case OPT_raw_ts: + raw_ts = 1; + break; + case OPT_align_ts: + align_ts = 1; + break; + case 'V': + case OPT_verbose: + show_status = 1; + if (trace_set_verbose(optarg) < 0) + die("invalid verbose level %s", optarg); + break; + default: + usage(argv); + } + } + + if ((argc - optind) >= 2) { + if (input_file) + usage(argv); + input_file = argv[optind + 1]; + } + + if (!input_file) + input_file = default_input_file; + + if (!multi_inputs) { + add_input(input_file); + if (tsoffset) + last_input_file->tsoffset = tsoffset; + } else if (show_wakeup) + die("Wakeup tracing can only be done on a single input file"); + + list_for_each_entry(inputs, &input_files, list) { + handle = read_trace_header(inputs->file, open_flags); + if (!handle) + die("error reading header for %s", inputs->file); + + /* If used with instances, top instance will have no tag */ + add_handle(handle, multi_inputs ? inputs->file : NULL); + + if (no_date) + tracecmd_set_flag(handle, TRACECMD_FL_IGNORE_DATE); + if (raw_ts) + tracecmd_set_flag(handle, TRACECMD_FL_RAW_TS); + page_size = tracecmd_page_size(handle); + + if (show_page_size) { + printf("file page size is %d, and host page size is %d\n", + page_size, + getpagesize()); + return; + } + + if (inputs->tsoffset) + tracecmd_set_ts_offset(handle, inputs->tsoffset); + + if (inputs->ts2secs) + tracecmd_set_ts2secs(handle, inputs->ts2secs); + else if (ts2secs) + tracecmd_set_ts2secs(handle, ts2secs); + + pevent = tracecmd_get_tep(handle); + + if (nanosec) + tep_set_flag(pevent, TEP_NSEC_OUTPUT); + + if (raw_format) + format_type = TEP_PRINT_INFO_RAW; + + if (test_filters_mode) + tep_set_test_filters(pevent, 1); + + if (functions) + add_functions(pevent, functions); + + if (show_endian) { + printf("file is %s endian and host is %s endian\n", + tep_is_file_bigendian(pevent) ? "big" : "little", + tep_is_local_bigendian(pevent) ? 
"big" : "little"); + return; + } + + if (print_events) { + tracecmd_print_events(handle, NULL); + return; + } + + if (print_event) { + tracecmd_print_events(handle, print_event); + return; + } + + ret = tracecmd_read_headers(handle, 0); + if (check_event_parsing) { + if (ret || tracecmd_get_parsing_failures(handle)) + exit(EINVAL); + else + exit(0); + } else { + if (ret) + return; + } + + if (show_funcs) { + tep_print_funcs(pevent); + return; + } + if (show_printk) { + tep_print_printk(pevent); + return; + } + + if (show_events) { + struct tep_event **events; + struct tep_event *event; + int i; + + events = tep_list_events(pevent, TEP_EVENT_SORT_SYSTEM); + for (i = 0; events[i]; i++) { + event = events[i]; + if (event->system) + printf("%s:", event->system); + printf("%s\n", event->name); + } + return; + } + + if (show_cpus) { + int cpus; + int ret; + int i; + + if (!tracecmd_is_buffer_instance(handle)) { + ret = tracecmd_init_data(handle); + if (ret < 0) + die("failed to init data"); + } + cpus = tracecmd_cpus(handle); + printf("List of CPUs in %s with data:\n", inputs->file); + for (i = 0; i < cpus; i++) { + if (tracecmd_read_cpu_first(handle, i)) + printf(" %d\n", i); + } + continue; + } + + set_event_flags(pevent, nohandler_events, TEP_EVENT_FL_NOHANDLE); + set_event_flags(pevent, raw_events, TEP_EVENT_FL_PRINTRAW); + } + + if (show_cpus) + return; + + otype = OUTPUT_NORMAL; + + if (tracecmd_get_flags(handle) & TRACECMD_FL_RAW_TS) { + tep_func_repeat_format = "%d"; + } else if (tracecmd_get_flags(handle) & TRACECMD_FL_IN_USECS) { + if (tep_test_flag(tracecmd_get_tep(handle), TEP_NSEC_OUTPUT)) + tep_func_repeat_format = "%9.1d"; + else + tep_func_repeat_format = "%6.1000d"; + } else { + tep_func_repeat_format = "%12d"; + } + + + if (show_stat) + otype = OUTPUT_STAT_ONLY; + /* yeah yeah, uname overrides stat */ + if (show_uname) + otype = OUTPUT_UNAME_ONLY; + /* and version overrides uname! 
*/ + if (show_version) + otype = OUTPUT_VERSION_ONLY; + read_data_info(&handle_list, otype, global, align_ts); + + list_for_each_entry(handles, &handle_list, list) { + tracecmd_close(handles->handle); + } + free_handles(); + free_inputs(); + + finish_wakeup(); + + return; +} diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c new file mode 100644 index 00000000..27c4e7ba --- /dev/null +++ b/tracecmd/trace-record.c @@ -0,0 +1,7322 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <dirent.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <getopt.h> +#include <time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/wait.h> +#include <sys/socket.h> +#include <sys/syscall.h> +#include <sys/utsname.h> +#ifndef NO_PTRACE +#include <sys/ptrace.h> +#else +#ifdef WARN_NO_PTRACE +#warning ptrace not supported. -c feature will not work +#endif +#endif +#include <netdb.h> +#include <fcntl.h> +#include <unistd.h> +#include <ctype.h> +#include <sched.h> +#include <glob.h> +#include <errno.h> +#include <limits.h> +#include <libgen.h> +#include <poll.h> +#include <pwd.h> +#include <grp.h> + +#include "tracefs.h" +#include "version.h" +#include "trace-local.h" +#include "trace-msg.h" + +#define _STR(x) #x +#define STR(x) _STR(x) + +#define TRACE_CTRL "tracing_on" +#define TRACE "trace" +#define AVAILABLE "available_tracers" +#define CURRENT "current_tracer" +#define ITER_CTRL "trace_options" +#define MAX_LATENCY "tracing_max_latency" +#define STAMP "stamp" +#define FUNC_STACK_TRACE "func_stack_trace" +#define TSC_CLOCK "x86-tsc" + +#define dprint(fmt, ...) 
tracecmd_debug(fmt, ##__VA_ARGS__) + +enum trace_type { + TRACE_TYPE_RECORD = 1, + TRACE_TYPE_START = (1 << 1), + TRACE_TYPE_STREAM = (1 << 2), + TRACE_TYPE_EXTRACT = (1 << 3), + TRACE_TYPE_SET = (1 << 4), +}; + +static tracecmd_handle_init_func handle_init = NULL; + +static int rt_prio; + +static int keep; + +static int latency; +static int sleep_time = 1000; +static int recorder_threads; +static struct pid_record_data *pids; +static int buffers; + +/* Clear all function filters */ +static int clear_function_filters; + +static bool no_fifos; + +static char *host; + +static const char *gai_err; + +static bool quiet; + +static bool fork_process; + +/* Max size to let a per cpu file get */ +static int max_kb; + +static int do_ptrace; + +static int filter_task; +static bool no_filter = false; + +static int local_cpu_count; + +static int finished; + +/* setting of /proc/sys/kernel/ftrace_enabled */ +static int fset; + +static unsigned recorder_flags; + +/* Try a few times to get an accurate date */ +static int date2ts_tries = 50; + +static struct func_list *graph_funcs; + +static int func_stack; + +static int save_stdout = -1; + +static struct hook_list *hooks; + +struct event_list { + struct event_list *next; + const char *event; + char *trigger; + char *filter; + char *pid_filter; + char *filter_file; + char *trigger_file; + char *enable_file; + int neg; +}; + +struct tracecmd_event_list *listed_events; + +struct events { + struct events *sibling; + struct events *children; + struct events *next; + char *name; +}; + +/* Files to be reset when done recording */ +struct reset_file { + struct reset_file *next; + char *path; + char *reset; + int prio; +}; + +static struct reset_file *reset_files; + +/* Triggers need to be cleared in a special way */ +static struct reset_file *reset_triggers; + +struct buffer_instance top_instance; +struct buffer_instance *buffer_instances; +struct buffer_instance *first_instance; + +static struct tracecmd_recorder *recorder; + +static 
int ignore_event_not_found = 0; + +static inline int is_top_instance(struct buffer_instance *instance) +{ + return instance == &top_instance; +} + +static inline int no_top_instance(void) +{ + return first_instance != &top_instance; +} + +static void init_instance(struct buffer_instance *instance) +{ + instance->event_next = &instance->events; +} + +enum { + RESET_DEFAULT_PRIO = 0, + RESET_HIGH_PRIO = 100000, +}; + +enum trace_cmd { + CMD_extract, + CMD_start, + CMD_stream, + CMD_profile, + CMD_record, + CMD_record_agent, + CMD_set, +}; + +struct common_record_context { + enum trace_cmd curr_cmd; + struct buffer_instance *instance; + const char *output; + char *date2ts; + char *user; + const char *clock; + const char *compression; + struct tsc_nsec tsc2nsec; + int data_flags; + int tsync_loop_interval; + + int record_all; + int total_disable; + int disable; + int events; + int global; + int filtered; + int date; + int manual; + int topt; + int run_command; + int saved_cmdlines_size; + int file_version; +}; + +static void add_reset_file(const char *file, const char *val, int prio) +{ + struct reset_file *reset; + struct reset_file **last = &reset_files; + + /* Only reset if we are not keeping the state */ + if (keep) + return; + + reset = malloc(sizeof(*reset)); + if (!reset) + die("Failed to allocate reset"); + reset->path = strdup(file); + reset->reset = strdup(val); + reset->prio = prio; + if (!reset->path || !reset->reset) + die("Failed to allocate reset path or val"); + + while (*last && (*last)->prio > prio) + last = &(*last)->next; + + reset->next = *last; + *last = reset; +} + +static void add_reset_trigger(const char *file) +{ + struct reset_file *reset; + + /* Only reset if we are not keeping the state */ + if (keep) + return; + + reset = malloc(sizeof(*reset)); + if (!reset) + die("Failed to allocate reset"); + reset->path = strdup(file); + + reset->next = reset_triggers; + reset_triggers = reset; +} + +/* To save the contents of the file */ +static void 
reset_save_file(const char *file, int prio) +{ + char *content; + + content = get_file_content(file); + if (content) { + add_reset_file(file, content, prio); + free(content); + } +} + +/* + * reset_save_file_cond - register a reset for @file, substituting a placeholder value + * @file: the file to check + * @prio: priority handed on to add_reset_file() + * @nop: If the content of the file is this, use the reset value + * @reset: What to write if the file == @nop + * + * Does nothing when the state is being kept or when @file cannot be read. + */ +static void reset_save_file_cond(const char *file, int prio, + const char *nop, const char *reset) +{ + char *content; + char *cond; + + if (keep) + return; + + content = get_file_content(file); + /* Nothing could be read, so there is no state to restore (same as reset_save_file) */ + if (!content) + return; + + cond = strstrip(content); + + if (strcmp(cond, nop) == 0) + add_reset_file(file, reset, prio); + else + add_reset_file(file, content, prio); + + free(content); +} + +/** + * add_instance - add a buffer instance to the internal list + * @instance: The buffer instance to add + */ +void add_instance(struct buffer_instance *instance, int cpu_count) +{ + init_instance(instance); + instance->next = buffer_instances; + if (first_instance == buffer_instances) + first_instance = instance; + buffer_instances = instance; + instance->cpu_count = cpu_count; + buffers++; +} + +static void instance_reset_file_save(struct buffer_instance *instance, char *file, int prio) +{ + char *path; + + path = tracefs_instance_get_file(instance->tracefs, file); + if (path) + reset_save_file(path, prio); + tracefs_put_tracing_file(path); +} + +static void test_set_event_pid(struct buffer_instance *instance) +{ + static int have_set_event_pid; + static int have_event_fork; + static int have_func_fork; + + if (!have_set_event_pid && + tracefs_file_exists(top_instance.tracefs, "set_event_pid")) + have_set_event_pid = 1; + if (!have_event_fork && + tracefs_file_exists(top_instance.tracefs, "options/event-fork")) + have_event_fork = 1; + if (!have_func_fork && + tracefs_file_exists(top_instance.tracefs, "options/function-fork")) + have_func_fork = 1; + + if (!instance->have_set_event_pid && have_set_event_pid) { + instance->have_set_event_pid = 1; + 
instance_reset_file_save(instance, "set_event_pid", + RESET_DEFAULT_PRIO); + } + if (!instance->have_event_fork && have_event_fork) { + instance->have_event_fork = 1; + instance_reset_file_save(instance, "options/event-fork", + RESET_DEFAULT_PRIO); + } + if (!instance->have_func_fork && have_func_fork) { + instance->have_func_fork = 1; + instance_reset_file_save(instance, "options/function-fork", + RESET_DEFAULT_PRIO); + } +} + +/** + * allocate_instance - allocate a new buffer instance, + * it must exist in the ftrace system + * @name: The name of the instance (instance will point to this) + * + * Returns a newly allocated instance. In case of an error or if the + * instance does not exist in the ftrace system, NULL is returned. + */ +struct buffer_instance *allocate_instance(const char *name) +{ + struct buffer_instance *instance; + + instance = calloc(1, sizeof(*instance)); + if (!instance) + return NULL; + if (name) + instance->name = strdup(name); + if (tracefs_instance_exists(name)) { + instance->tracefs = tracefs_instance_create(name); + if (!instance->tracefs) + goto error; + } + + return instance; + +error: + if (instance) { + free(instance->name); + tracefs_instance_free(instance->tracefs); + free(instance); + } + return NULL; +} + +static int __add_all_instances(const char *tracing_dir) +{ + struct dirent *dent; + char *instances_dir; + struct stat st; + DIR *dir; + int ret; + + if (!tracing_dir) + return -1; + + instances_dir = append_file(tracing_dir, "instances"); + if (!instances_dir) + return -1; + + ret = stat(instances_dir, &st); + if (ret < 0 || !S_ISDIR(st.st_mode)) { + ret = -1; + goto out_free; + } + + dir = opendir(instances_dir); + if (!dir) { + ret = -1; + goto out_free; + } + + while ((dent = readdir(dir))) { + const char *name = strdup(dent->d_name); + char *instance_path; + struct buffer_instance *instance; + + if (strcmp(name, ".") == 0 || + strcmp(name, "..") == 0) + continue; + + instance_path = append_file(instances_dir, name); + ret 
= stat(instance_path, &st); + if (ret < 0 || !S_ISDIR(st.st_mode)) { + free(instance_path); + continue; + } + free(instance_path); + + instance = allocate_instance(name); + if (!instance) + die("Failed to create instance"); + add_instance(instance, local_cpu_count); + } + + closedir(dir); + ret = 0; + + out_free: + free(instances_dir); + return ret; +} + +/** + * add_all_instances - Add all pre-existing instances to the internal list + * + * Looks up the top-level tracing directory with tracefs_tracing_dir() and + * dies if it cannot be found. The result of __add_all_instances() is + * ignored, so nothing is reported to the caller. + */ +void add_all_instances(void) +{ + const char *tracing_dir = tracefs_tracing_dir(); + if (!tracing_dir) + die("can't get the tracing directory"); + + __add_all_instances(tracing_dir); +} + +/** + * tracecmd_stat_cpu_instance - show the buffer stats of a particular CPU + * @instance: the buffer instance whose per_cpu/cpu<N>/stats file is read + * @s: the trace_seq to record the data in. + * @cpu: the CPU to stat + * + * Returns silently if the file name cannot be allocated or the stats file + * cannot be opened. + */ +void tracecmd_stat_cpu_instance(struct buffer_instance *instance, + struct trace_seq *s, int cpu) +{ + char buf[BUFSIZ]; + char *path; + char *file; + int fd; + int r; + + file = malloc(40); + if (!file) + return; + snprintf(file, 40, "per_cpu/cpu%d/stats", cpu); + + path = tracefs_instance_get_file(instance->tracefs, file); + free(file); + fd = open(path, O_RDONLY); + tracefs_put_tracing_file(path); + if (fd < 0) + return; + + while ((r = read(fd, buf, BUFSIZ)) > 0) + trace_seq_printf(s, "%.*s", r, buf); + + close(fd); +} + +/** + * tracecmd_stat_cpu - show the buffer stats of a particular CPU + * @s: the trace_seq to record the data in. 
+ * @cpu: the CPU to stat + * + */ +void tracecmd_stat_cpu(struct trace_seq *s, int cpu) +{ + tracecmd_stat_cpu_instance(&top_instance, s, cpu); +} + +static void add_event(struct buffer_instance *instance, struct event_list *event) +{ + *instance->event_next = event; + instance->event_next = &event->next; + event->next = NULL; +} + +static void reset_event_list(struct buffer_instance *instance) +{ + instance->events = NULL; + init_instance(instance); +} + +static char *get_temp_file(struct buffer_instance *instance, int cpu) +{ + const char *output_file = instance->output_file; + const char *name; + char *file = NULL; + int size; + + name = tracefs_instance_get_name(instance->tracefs); + if (name) { + size = snprintf(file, 0, "%s.%s.cpu%d", output_file, name, cpu); + file = malloc(size + 1); + if (!file) + die("Failed to allocate temp file for %s", name); + sprintf(file, "%s.%s.cpu%d", output_file, name, cpu); + } else { + size = snprintf(file, 0, "%s.cpu%d", output_file, cpu); + file = malloc(size + 1); + if (!file) + die("Failed to allocate temp file for %s", name); + sprintf(file, "%s.cpu%d", output_file, cpu); + } + + return file; +} + +char *trace_get_guest_file(const char *file, const char *guest) +{ + const char *p; + char *out = NULL; + int ret, base_len; + + p = strrchr(file, '.'); + if (p && p != file) + base_len = p - file; + else + base_len = strlen(file); + + ret = asprintf(&out, "%.*s-%s%s", base_len, file, + guest, file + base_len); + if (ret < 0) + return NULL; + return out; +} + +static void put_temp_file(char *file) +{ + free(file); +} + +static void delete_temp_file(struct buffer_instance *instance, int cpu) +{ + const char *output_file = instance->output_file; + const char *name; + char file[PATH_MAX]; + + name = tracefs_instance_get_name(instance->tracefs); + if (name) + snprintf(file, PATH_MAX, "%s.%s.cpu%d", output_file, name, cpu); + else + snprintf(file, PATH_MAX, "%s.cpu%d", output_file, cpu); + unlink(file); +} + +static int 
kill_thread_instance(int start, struct buffer_instance *instance) +{ + int n = start; + int i; + + for (i = 0; i < instance->cpu_count; i++) { + if (pids[n].pid > 0) { + kill(pids[n].pid, SIGKILL); + delete_temp_file(instance, i); + pids[n].pid = 0; + if (pids[n].brass[0] >= 0) + close(pids[n].brass[0]); + } + n++; + } + + return n; +} + +static void kill_threads(void) +{ + struct buffer_instance *instance; + int i = 0; + + if (!recorder_threads || !pids) + return; + + for_all_instances(instance) + i = kill_thread_instance(i, instance); +} + +void die(const char *fmt, ...) +{ + va_list ap; + int ret = errno; + + if (errno) + perror("trace-cmd"); + else + ret = -1; + + kill_threads(); + va_start(ap, fmt); + fprintf(stderr, " "); + vfprintf(stderr, fmt, ap); + va_end(ap); + + fprintf(stderr, "\n"); + exit(ret); +} + +static int delete_thread_instance(int start, struct buffer_instance *instance) +{ + int n = start; + int i; + + for (i = 0; i < instance->cpu_count; i++) { + if (pids) { + if (pids[n].pid) { + delete_temp_file(instance, i); + if (pids[n].pid < 0) + pids[n].pid = 0; + } + n++; + } else + /* Extract does not allocate pids */ + delete_temp_file(instance, i); + } + return n; +} + +static void delete_thread_data(void) +{ + struct buffer_instance *instance; + int i = 0; + + for_all_instances(instance) + i = delete_thread_instance(i, instance); + /* + * Top instance temp files are still created even if it + * isn't used. 
+ */ + if (no_top_instance()) { + for (i = 0; i < local_cpu_count; i++) + delete_temp_file(&top_instance, i); + } +} + +static void +add_tsc2nsec(struct tracecmd_output *handle, struct tsc_nsec *tsc2nsec) +{ + /* multiplier, shift, offset */ + struct iovec vector[3]; + + vector[0].iov_len = 4; + vector[0].iov_base = &tsc2nsec->mult; + vector[1].iov_len = 4; + vector[1].iov_base = &tsc2nsec->shift; + vector[2].iov_len = 8; + vector[2].iov_base = &tsc2nsec->offset; + + tracecmd_add_option_v(handle, TRACECMD_OPTION_TSC2NSEC, vector, 3); +} + +static void host_tsync_complete(struct common_record_context *ctx, + struct buffer_instance *instance) +{ + struct tracecmd_output *handle = NULL; + int fd = -1; + int ret; + + ret = tracecmd_tsync_with_guest_stop(instance->tsync); + if (!ret) { + fd = open(instance->output_file, O_RDWR); + if (fd < 0) + die("error opening %s", instance->output_file); + handle = tracecmd_get_output_handle_fd(fd); + if (!handle) + die("cannot create output handle"); + + if (ctx->tsc2nsec.mult) + add_tsc2nsec(handle, &ctx->tsc2nsec); + + tracecmd_write_guest_time_shift(handle, instance->tsync); + tracecmd_append_options(handle); + tracecmd_output_close(handle); + } + + tracecmd_tsync_free(instance->tsync); + instance->tsync = NULL; +} + +static void tell_guests_to_stop(struct common_record_context *ctx) +{ + struct buffer_instance *instance; + + /* Send close message to guests */ + for_all_instances(instance) { + if (is_guest(instance)) + tracecmd_msg_send_close_msg(instance->msg_handle); + } + + for_all_instances(instance) { + if (is_guest(instance)) + host_tsync_complete(ctx, instance); + } + + /* Wait for guests to acknowledge */ + for_all_instances(instance) { + if (is_guest(instance)) { + tracecmd_msg_wait_close_resp(instance->msg_handle); + tracecmd_msg_handle_close(instance->msg_handle); + } + } +} + +static void stop_threads(enum trace_type type) +{ + int ret; + int i; + + if (!recorder_threads) + return; + + /* Tell all threads to finish 
up */ + for (i = 0; i < recorder_threads; i++) { + if (pids[i].pid > 0) { + kill(pids[i].pid, SIGUSR1); + } + } + + /* Flush out the pipes */ + if (type & TRACE_TYPE_STREAM) { + do { + ret = trace_stream_read(pids, recorder_threads, NULL); + } while (ret > 0); + } +} + +static void wait_threads() +{ + int i; + + for (i = 0; i < recorder_threads; i++) { + if (pids[i].pid > 0) { + waitpid(pids[i].pid, NULL, 0); + pids[i].pid = -1; + } + } +} + +static int create_recorder(struct buffer_instance *instance, int cpu, + enum trace_type type, int *brass); + +static void flush_threads(void) +{ + struct buffer_instance *instance; + long ret; + int i; + + for_all_instances(instance) { + for (i = 0; i < instance->cpu_count; i++) { + /* Extract doesn't support sub buffers yet */ + ret = create_recorder(instance, i, TRACE_TYPE_EXTRACT, NULL); + if (ret < 0) + die("error reading ring buffer"); + } + } +} + +static int set_ftrace_enable(const char *path, int set) +{ + struct stat st; + int fd; + char *val = set ? "1" : "0"; + int ret; + + /* if ftace_enable does not exist, simply ignore it */ + fd = stat(path, &st); + if (fd < 0) + return -ENODEV; + + reset_save_file(path, RESET_DEFAULT_PRIO); + + ret = -1; + fd = open(path, O_WRONLY); + if (fd < 0) + goto out; + + /* Now set or clear the function option */ + ret = write(fd, val, 1); + close(fd); + + out: + return ret < 0 ? ret : 0; +} + +static int set_ftrace_proc(int set) +{ + const char *path = "/proc/sys/kernel/ftrace_enabled"; + int ret; + + ret = set_ftrace_enable(path, set); + if (ret == -1) + die ("Can't %s ftrace", set ? 
"enable" : "disable"); + return ret; +} + +static int set_ftrace(struct buffer_instance *instance, int set, int use_proc) +{ + char *path; + int ret; + + path = tracefs_instance_get_file(instance->tracefs, "options/function-trace"); + if (!path) + return -1; + ret = set_ftrace_enable(path, set); + tracefs_put_tracing_file(path); + + /* Always enable ftrace_enable proc file when set is true */ + if (ret < 0 || set || use_proc) + ret = set_ftrace_proc(set); + + return ret; +} + +static int write_file(const char *file, const char *str) +{ + int ret; + int fd; + + fd = open(file, O_WRONLY | O_TRUNC); + if (fd < 0) + die("opening to '%s'", file); + ret = write(fd, str, strlen(str)); + close(fd); + return ret; +} + +static void __clear_trace(struct buffer_instance *instance) +{ + FILE *fp; + char *path; + + if (is_guest(instance)) + return; + + /* reset the trace */ + path = tracefs_instance_get_file(instance->tracefs, "trace"); + fp = fopen(path, "w"); + if (!fp) + die("writing to '%s'", path); + tracefs_put_tracing_file(path); + fwrite("0", 1, 1, fp); + fclose(fp); +} + +static void clear_trace_instances(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) + __clear_trace(instance); +} + +static void reset_max_latency(struct buffer_instance *instance) +{ + tracefs_instance_file_write(instance->tracefs, + "tracing_max_latency", "0"); +} + +static int add_filter_pid(struct buffer_instance *instance, int pid, int exclude) +{ + struct filter_pids *p; + char buf[100]; + + for (p = instance->filter_pids; p; p = p->next) { + if (p->pid == pid) { + p->exclude = exclude; + return 0; + } + } + + p = malloc(sizeof(*p)); + if (!p) + die("Failed to allocate pid filter"); + p->next = instance->filter_pids; + p->exclude = exclude; + p->pid = pid; + instance->filter_pids = p; + instance->nr_filter_pids++; + + instance->len_filter_pids += sprintf(buf, "%d", pid); + + return 1; +} + +static void add_filter_pid_all(int pid, int exclude) +{ + struct buffer_instance 
*instance; + + for_all_instances(instance) + add_filter_pid(instance, pid, exclude); +} + +static void reset_save_ftrace_pid(struct buffer_instance *instance) +{ + static char *path; + + if (!tracefs_file_exists(instance->tracefs, "set_ftrace_pid")) + return; + + path = tracefs_instance_get_file(instance->tracefs, "set_ftrace_pid"); + if (!path) + return; + + reset_save_file_cond(path, RESET_DEFAULT_PRIO, "no pid", ""); + + tracefs_put_tracing_file(path); +} + +static void update_ftrace_pid(struct buffer_instance *instance, + const char *pid, int reset) +{ + int fd = -1; + char *path; + int ret; + + if (!tracefs_file_exists(instance->tracefs, "set_ftrace_pid")) + return; + + path = tracefs_instance_get_file(instance->tracefs, "set_ftrace_pid"); + if (!path) + return; + + fd = open(path, O_WRONLY | O_CLOEXEC | (reset ? O_TRUNC : 0)); + tracefs_put_tracing_file(path); + if (fd < 0) + return; + + ret = write(fd, pid, strlen(pid)); + + /* + * Older kernels required "-1" to disable pid + */ + if (ret < 0 && !strlen(pid)) + ret = write(fd, "-1", 2); + + if (ret < 0) + die("error writing to %s", path); + /* add whitespace in case another pid is written */ + write(fd, " ", 1); + close(fd); +} + +static void update_ftrace_pids(int reset) +{ + struct buffer_instance *instance; + struct filter_pids *pid; + static int first = 1; + char buf[100]; + int rst; + + for_all_instances(instance) { + if (first) + reset_save_ftrace_pid(instance); + rst = reset; + for (pid = instance->filter_pids; pid; pid = pid->next) { + if (pid->exclude) + continue; + snprintf(buf, 100, "%d ", pid->pid); + update_ftrace_pid(instance, buf, rst); + /* Only reset the first entry */ + rst = 0; + } + } + + if (first) + first = 0; +} + +static void update_event_filters(struct buffer_instance *instance); +static void update_pid_event_filters(struct buffer_instance *instance); + +static void append_filter_pid_range(char **filter, int *curr_len, + const char *field, + int start_pid, int end_pid, bool exclude) 
+{ + const char *op = "", *op1, *op2, *op3; + int len; + + if (*filter && **filter) + op = exclude ? "&&" : "||"; + + /* Handle thus case explicitly so that we get `pid==3` instead of + * `pid>=3&&pid<=3` for singleton ranges + */ + if (start_pid == end_pid) { +#define FMT "%s(%s%s%d)" + len = snprintf(NULL, 0, FMT, op, + field, exclude ? "!=" : "==", start_pid); + *filter = realloc(*filter, *curr_len + len + 1); + if (!*filter) + die("realloc"); + + len = snprintf(*filter + *curr_len, len + 1, FMT, op, + field, exclude ? "!=" : "==", start_pid); + *curr_len += len; + + return; +#undef FMT + } + + if (exclude) { + op1 = "<"; + op2 = "||"; + op3 = ">"; + } else { + op1 = ">="; + op2 = "&&"; + op3 = "<="; + } + +#define FMT "%s(%s%s%d%s%s%s%d)" + len = snprintf(NULL, 0, FMT, op, + field, op1, start_pid, op2, + field, op3, end_pid); + *filter = realloc(*filter, *curr_len + len + 1); + if (!*filter) + die("realloc"); + + len = snprintf(*filter + *curr_len, len + 1, FMT, op, + field, op1, start_pid, op2, + field, op3, end_pid); + *curr_len += len; +} + +/** + * make_pid_filter - create a filter string to all pids against @field + * @curr_filter: Append to a previous filter (may realloc). Can be NULL + * @field: The field to compare the pids against + * + * Creates a new string or appends to an existing one if @curr_filter + * is not NULL. The new string will contain a filter with all pids + * in pid_filter list with the format (@field == pid) || .. + * If @curr_filter is not NULL, it will add this string as: + * (@curr_filter) && ((@field == pid) || ...) 
+ */ +static char *make_pid_filter(struct buffer_instance *instance, + char *curr_filter, const char *field) +{ + int start_pid = -1, last_pid = -1; + int last_exclude = -1; + struct filter_pids *p; + char *filter = NULL; + int curr_len = 0; + + /* Use the new method if possible */ + if (instance->have_set_event_pid) + return NULL; + + if (!instance->filter_pids) + return curr_filter; + + for (p = instance->filter_pids; p; p = p->next) { + /* + * PIDs are inserted in `filter_pids` from the front and that's + * why we expect them in descending order here. + */ + if (p->pid == last_pid - 1 && p->exclude == last_exclude) { + last_pid = p->pid; + continue; + } + + if (start_pid != -1) + append_filter_pid_range(&filter, &curr_len, field, + last_pid, start_pid, + last_exclude); + + start_pid = last_pid = p->pid; + last_exclude = p->exclude; + + } + append_filter_pid_range(&filter, &curr_len, field, + last_pid, start_pid, last_exclude); + + if (curr_filter) { + char *save = filter; + asprintf(&filter, "(%s)&&(%s)", curr_filter, filter); + free(save); + } + + return filter; +} + +#define _STRINGIFY(x) #x +#define STRINGIFY(x) _STRINGIFY(x) + +static int get_pid_addr_maps(struct buffer_instance *instance, int pid) +{ + struct pid_addr_maps *maps = instance->pid_maps; + struct tracecmd_proc_addr_map *map; + unsigned long long begin, end; + struct pid_addr_maps *m; + char mapname[PATH_MAX+1]; + char fname[PATH_MAX+1]; + char buf[PATH_MAX+100]; + FILE *f; + int ret; + int res; + int i; + + sprintf(fname, "/proc/%d/exe", pid); + ret = readlink(fname, mapname, PATH_MAX); + if (ret >= PATH_MAX || ret < 0) + return -ENOENT; + mapname[ret] = 0; + + sprintf(fname, "/proc/%d/maps", pid); + f = fopen(fname, "r"); + if (!f) + return -ENOENT; + + while (maps) { + if (pid == maps->pid) + break; + maps = maps->next; + } + + ret = -ENOMEM; + if (!maps) { + maps = calloc(1, sizeof(*maps)); + if (!maps) + goto out_fail; + maps->pid = pid; + maps->next = instance->pid_maps; + 
instance->pid_maps = maps; + } else { + for (i = 0; i < maps->nr_lib_maps; i++) + free(maps->lib_maps[i].lib_name); + free(maps->lib_maps); + maps->lib_maps = NULL; + maps->nr_lib_maps = 0; + free(maps->proc_name); + } + + maps->proc_name = strdup(mapname); + if (!maps->proc_name) + goto out; + + while (fgets(buf, sizeof(buf), f)) { + mapname[0] = '\0'; + res = sscanf(buf, "%llx-%llx %*s %*x %*s %*d %"STRINGIFY(PATH_MAX)"s", + &begin, &end, mapname); + if (res == 3 && mapname[0] != '\0') { + map = realloc(maps->lib_maps, + (maps->nr_lib_maps + 1) * sizeof(*map)); + if (!map) + goto out_fail; + /* + * Publish the new array right away: realloc() may have + * released the old one, and out_fail walks and frees + * maps->lib_maps. + */ + maps->lib_maps = map; + map[maps->nr_lib_maps].end = end; + map[maps->nr_lib_maps].start = begin; + map[maps->nr_lib_maps].lib_name = strdup(mapname); + if (!map[maps->nr_lib_maps].lib_name) + goto out_fail; + maps->nr_lib_maps++; + } + } +out: + fclose(f); + return 0; + +out_fail: + /* Unlink the entry from instance->pid_maps and release everything it owns */ + fclose(f); + if (maps) { + for (i = 0; i < maps->nr_lib_maps; i++) + free(maps->lib_maps[i].lib_name); + if (instance->pid_maps != maps) { + m = instance->pid_maps; + while (m) { + if (m->next == maps) { + m->next = maps->next; + break; + } + m = m->next; + } + } else + instance->pid_maps = maps->next; + free(maps->lib_maps); + maps->lib_maps = NULL; + maps->nr_lib_maps = 0; + free(maps->proc_name); + maps->proc_name = NULL; + free(maps); + } + return ret; +} + +static void get_filter_pid_maps(void) +{ + struct buffer_instance *instance; + struct filter_pids *p; + + for_all_instances(instance) { + if (!instance->get_procmap) + continue; + for (p = instance->filter_pids; p; p = p->next) { + if (p->exclude) + continue; + get_pid_addr_maps(instance, p->pid); + } + } +} + +static void update_task_filter(void) +{ + struct buffer_instance *instance; + int pid = getpid(); + + if (no_filter) + return; + + get_filter_pid_maps(); + + if (filter_task) + add_filter_pid_all(pid, 0); + + for_all_instances(instance) { + if (!instance->filter_pids) + continue; + if (instance->common_pid_filter) + 
free(instance->common_pid_filter); + instance->common_pid_filter = make_pid_filter(instance, NULL, + "common_pid"); + } + update_ftrace_pids(1); + for_all_instances(instance) + update_pid_event_filters(instance); +} + +static pid_t trace_waitpid(enum trace_type type, pid_t pid, int *status, int options) +{ + struct timeval tv = { 1, 0 }; + int ret; + + if (type & TRACE_TYPE_STREAM) + options |= WNOHANG; + + do { + ret = waitpid(pid, status, options); + if (ret != 0) + return ret; + + if (type & TRACE_TYPE_STREAM) + trace_stream_read(pids, recorder_threads, &tv); + } while (1); +} + +#ifndef __NR_pidfd_open +#define __NR_pidfd_open 434 +#endif + +static int pidfd_open(pid_t pid, unsigned int flags) { + return syscall(__NR_pidfd_open, pid, flags); +} + +static int trace_waitpidfd(id_t pidfd) { + struct pollfd pollfd; + + pollfd.fd = pidfd; + pollfd.events = POLLIN; + + while (!finished) { + int ret = poll(&pollfd, 1, -1); + /* If waitid was interrupted, keep waiting */ + if (ret < 0 && errno == EINTR) + continue; + else if (ret < 0) + return 1; + else + break; + } + + return 0; +} + +static int trace_wait_for_processes(struct buffer_instance *instance) { + int ret = 0; + int nr_fds = 0; + int i; + int *pidfds; + struct filter_pids *pid; + + pidfds = malloc(sizeof(int) * instance->nr_process_pids); + if (!pidfds) + return 1; + + for (pid = instance->process_pids; + pid && instance->nr_process_pids; + pid = pid->next) { + if (pid->exclude) { + instance->nr_process_pids--; + continue; + } + pidfds[nr_fds] = pidfd_open(pid->pid, 0); + + /* If the pid doesn't exist, the process has probably exited */ + if (pidfds[nr_fds] < 0 && errno == ESRCH) { + instance->nr_process_pids--; + continue; + } else if (pidfds[nr_fds] < 0) { + ret = 1; + goto out; + } + + nr_fds++; + instance->nr_process_pids--; + } + + for (i = 0; i < nr_fds; i++) { + if (trace_waitpidfd(pidfds[i])) { + ret = 1; + goto out; + } + } + +out: + for (i = 0; i < nr_fds; i++) + close(pidfds[i]); + free(pidfds); + 
return ret; +} + +static void add_event_pid(struct buffer_instance *instance, const char *buf) +{ + tracefs_instance_file_write(instance->tracefs, "set_event_pid", buf); +} + +#ifndef NO_PTRACE +/** + * append_pid_filter - add a new pid to an existing filter + * @curr_filter: the filter to append to. If NULL, then allocate one + * @field: The fild to compare the pid to + * @pid: The pid to add to. + */ +static char *append_pid_filter(char *curr_filter, const char *field, int pid) +{ + char *filter; + int len; + + len = snprintf(NULL, 0, "(%s==%d)||", field, pid); + + if (!curr_filter) { + /* No need for +1 as we don't use the "||" */ + filter = malloc(len); + if (!filter) + die("Failed to allocate pid filter"); + sprintf(filter, "(%s==%d)", field, pid); + } else { + int indx = strlen(curr_filter); + + len += indx; + filter = realloc(curr_filter, len + indx + 1); + if (!filter) + die("realloc"); + sprintf(filter + indx, "||(%s==%d)", field, pid); + } + + return filter; +} + +static void append_sched_event(struct event_list *event, const char *field, int pid) +{ + if (!event || !event->pid_filter) + return; + + event->pid_filter = append_pid_filter(event->pid_filter, field, pid); +} + +static void update_sched_events(struct buffer_instance *instance, int pid) +{ + /* + * Also make sure that the sched_switch to this pid + * and wakeups of this pid are also traced. + * Only need to do this if the events are active. 
+ */ + append_sched_event(instance->sched_switch_event, "next_pid", pid); + append_sched_event(instance->sched_wakeup_event, "pid", pid); + append_sched_event(instance->sched_wakeup_new_event, "pid", pid); +} + +static int open_instance_fd(struct buffer_instance *instance, + const char *file, int flags); + +static void add_new_filter_child_pid(int pid, int child) +{ + struct buffer_instance *instance; + struct filter_pids *fpid; + char buf[100]; + + for_all_instances(instance) { + if (!instance->ptrace_child || !instance->filter_pids) + continue; + for (fpid = instance->filter_pids; fpid; fpid = fpid->next) { + if (fpid->pid == pid) + break; + } + if (!fpid) + continue; + + add_filter_pid(instance, child, 0); + sprintf(buf, "%d", child); + update_ftrace_pid(instance, buf, 0); + + instance->common_pid_filter = append_pid_filter(instance->common_pid_filter, + "common_pid", pid); + if (instance->have_set_event_pid) { + add_event_pid(instance, buf); + } else { + update_sched_events(instance, pid); + update_event_filters(instance); + } + } + +} + +static void ptrace_attach(struct buffer_instance *instance, int pid) +{ + int ret; + + ret = ptrace(PTRACE_ATTACH, pid, NULL, 0); + if (ret < 0) { + warning("Unable to trace process %d children", pid); + do_ptrace = 0; + return; + } + if (instance) + add_filter_pid(instance, pid, 0); + else + add_filter_pid_all(pid, 0); +} + +static void enable_ptrace(void) +{ + if (!do_ptrace || !filter_task) + return; + + ptrace(PTRACE_TRACEME, 0, NULL, 0); +} + +static struct buffer_instance *get_intance_fpid(int pid) +{ + struct buffer_instance *instance; + struct filter_pids *fpid; + + for_all_instances(instance) { + for (fpid = instance->filter_pids; fpid; fpid = fpid->next) { + if (fpid->exclude) + continue; + if (fpid->pid == pid) + break; + } + if (fpid) + return instance; + } + + return NULL; +} + +static void ptrace_wait(enum trace_type type) +{ + struct buffer_instance *instance; + struct filter_pids *fpid; + unsigned long 
send_sig; + unsigned long child; + int nr_pids = 0; + siginfo_t sig; + int main_pids; + int cstatus; + int status; + int i = 0; + int *pids; + int event; + int pid; + int ret; + + + for_all_instances(instance) + nr_pids += instance->nr_filter_pids; + + pids = calloc(nr_pids, sizeof(int)); + if (!pids) { + warning("Unable to allocate array for %d PIDs", nr_pids); + return; + } + for_all_instances(instance) { + if (!instance->ptrace_child && !instance->get_procmap) + continue; + + for (fpid = instance->filter_pids; fpid && i < nr_pids; fpid = fpid->next) { + if (fpid->exclude) + continue; + pids[i++] = fpid->pid; + } + } + main_pids = i; + + do { + ret = trace_waitpid(type, -1, &status, WSTOPPED | __WALL); + if (ret < 0) + continue; + + pid = ret; + + if (WIFSTOPPED(status)) { + event = (status >> 16) & 0xff; + ptrace(PTRACE_GETSIGINFO, pid, NULL, &sig); + send_sig = sig.si_signo; + /* Don't send ptrace sigs to child */ + if (send_sig == SIGTRAP || send_sig == SIGSTOP) + send_sig = 0; + switch (event) { + case PTRACE_EVENT_FORK: + case PTRACE_EVENT_VFORK: + case PTRACE_EVENT_CLONE: + /* forked a child */ + ptrace(PTRACE_GETEVENTMSG, pid, NULL, &child); + ptrace(PTRACE_SETOPTIONS, child, NULL, + PTRACE_O_TRACEFORK | + PTRACE_O_TRACEVFORK | + PTRACE_O_TRACECLONE | + PTRACE_O_TRACEEXIT); + add_new_filter_child_pid(pid, child); + ptrace(PTRACE_CONT, child, NULL, 0); + break; + + case PTRACE_EVENT_EXIT: + instance = get_intance_fpid(pid); + if (instance && instance->get_procmap) + get_pid_addr_maps(instance, pid); + ptrace(PTRACE_GETEVENTMSG, pid, NULL, &cstatus); + ptrace(PTRACE_DETACH, pid, NULL, NULL); + break; + } + ptrace(PTRACE_SETOPTIONS, pid, NULL, + PTRACE_O_TRACEFORK | + PTRACE_O_TRACEVFORK | + PTRACE_O_TRACECLONE | + PTRACE_O_TRACEEXIT); + ptrace(PTRACE_CONT, pid, NULL, send_sig); + } + if (WIFEXITED(status) || + (WIFSTOPPED(status) && event == PTRACE_EVENT_EXIT)) { + for (i = 0; i < nr_pids; i++) { + if (pid == pids[i]) { + pids[i] = 0; + main_pids--; + if 
(!main_pids) + finished = 1; + } + } + } + } while (!finished && ret > 0); + + free(pids); +} +#else +static inline void ptrace_wait(enum trace_type type) { } +static inline void enable_ptrace(void) { } +static inline void ptrace_attach(struct buffer_instance *instance, int pid) { } + +#endif /* NO_PTRACE */ + +static void trace_or_sleep(enum trace_type type, bool pwait) +{ + struct timeval tv = { 1 , 0 }; + + if (pwait) + ptrace_wait(type); + else if (type & TRACE_TYPE_STREAM) + trace_stream_read(pids, recorder_threads, &tv); + else + sleep(10); +} + +static int change_user(const char *user) +{ + struct passwd *pwd; + + if (!user) + return 0; + + pwd = getpwnam(user); + if (!pwd) + return -1; + if (initgroups(user, pwd->pw_gid) < 0) + return -1; + if (setgid(pwd->pw_gid) < 0) + return -1; + if (setuid(pwd->pw_uid) < 0) + return -1; + + if (setenv("HOME", pwd->pw_dir, 1) < 0) + return -1; + if (setenv("USER", pwd->pw_name, 1) < 0) + return -1; + if (setenv("LOGNAME", pwd->pw_name, 1) < 0) + return -1; + + return 0; +} + +static void run_cmd(enum trace_type type, const char *user, int argc, char **argv) +{ + int status; + int pid; + + if ((pid = fork()) < 0) + die("failed to fork"); + if (!pid) { + /* child */ + update_task_filter(); + tracecmd_enable_tracing(); + if (!fork_process) + enable_ptrace(); + /* + * If we are using stderr for stdout, switch + * it back to the saved stdout for the code we run. 
+ */ + if (save_stdout >= 0) { + close(1); + dup2(save_stdout, 1); + close(save_stdout); + } + + if (change_user(user) < 0) + die("Failed to change user to %s", user); + + if (execvp(argv[0], argv)) { + fprintf(stderr, "\n********************\n"); + fprintf(stderr, " Unable to exec %s\n", argv[0]); + fprintf(stderr, "********************\n"); + die("Failed to exec %s", argv[0]); + } + } + if (fork_process) + exit(0); + if (do_ptrace) { + ptrace_attach(NULL, pid); + ptrace_wait(type); + } else + trace_waitpid(type, pid, &status, 0); + if (type & (TRACE_TYPE_START | TRACE_TYPE_SET)) + exit(0); +} + +static void +set_plugin_instance(struct buffer_instance *instance, const char *name) +{ + char *path; + char zero = '0'; + int ret; + int fd; + + if (is_guest(instance)) + return; + + path = tracefs_instance_get_file(instance->tracefs, "current_tracer"); + fd = open(path, O_WRONLY); + if (fd < 0) { + /* + * Legacy kernels do not have current_tracer file, and they + * always use nop. So, it doesn't need to try to change the + * plugin for those if name is "nop". + */ + if (!strncmp(name, "nop", 3)) { + tracefs_put_tracing_file(path); + return; + } + die("Opening '%s'", path); + } + ret = write(fd, name, strlen(name)); + close(fd); + + if (ret < 0) + die("writing to '%s'", path); + + tracefs_put_tracing_file(path); + + if (strncmp(name, "function", 8) != 0) + return; + + /* Make sure func_stack_trace option is disabled */ + /* First try instance file, then top level */ + path = tracefs_instance_get_file(instance->tracefs, "options/func_stack_trace"); + fd = open(path, O_WRONLY); + if (fd < 0) { + tracefs_put_tracing_file(path); + path = tracefs_get_tracing_file("options/func_stack_trace"); + fd = open(path, O_WRONLY); + if (fd < 0) { + tracefs_put_tracing_file(path); + return; + } + } + /* + * Always reset func_stack_trace to zero. Don't bother saving + * the original content. 
+ */ + add_reset_file(path, "0", RESET_HIGH_PRIO); + tracefs_put_tracing_file(path); + write(fd, &zero, 1); + close(fd); +} + +static void set_plugin(const char *name) +{ + struct buffer_instance *instance; + + for_all_instances(instance) + set_plugin_instance(instance, name); +} + +static void save_option(struct buffer_instance *instance, const char *option) +{ + struct opt_list *opt; + + opt = malloc(sizeof(*opt)); + if (!opt) + die("Failed to allocate option"); + opt->next = instance->options; + instance->options = opt; + opt->option = option; +} + +static int set_option(struct buffer_instance *instance, const char *option) +{ + FILE *fp; + char *path; + + path = tracefs_instance_get_file(instance->tracefs, "trace_options"); + fp = fopen(path, "w"); + if (!fp) + warning("writing to '%s'", path); + tracefs_put_tracing_file(path); + + if (!fp) + return -1; + + fwrite(option, 1, strlen(option), fp); + fclose(fp); + + return 0; +} + +static void disable_func_stack_trace_instance(struct buffer_instance *instance) +{ + struct stat st; + char *content; + char *path; + char *cond; + int size; + int ret; + + if (is_guest(instance)) + return; + + path = tracefs_instance_get_file(instance->tracefs, "current_tracer"); + ret = stat(path, &st); + tracefs_put_tracing_file(path); + if (ret < 0) + return; + + content = tracefs_instance_file_read(instance->tracefs, + "current_tracer", &size); + cond = strstrip(content); + if (memcmp(cond, "function", size - (cond - content)) !=0) + goto out; + + set_option(instance, "nofunc_stack_trace"); + out: + free(content); +} + +static void disable_func_stack_trace(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) + disable_func_stack_trace_instance(instance); +} + +static void add_reset_options(struct buffer_instance *instance) +{ + struct opt_list *opt; + const char *option; + char *content; + char *path; + char *ptr; + int len; + + if (keep) + return; + + path = tracefs_instance_get_file(instance->tracefs, 
"trace_options"); + content = get_file_content(path); + + for (opt = instance->options; opt; opt = opt->next) { + option = opt->option; + len = strlen(option); + ptr = content; + again: + ptr = strstr(ptr, option); + if (ptr) { + /* First make sure its the option we want */ + if (ptr[len] != '\n') { + ptr += len; + goto again; + } + if (ptr - content >= 2 && strncmp(ptr - 2, "no", 2) == 0) { + /* Make sure this isn't ohno-option */ + if (ptr > content + 2 && *(ptr - 3) != '\n') { + ptr += len; + goto again; + } + /* we enabled it */ + ptr[len] = 0; + add_reset_file(path, ptr-2, RESET_DEFAULT_PRIO); + ptr[len] = '\n'; + continue; + } + /* make sure this is our option */ + if (ptr > content && *(ptr - 1) != '\n') { + ptr += len; + goto again; + } + /* this option hasn't changed, ignore it */ + continue; + } + + /* ptr is NULL, not found, maybe option is a no */ + if (strncmp(option, "no", 2) != 0) + /* option is really not found? */ + continue; + + option += 2; + len = strlen(option); + ptr = content; + loop: + ptr = strstr(content, option); + if (!ptr) + /* Really not found? 
*/ + continue; + + /* make sure this is our option */ + if (ptr[len] != '\n') { + ptr += len; + goto loop; + } + + if (ptr > content && *(ptr - 1) != '\n') { + ptr += len; + goto loop; + } + + add_reset_file(path, option, RESET_DEFAULT_PRIO); + } + tracefs_put_tracing_file(path); + free(content); +} + +static void set_options(void) +{ + struct buffer_instance *instance; + struct opt_list *opt; + int ret; + + for_all_instances(instance) { + add_reset_options(instance); + while (instance->options) { + opt = instance->options; + instance->options = opt->next; + ret = set_option(instance, opt->option); + if (ret < 0) + die("Failed to set ftrace option %s", + opt->option); + free(opt); + } + } +} + +static void set_saved_cmdlines_size(struct common_record_context *ctx) +{ + int fd, len, ret = -1; + char *path, *str; + + if (!ctx->saved_cmdlines_size) + return; + + path = tracefs_get_tracing_file("saved_cmdlines_size"); + if (!path) + goto err; + + reset_save_file(path, RESET_DEFAULT_PRIO); + + fd = open(path, O_WRONLY); + tracefs_put_tracing_file(path); + if (fd < 0) + goto err; + + len = asprintf(&str, "%d", ctx->saved_cmdlines_size); + if (len < 0) + die("%s couldn't allocate memory", __func__); + + if (write(fd, str, len) > 0) + ret = 0; + + close(fd); + free(str); +err: + if (ret) + warning("Couldn't set saved_cmdlines_size"); +} + +static int trace_check_file_exists(struct buffer_instance *instance, char *file) +{ + struct stat st; + char *path; + int ret; + + path = tracefs_instance_get_file(instance->tracefs, file); + ret = stat(path, &st); + tracefs_put_tracing_file(path); + + return ret < 0 ? 
0 : 1; +} + +static int use_old_event_method(void) +{ + static int old_event_method; + static int processed; + + if (processed) + return old_event_method; + + /* Check if the kernel has the events/enable file */ + if (!trace_check_file_exists(&top_instance, "events/enable")) + old_event_method = 1; + + processed = 1; + + return old_event_method; +} + +static void old_update_events(const char *name, char update) +{ + char *path; + FILE *fp; + int ret; + + if (strcmp(name, "all") == 0) + name = "*:*"; + + /* need to use old way */ + path = tracefs_get_tracing_file("set_event"); + fp = fopen(path, "w"); + if (!fp) + die("opening '%s'", path); + tracefs_put_tracing_file(path); + + /* Disable the event with "!" */ + if (update == '0') + fwrite("!", 1, 1, fp); + + ret = fwrite(name, 1, strlen(name), fp); + if (ret < 0) + die("bad event '%s'", name); + + ret = fwrite("\n", 1, 1, fp); + if (ret < 0) + die("bad event '%s'", name); + + fclose(fp); + + return; +} + +static void +reset_events_instance(struct buffer_instance *instance) +{ + glob_t globbuf; + char *path; + char c; + int fd; + int i; + int ret; + + if (is_guest(instance)) + return; + + if (use_old_event_method()) { + /* old way only had top instance */ + if (!is_top_instance(instance)) + return; + old_update_events("all", '0'); + return; + } + + c = '0'; + path = tracefs_instance_get_file(instance->tracefs, "events/enable"); + fd = open(path, O_WRONLY); + if (fd < 0) + die("opening to '%s'", path); + ret = write(fd, &c, 1); + close(fd); + tracefs_put_tracing_file(path); + + path = tracefs_instance_get_file(instance->tracefs, "events/*/filter"); + globbuf.gl_offs = 0; + ret = glob(path, 0, NULL, &globbuf); + tracefs_put_tracing_file(path); + if (ret < 0) + return; + + for (i = 0; i < globbuf.gl_pathc; i++) { + path = globbuf.gl_pathv[i]; + fd = open(path, O_WRONLY); + if (fd < 0) + die("opening to '%s'", path); + ret = write(fd, &c, 1); + close(fd); + } + globfree(&globbuf); +} + +static void reset_events(void) +{ 
+ struct buffer_instance *instance; + + for_all_instances(instance) + reset_events_instance(instance); +} + +enum { + STATE_NEWLINE, + STATE_SKIP, + STATE_COPY, +}; + +static char *read_file(const char *file) +{ + char stbuf[BUFSIZ]; + char *buf = NULL; + int size = 0; + char *nbuf; + int fd; + int r; + + fd = open(file, O_RDONLY); + if (fd < 0) + return NULL; + + do { + r = read(fd, stbuf, BUFSIZ); + if (r <= 0) + continue; + nbuf = realloc(buf, size+r+1); + if (!nbuf) { + free(buf); + buf = NULL; + break; + } + buf = nbuf; + memcpy(buf+size, stbuf, r); + size += r; + } while (r > 0); + + close(fd); + if (r == 0 && size > 0) + buf[size] = '\0'; + + return buf; +} + +static void read_error_log(const char *log) +{ + char *buf, *line; + char *start = NULL; + char *p; + + buf = read_file(log); + if (!buf) + return; + + line = buf; + + /* Only the last lines have meaning */ + while ((p = strstr(line, "\n")) && p[1]) { + if (line[0] != ' ') + start = line; + line = p + 1; + } + + if (start) + printf("%s", start); + + free(buf); +} + +static void show_error(const char *file, const char *type) +{ + struct stat st; + char *path = strdup(file); + char *p; + int ret; + + if (!path) + die("Could not allocate memory"); + + p = strstr(path, "tracing"); + if (p) { + if (strncmp(p + sizeof("tracing"), "instances", sizeof("instances") - 1) == 0) { + p = strstr(p + sizeof("tracing") + sizeof("instances"), "/"); + if (!p) + goto read_file; + } else { + p += sizeof("tracing") - 1; + } + ret = asprintf(&p, "%.*s/error_log", (int)(p - path), path); + if (ret < 0) + die("Could not allocate memory"); + ret = stat(p, &st); + if (ret < 0) { + free(p); + goto read_file; + } + read_error_log(p); + goto out; + } + + read_file: + p = read_file(path); + if (p) + printf("%s", p); + + out: + printf("Failed %s of %s\n", type, file); + free(path); + return; +} + +static void write_filter(const char *file, const char *filter) +{ + if (write_file(file, filter) < 0) + show_error(file, "filter"); +} + 
+static void clear_filter(const char *file) +{ + write_filter(file, "0"); +} + +static void write_trigger(const char *file, const char *trigger) +{ + if (write_file(file, trigger) < 0) + show_error(file, "trigger"); +} + +static int clear_trigger(const char *file) +{ + char trigger[BUFSIZ]; + char *save = NULL; + char *line; + char *buf; + int len; + int ret; + + buf = read_file(file); + if (!buf) { + perror(file); + return 0; + } + + trigger[0] = '!'; + + for (line = strtok_r(buf, "\n", &save); line; line = strtok_r(NULL, "\n", &save)) { + if (line[0] == '#') + continue; + len = strlen(line); + if (len > BUFSIZ - 2) + len = BUFSIZ - 2; + strncpy(trigger + 1, line, len); + trigger[len + 1] = '\0'; + /* We don't want any filters or extra on the line */ + strtok(trigger, " "); + write_file(file, trigger); + } + + free(buf); + + /* + * Some triggers have an order in removing them. + * They will not be removed if done in the wrong order. + */ + buf = read_file(file); + if (!buf) + return 0; + + ret = 0; + for (line = strtok(buf, "\n"); line; line = strtok(NULL, "\n")) { + if (line[0] == '#') + continue; + ret = 1; + break; + } + free(buf); + return ret; +} + +static void clear_func_filter(const char *file) +{ + char filter[BUFSIZ]; + struct stat st; + char *line; + char *buf; + char *p; + int len; + int ret; + int fd; + + /* Function filters may not exist */ + ret = stat(file, &st); + if (ret < 0) + return; + + /* First zero out normal filters */ + fd = open(file, O_WRONLY | O_TRUNC); + if (fd < 0) + die("opening to '%s'", file); + close(fd); + + buf = read_file(file); + if (!buf) { + perror(file); + return; + } + + /* Now remove filters */ + filter[0] = '!'; + + /* + * To delete a filter, we need to write a '!filter' + * to the file for each filter. 
+ */ + for (line = strtok(buf, "\n"); line; line = strtok(NULL, "\n")) { + if (line[0] == '#') + continue; + len = strlen(line); + if (len > BUFSIZ - 2) + len = BUFSIZ - 2; + + strncpy(filter + 1, line, len); + filter[len + 1] = '\0'; + /* + * To remove "unlimited" filters, we must remove + * the ":unlimited" from what we write. + */ + if ((p = strstr(filter, ":unlimited"))) { + *p = '\0'; + len = p - filter; + } + /* + * The write to this file expects white space + * at the end :-p + */ + filter[len] = '\n'; + filter[len+1] = '\0'; + write_file(file, filter); + } +} + +static void update_reset_triggers(void) +{ + struct reset_file *reset; + + while (reset_triggers) { + reset = reset_triggers; + reset_triggers = reset->next; + + clear_trigger(reset->path); + free(reset->path); + free(reset); + } +} + +static void update_reset_files(void) +{ + struct reset_file *reset; + + while (reset_files) { + reset = reset_files; + reset_files = reset->next; + + if (!keep) + write_file(reset->path, reset->reset); + free(reset->path); + free(reset->reset); + free(reset); + } +} + +static void +update_event(struct event_list *event, const char *filter, + int filter_only, char update) +{ + const char *name = event->event; + FILE *fp; + char *path; + int ret; + + if (use_old_event_method()) { + if (filter_only) + return; + old_update_events(name, update); + return; + } + + if (filter && event->filter_file) { + add_reset_file(event->filter_file, "0", RESET_DEFAULT_PRIO); + write_filter(event->filter_file, filter); + } + + if (event->trigger_file) { + add_reset_trigger(event->trigger_file); + clear_trigger(event->trigger_file); + write_trigger(event->trigger_file, event->trigger); + /* Make sure we don't write this again */ + free(event->trigger_file); + free(event->trigger); + event->trigger_file = NULL; + event->trigger = NULL; + } + + if (filter_only || !event->enable_file) + return; + + path = event->enable_file; + + fp = fopen(path, "w"); + if (!fp) + die("writing to '%s'", 
path); + ret = fwrite(&update, 1, 1, fp); + fclose(fp); + if (ret < 0) + die("writing to '%s'", path); +} + +/* + * The debugfs file tracing_enabled needs to be deprecated. + * But just in case anyone fiddled with it. If it exists, + * make sure it is one. + * No error checking needed here. + */ +static void check_tracing_enabled(void) +{ + static int fd = -1; + char *path; + + if (fd < 0) { + path = tracefs_get_tracing_file("tracing_enabled"); + fd = open(path, O_WRONLY | O_CLOEXEC); + tracefs_put_tracing_file(path); + + if (fd < 0) + return; + } + write(fd, "1", 1); +} + +static int open_instance_fd(struct buffer_instance *instance, + const char *file, int flags) +{ + int fd; + char *path; + + path = tracefs_instance_get_file(instance->tracefs, file); + fd = open(path, flags); + if (fd < 0) { + /* instances may not be created yet */ + if (is_top_instance(instance)) + die("opening '%s'", path); + } + tracefs_put_tracing_file(path); + + return fd; +} + +static int open_tracing_on(struct buffer_instance *instance) +{ + int fd = instance->tracing_on_fd; + + /* OK, we keep zero for stdin */ + if (fd > 0) + return fd; + + fd = open_instance_fd(instance, "tracing_on", O_RDWR | O_CLOEXEC); + if (fd < 0) { + return fd; + } + instance->tracing_on_fd = fd; + + return fd; +} + +static void write_tracing_on(struct buffer_instance *instance, int on) +{ + int ret; + int fd; + + if (is_guest(instance)) + return; + + fd = open_tracing_on(instance); + if (fd < 0) + return; + + if (on) + ret = write(fd, "1", 1); + else + ret = write(fd, "0", 1); + + if (ret < 0) + die("writing 'tracing_on'"); +} + +static int read_tracing_on(struct buffer_instance *instance) +{ + int fd; + char buf[10]; + int ret; + + if (is_guest(instance)) + return -1; + + fd = open_tracing_on(instance); + if (fd < 0) + return fd; + + ret = read(fd, buf, 10); + if (ret <= 0) + die("Reading 'tracing_on'"); + buf[9] = 0; + ret = atoi(buf); + + return ret; +} + +static void reset_max_latency_instance(void) +{ + 
struct buffer_instance *instance; + + for_all_instances(instance) + reset_max_latency(instance); +} + +void tracecmd_enable_tracing(void) +{ + struct buffer_instance *instance; + + check_tracing_enabled(); + + for_all_instances(instance) + write_tracing_on(instance, 1); + + if (latency) + reset_max_latency_instance(); +} + +void tracecmd_disable_tracing(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) + write_tracing_on(instance, 0); +} + +void tracecmd_disable_all_tracing(int disable_tracer) +{ + struct buffer_instance *instance; + + tracecmd_disable_tracing(); + + if (disable_tracer) { + disable_func_stack_trace(); + set_plugin("nop"); + } + + reset_events(); + + /* Force close and reset of ftrace pid file */ + for_all_instances(instance) + update_ftrace_pid(instance, "", 1); + + clear_trace_instances(); +} + +static void +update_sched_event(struct buffer_instance *instance, + struct event_list *event, const char *field) +{ + if (!event) + return; + + event->pid_filter = make_pid_filter(instance, event->pid_filter, field); +} + +static void update_event_filters(struct buffer_instance *instance) +{ + struct event_list *event; + char *event_filter; + int free_it; + int len; + int common_len = 0; + + if (instance->common_pid_filter) + common_len = strlen(instance->common_pid_filter); + + for (event = instance->events; event; event = event->next) { + if (!event->neg) { + + free_it = 0; + if (event->filter) { + if (!instance->common_pid_filter) + /* + * event->pid_filter is only created if + * common_pid_filter is. No need to check that. + * Just use the current event->filter. 
+ */ + event_filter = event->filter; + else if (event->pid_filter) { + free_it = 1; + len = common_len + strlen(event->pid_filter) + + strlen(event->filter) + strlen("()&&(||)") + 1; + event_filter = malloc(len); + if (!event_filter) + die("Failed to allocate event_filter"); + sprintf(event_filter, "(%s)&&(%s||%s)", + event->filter, instance->common_pid_filter, + event->pid_filter); + } else { + free_it = 1; + len = common_len + strlen(event->filter) + + strlen("()&&()") + 1; + event_filter = malloc(len); + if (!event_filter) + die("Failed to allocate event_filter"); + sprintf(event_filter, "(%s)&&(%s)", + event->filter, instance->common_pid_filter); + } + } else { + /* event->pid_filter only exists when common_pid_filter does */ + if (!instance->common_pid_filter) + continue; + + if (event->pid_filter) { + free_it = 1; + len = common_len + strlen(event->pid_filter) + + strlen("||") + 1; + event_filter = malloc(len); + if (!event_filter) + die("Failed to allocate event_filter"); + sprintf(event_filter, "%s||%s", + instance->common_pid_filter, event->pid_filter); + } else + event_filter = instance->common_pid_filter; + } + + update_event(event, event_filter, 1, '1'); + if (free_it) + free(event_filter); + } + } +} + +static void update_pid_filters(struct buffer_instance *instance) +{ + struct filter_pids *p; + char *filter; + char *str; + int len; + int ret; + int fd; + + if (is_guest(instance)) + return; + + fd = open_instance_fd(instance, "set_event_pid", + O_WRONLY | O_CLOEXEC | O_TRUNC); + if (fd < 0) + die("Failed to access set_event_pid"); + + len = instance->len_filter_pids + instance->nr_filter_pids; + filter = malloc(len); + if (!filter) + die("Failed to allocate pid filter"); + + str = filter; + + for (p = instance->filter_pids; p; p = p->next) { + if (p->exclude) + continue; + len = sprintf(str, "%d ", p->pid); + str += len; + } + + if (filter == str) + goto out; + + len = str - filter; + str = filter; + do { + ret = write(fd, str, len); + if (ret < 0) + 
die("Failed to write to set_event_pid"); + str += ret; + len -= ret; + } while (ret >= 0 && len); + + out: + close(fd); +} + +static void update_pid_event_filters(struct buffer_instance *instance) +{ + if (instance->have_set_event_pid) + return update_pid_filters(instance); + /* + * Also make sure that the sched_switch to this pid + * and wakeups of this pid are also traced. + * Only need to do this if the events are active. + */ + update_sched_event(instance, instance->sched_switch_event, "next_pid"); + update_sched_event(instance, instance->sched_wakeup_event, "pid"); + update_sched_event(instance, instance->sched_wakeup_new_event, "pid"); + + update_event_filters(instance); +} + +#define MASK_STR_MAX 4096 /* Don't expect more than 32768 CPUS */ + +static char *alloc_mask_from_hex(struct buffer_instance *instance, const char *str) +{ + char *cpumask; + + if (strcmp(str, "-1") == 0) { + /* set all CPUs */ + int bytes = (instance->cpu_count + 7) / 8; + int last = instance->cpu_count % 8; + int i; + + cpumask = malloc(MASK_STR_MAX); + if (!cpumask) + die("can't allocate cpumask"); + + if (bytes > (MASK_STR_MAX-1)) { + warning("cpumask can't handle more than 32768 CPUS!"); + bytes = MASK_STR_MAX-1; + } + + sprintf(cpumask, "%x", (1 << last) - 1); + + for (i = 1; i < bytes; i++) + cpumask[i] = 'f'; + + cpumask[i+1] = 0; + } else { + cpumask = strdup(str); + if (!cpumask) + die("can't allocate cpumask"); + } + + return cpumask; +} + +static void set_mask(struct buffer_instance *instance) +{ + struct stat st; + char *path; + int fd; + int ret; + + if (is_guest(instance)) + return; + + if (!instance->cpumask) + return; + + path = tracefs_instance_get_file(instance->tracefs, "tracing_cpumask"); + if (!path) + die("could not allocate path"); + reset_save_file(path, RESET_DEFAULT_PRIO); + + ret = stat(path, &st); + if (ret < 0) { + warning("%s not found", path); + goto out; + } + + fd = open(path, O_WRONLY | O_TRUNC); + if (fd < 0) + die("could not open %s\n", path); + + 
write(fd, instance->cpumask, strlen(instance->cpumask)); + + close(fd); + out: + tracefs_put_tracing_file(path); + free(instance->cpumask); + instance->cpumask = NULL; +} + +static void enable_events(struct buffer_instance *instance) +{ + struct event_list *event; + + if (is_guest(instance)) + return; + + for (event = instance->events; event; event = event->next) { + if (!event->neg) + update_event(event, event->filter, 0, '1'); + } + + /* Now disable any events */ + for (event = instance->events; event; event = event->next) { + if (event->neg) + update_event(event, NULL, 0, '0'); + } +} + +void tracecmd_enable_events(void) +{ + enable_events(first_instance); +} + +static void set_clock(struct common_record_context *ctx, struct buffer_instance *instance) +{ + const char *clock; + char *path; + char *content; + char *str; + + if (is_guest(instance)) + return; + + if (instance->clock) + clock = instance->clock; + else + clock = ctx->clock; + + if (!clock) + return; + + /* The current clock is in brackets, reset it when we are done */ + content = tracefs_instance_file_read(instance->tracefs, + "trace_clock", NULL); + + /* check if first clock is set */ + if (*content == '[') + str = strtok(content+1, "]"); + else { + str = strtok(content, "["); + if (!str) + die("Can not find clock in trace_clock"); + str = strtok(NULL, "]"); + } + path = tracefs_instance_get_file(instance->tracefs, "trace_clock"); + add_reset_file(path, str, RESET_DEFAULT_PRIO); + + free(content); + tracefs_put_tracing_file(path); + + tracefs_instance_file_write(instance->tracefs, + "trace_clock", clock); +} + +static void set_max_graph_depth(struct buffer_instance *instance, char *max_graph_depth) +{ + char *path; + int ret; + + if (is_guest(instance)) + return; + + path = tracefs_instance_get_file(instance->tracefs, "max_graph_depth"); + reset_save_file(path, RESET_DEFAULT_PRIO); + tracefs_put_tracing_file(path); + ret = tracefs_instance_file_write(instance->tracefs, "max_graph_depth", + 
max_graph_depth); + if (ret < 0) + die("could not write to max_graph_depth"); +} + +static bool check_file_in_dir(char *dir, char *file) +{ + struct stat st; + char *path; + int ret; + + ret = asprintf(&path, "%s/%s", dir, file); + if (ret < 0) + die("Failed to allocate id file path for %s/%s", dir, file); + ret = stat(path, &st); + free(path); + if (ret < 0 || S_ISDIR(st.st_mode)) + return false; + return true; +} + +/** + * create_event - create and event descriptor + * @instance: instance to use + * @path: path to event attribute + * @old_event: event descriptor to use as base + * + * NOTE: the function purpose is to create a data structure to describe + * an ftrace event. During the process it becomes handy to change the + * string `path`. So, do not rely on the content of `path` after you + * invoke this function. + */ +static struct event_list * +create_event(struct buffer_instance *instance, char *path, struct event_list *old_event) +{ + struct event_list *event; + struct stat st; + char *path_dirname; + char *p; + int ret; + + event = malloc(sizeof(*event)); + if (!event) + die("Failed to allocate event"); + *event = *old_event; + add_event(instance, event); + + if (event->filter || filter_task || instance->filter_pids) { + event->filter_file = strdup(path); + if (!event->filter_file) + die("malloc filter file"); + } + + path_dirname = dirname(path); + + ret = asprintf(&p, "%s/enable", path_dirname); + if (ret < 0) + die("Failed to allocate enable path for %s", path); + ret = stat(p, &st); + if (ret >= 0) + event->enable_file = p; + else + free(p); + + if (old_event->trigger) { + if (check_file_in_dir(path_dirname, "trigger")) { + event->trigger = strdup(old_event->trigger); + ret = asprintf(&p, "%s/trigger", path_dirname); + if (ret < 0) + die("Failed to allocate trigger path for %s", path); + event->trigger_file = p; + } else { + /* Check if this is event or system. 
+ * Systems do not have trigger files by design + */ + if (check_file_in_dir(path_dirname, "id")) + die("trigger specified but not supported by this kernel"); + } + } + + return event; +} + +static void make_sched_event(struct buffer_instance *instance, + struct event_list **event, struct event_list *sched, + const char *sched_path) +{ + char *path_dirname; + char *tmp_file; + char *path; + int ret; + + /* Do nothing if the event already exists */ + if (*event) + return; + + /* we do not want to corrupt sched->filter_file when using dirname() */ + tmp_file = strdup(sched->filter_file); + if (!tmp_file) + die("Failed to allocate path for %s", sched_path); + path_dirname = dirname(tmp_file); + + ret = asprintf(&path, "%s/%s/filter", path_dirname, sched_path); + free(tmp_file); + if (ret < 0) + die("Failed to allocate path for %s", sched_path); + + *event = create_event(instance, path, sched); + free(path); +} + +static void test_event(struct event_list *event, const char *path, + const char *name, struct event_list **save, int len) +{ + path += len - strlen(name); + + if (strcmp(path, name) != 0) + return; + + *save = event; +} + +static void print_event(const char *fmt, ...) 
+{ + va_list ap; + + if (!show_status) + return; + + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + + printf("\n"); +} + + +static int expand_event_files(struct buffer_instance *instance, + const char *file, struct event_list *old_event) +{ + struct event_list **save_event_tail = instance->event_next; + struct event_list *sched_event = NULL; + struct event_list *event; + glob_t globbuf; + char *path; + char *p; + int ret; + int i; + + ret = asprintf(&p, "events/%s/filter", file); + if (ret < 0) + die("Failed to allocate event filter path for %s", file); + + path = tracefs_instance_get_file(instance->tracefs, p); + + globbuf.gl_offs = 0; + ret = glob(path, 0, NULL, &globbuf); + tracefs_put_tracing_file(path); + free(p); + + if (ret < 0) + die("No filters found"); + + for (i = 0; i < globbuf.gl_pathc; i++) { + int len; + + path = globbuf.gl_pathv[i]; + + event = create_event(instance, path, old_event); + print_event("%s\n", path); + + len = strlen(path); + + test_event(event, path, "sched", &sched_event, len); + test_event(event, path, "sched/sched_switch", &instance->sched_switch_event, len); + test_event(event, path, "sched/sched_wakeup_new", &instance->sched_wakeup_new_event, len); + test_event(event, path, "sched/sched_wakeup", &instance->sched_wakeup_event, len); + } + + if (sched_event && sched_event->filter_file) { + /* make sure all sched events exist */ + make_sched_event(instance, &instance->sched_switch_event, + sched_event, "sched_switch"); + make_sched_event(instance, &instance->sched_wakeup_event, + sched_event, "sched_wakeup"); + make_sched_event(instance, &instance->sched_wakeup_new_event, + sched_event, "sched_wakeup_new"); + + } + + + globfree(&globbuf); + + /* If the event list tail changed, that means events were added */ + return save_event_tail == instance->event_next; +} + +static int expand_events_all(struct buffer_instance *instance, + char *system_name, char *event_name, + struct event_list *event) +{ + char *name; + int ret; + + ret 
= asprintf(&name, "%s/%s", system_name, event_name); + if (ret < 0) + die("Failed to allocate system/event for %s/%s", + system_name, event_name); + ret = expand_event_files(instance, name, event); + free(name); + + return ret; +} + +static void expand_event(struct buffer_instance *instance, struct event_list *event) +{ + const char *name = event->event; + char *str; + char *ptr; + int ret; + + /* + * We allow the user to use "all" to enable all events. + * Expand event_selection to all systems. + */ + if (strcmp(name, "all") == 0) { + expand_event_files(instance, "*", event); + return; + } + + str = strdup(name); + if (!str) + die("Failed to allocate %s string", name); + + ptr = strchr(str, ':'); + if (ptr) { + *ptr = '\0'; + ptr++; + + if (strlen(ptr)) + ret = expand_events_all(instance, str, ptr, event); + else + ret = expand_events_all(instance, str, "*", event); + + if (!ignore_event_not_found && ret) + die("No events enabled with %s", name); + + goto out; + } + + /* No ':' so enable all matching systems and events */ + ret = expand_event_files(instance, str, event); + ret &= expand_events_all(instance, "*", str, event); + if (event->trigger) + ret &= expand_events_all(instance, str, "*", event); + + if (!ignore_event_not_found && ret) + die("No events enabled with %s", name); + +out: + free(str); +} + +static void expand_event_instance(struct buffer_instance *instance) +{ + struct event_list *compressed_list = instance->events; + struct event_list *event; + + if (is_guest(instance)) + return; + + reset_event_list(instance); + + while (compressed_list) { + event = compressed_list; + compressed_list = event->next; + expand_event(instance, event); + free(event->trigger); + free(event); + } +} + +static void expand_event_list(void) +{ + struct buffer_instance *instance; + + if (use_old_event_method()) + return; + + for_all_instances(instance) + expand_event_instance(instance); +} + +static void finish(int sig) +{ + /* all done */ + if (recorder) + 
tracecmd_stop_recording(recorder); + finished = 1; +} + +static struct addrinfo *do_getaddrinfo(const char *host, unsigned int port, + enum port_type type) +{ + struct addrinfo *results; + struct addrinfo hints; + char buf[BUFSIZ]; + int s; + + snprintf(buf, BUFSIZ, "%u", port); + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = type == USE_TCP ? SOCK_STREAM : SOCK_DGRAM; + + s = getaddrinfo(host, buf, &hints, &results); + if (s != 0) { + gai_err = gai_strerror(s); + return NULL; + } + + dprint("Attached port %s: %d to results: %p\n", + type == USE_TCP ? "TCP" : "UDP", port, results); + + return results; +} + +static int connect_addr(struct addrinfo *results) +{ + struct addrinfo *rp; + int sfd = -1; + + for (rp = results; rp != NULL; rp = rp->ai_next) { + sfd = socket(rp->ai_family, rp->ai_socktype, + rp->ai_protocol); + if (sfd == -1) + continue; + if (connect(sfd, rp->ai_addr, rp->ai_addrlen) != -1) + break; + close(sfd); + } + + if (rp == NULL) + return -1; + + dprint("connect results: %p with fd: %d\n", results, sfd); + + return sfd; +} + +static int connect_port(const char *host, unsigned int port, enum port_type type) +{ + struct addrinfo *results; + int sfd; + + if (type == USE_VSOCK) + return trace_vsock_open(atoi(host), port); + + results = do_getaddrinfo(host, port, type); + + if (!results) + die("connecting to %s server %s:%u", + type == USE_TCP ? "TCP" : "UDP", host, port); + + sfd = connect_addr(results); + + freeaddrinfo(results); + + if (sfd < 0) + die("Can not connect to %s server %s:%u", + type == USE_TCP ? 
"TCP" : "UDP", host, port); + + return sfd; +} + +static int do_accept(int sd) +{ + int cd; + + for (;;) { + dprint("Wait on accept: %d\n", sd); + cd = accept(sd, NULL, NULL); + dprint("accepted: %d\n", cd); + if (cd < 0) { + if (errno == EINTR) + continue; + die("accept"); + } + + return cd; + } + + return -1; +} + +/* Find all the tasks associated with the guest pid */ +static void find_tasks(struct trace_guest *guest) +{ + struct dirent *dent; + char *path; + DIR *dir; + int ret; + int tasks = 0; + + ret = asprintf(&path, "/proc/%d/task", guest->pid); + if (ret < 0) + return; + + dir = opendir(path); + free(path); + if (!dir) + return; + + while ((dent = readdir(dir))) { + int *pids; + if (!(dent->d_type == DT_DIR && is_digits(dent->d_name))) + continue; + pids = realloc(guest->task_pids, sizeof(int) * (tasks + 2)); + if (!pids) + break; + pids[tasks++] = strtol(dent->d_name, NULL, 0); + pids[tasks] = -1; + guest->task_pids = pids; + } + closedir(dir); +} + +static char *parse_guest_name(char *gname, int *cid, int *port, + struct addrinfo **res) +{ + struct trace_guest *guest = NULL; + struct addrinfo *result; + char *ip = NULL; + char *p; + + *res = NULL; + + *port = -1; + for (p = gname + strlen(gname); p > gname; p--) { + if (*p == ':') + break; + } + if (p > gname) { + *p = '\0'; + *port = atoi(p + 1); + } + + *cid = -1; + p = strrchr(gname, '@'); + if (p) { + *p = '\0'; + *cid = atoi(p + 1); + } else if (is_digits(gname)) { + *cid = atoi(gname); + } else { + /* Check if this is an IP address */ + if (strstr(gname, ":") || strstr(gname, ".")) + ip = gname; + } + + if (!ip && *cid < 0) + read_qemu_guests(); + + if (!ip) + guest = trace_get_guest(*cid, gname); + if (guest) { + *cid = guest->cid; + /* Mapping not found, search for them */ + if (!guest->cpu_pid) + find_tasks(guest); + return guest->name; + } + + /* Test to see if this is an internet address */ + result = do_getaddrinfo(gname, *port, USE_TCP); + if (!result) + return NULL; + + *res = result; + + 
return gname; +} + +static void set_prio(int prio) +{ + struct sched_param sp; + + memset(&sp, 0, sizeof(sp)); + sp.sched_priority = prio; + if (sched_setscheduler(0, SCHED_FIFO, &sp) < 0) + warning("failed to set priority"); +} + +static struct tracecmd_recorder * +create_recorder_instance_pipe(struct buffer_instance *instance, + int cpu, int *brass) +{ + struct tracecmd_recorder *recorder; + unsigned flags = recorder_flags | TRACECMD_RECORD_BLOCK_SPLICE; + char *path; + + path = tracefs_instance_get_dir(instance->tracefs); + + if (!path) + die("malloc"); + + /* This is already the child */ + close(brass[0]); + + recorder = tracecmd_create_buffer_recorder_fd(brass[1], cpu, flags, path); + + tracefs_put_tracing_file(path); + + return recorder; +} + +static struct tracecmd_recorder * +create_recorder_instance(struct buffer_instance *instance, const char *file, int cpu, + int *brass) +{ + struct tracecmd_recorder *record; + struct addrinfo *result; + char *path; + + if (is_guest(instance)) { + int fd; + unsigned int flags; + + if (instance->use_fifos) + fd = instance->fds[cpu]; + else if (is_network(instance)) { + result = do_getaddrinfo(instance->name, + instance->client_ports[cpu], + instance->port_type); + if (!result) + die("Failed to connect to %s port %d\n", + instance->name, + instance->client_ports[cpu]); + fd = connect_addr(result); + freeaddrinfo(result); + } else + fd = trace_vsock_open(instance->cid, instance->client_ports[cpu]); + if (fd < 0) + die("Failed to connect to agent"); + + flags = recorder_flags; + if (instance->use_fifos) + flags |= TRACECMD_RECORD_NOBRASS; + else if (!trace_vsock_can_splice_read()) + flags |= TRACECMD_RECORD_NOSPLICE; + return tracecmd_create_recorder_virt(file, cpu, flags, fd); + } + + if (brass) + return create_recorder_instance_pipe(instance, cpu, brass); + + if (!tracefs_instance_get_name(instance->tracefs)) + return tracecmd_create_recorder_maxkb(file, cpu, recorder_flags, max_kb); + + path = 
tracefs_instance_get_dir(instance->tracefs); + + record = tracecmd_create_buffer_recorder_maxkb(file, cpu, recorder_flags, + path, max_kb); + tracefs_put_tracing_file(path); + + return record; +} + +/* + * If extract is set, then this is going to set up the recorder, + * connections and exit as the tracing is serialized by a single thread. + */ +static int create_recorder(struct buffer_instance *instance, int cpu, + enum trace_type type, int *brass) +{ + long ret; + char *file; + pid_t pid; + + if (type != TRACE_TYPE_EXTRACT) { + + pid = fork(); + if (pid < 0) + die("fork"); + + if (pid) + return pid; + + signal(SIGINT, SIG_IGN); + signal(SIGUSR1, finish); + + if (rt_prio) + set_prio(rt_prio); + + /* do not kill tasks on error */ + instance->cpu_count = 0; + } + + if ((instance->client_ports && !is_guest(instance)) || is_agent(instance)) { + unsigned int flags = recorder_flags; + char *path = NULL; + int fd; + + if (is_agent(instance)) { + if (instance->use_fifos) + fd = instance->fds[cpu]; + else { + again: + fd = do_accept(instance->fds[cpu]); + if (instance->host && + !trace_net_cmp_connection_fd(fd, instance->host)) { + dprint("Client does not match '%s' for cpu:%d\n", + instance->host, cpu); + goto again; + } + } + } else { + fd = connect_port(host, instance->client_ports[cpu], + instance->port_type); + } + if (fd < 0) + die("Failed connecting to client"); + if (tracefs_instance_get_name(instance->tracefs) && !is_agent(instance)) { + path = tracefs_instance_get_dir(instance->tracefs); + } else { + const char *dir = tracefs_tracing_dir(); + + if (dir) + path = strdup(dir); + } + if (!path) + die("can't get the tracing directory"); + + recorder = tracecmd_create_buffer_recorder_fd(fd, cpu, flags, path); + tracefs_put_tracing_file(path); + } else { + file = get_temp_file(instance, cpu); + recorder = create_recorder_instance(instance, file, cpu, brass); + put_temp_file(file); + } + + if (!recorder) + die ("can't create recorder"); + + if (type == 
TRACE_TYPE_EXTRACT) { + ret = tracecmd_flush_recording(recorder); + tracecmd_free_recorder(recorder); + recorder = NULL; + return ret; + } + + while (!finished) { + if (tracecmd_start_recording(recorder, sleep_time) < 0) + break; + } + tracecmd_free_recorder(recorder); + recorder = NULL; + + exit(0); +} + +static void check_first_msg_from_server(struct tracecmd_msg_handle *msg_handle) +{ + char buf[BUFSIZ]; + + read(msg_handle->fd, buf, 8); + + /* Make sure the server is the tracecmd server */ + if (memcmp(buf, "tracecmd", 8) != 0) + die("server not tracecmd server"); +} + +static void communicate_with_listener_v1(struct tracecmd_msg_handle *msg_handle, + struct buffer_instance *instance) +{ + unsigned int *client_ports; + char buf[BUFSIZ]; + ssize_t n; + int cpu, i; + + check_first_msg_from_server(msg_handle); + + /* write the number of CPUs we have (in ASCII) */ + sprintf(buf, "%d", local_cpu_count); + + /* include \0 */ + write(msg_handle->fd, buf, strlen(buf)+1); + + /* write the pagesize (in ASCII) */ + sprintf(buf, "%d", page_size); + + /* include \0 */ + write(msg_handle->fd, buf, strlen(buf)+1); + + /* + * If we are using IPV4 and our page size is greater than + * or equal to 64K, we need to punt and use TCP. :-( + */ + + /* TODO, test for ipv4 */ + if (page_size >= UDP_MAX_PACKET) { + warning("page size too big for UDP using TCP in live read"); + instance->port_type = USE_TCP; + msg_handle->flags |= TRACECMD_MSG_FL_USE_TCP; + } + + if (instance->port_type == USE_TCP) { + /* Send one option */ + write(msg_handle->fd, "1", 2); + /* Size 4 */ + write(msg_handle->fd, "4", 2); + /* use TCP */ + write(msg_handle->fd, "TCP", 4); + } else + /* No options */ + write(msg_handle->fd, "0", 2); + + client_ports = malloc(local_cpu_count * sizeof(*client_ports)); + if (!client_ports) + die("Failed to allocate client ports for %d cpus", local_cpu_count); + + /* + * Now we will receive back a comma deliminated list + * of client ports to connect to. 
+ */ + for (cpu = 0; cpu < local_cpu_count; cpu++) { + for (i = 0; i < BUFSIZ; i++) { + n = read(msg_handle->fd, buf+i, 1); + if (n != 1) + die("Error, reading server ports"); + if (!buf[i] || buf[i] == ',') + break; + } + if (i == BUFSIZ) + die("read bad port number"); + buf[i] = 0; + client_ports[cpu] = atoi(buf); + } + + instance->client_ports = client_ports; +} + +static void communicate_with_listener_v3(struct tracecmd_msg_handle *msg_handle, + unsigned int **client_ports) +{ + if (tracecmd_msg_send_init_data(msg_handle, client_ports) < 0) + die("Cannot communicate with server"); +} + +static void check_protocol_version(struct tracecmd_msg_handle *msg_handle) +{ + char buf[BUFSIZ]; + int fd = msg_handle->fd; + int n; + + check_first_msg_from_server(msg_handle); + + /* + * Write the protocol version, the magic number, and the dummy + * option(0) (in ASCII). The client understands whether the client + * uses the v3 protocol or not by checking a reply message from the + * server. If the message is "V3", the server uses v3 protocol. On the + * other hands, if the message is just number strings, the server + * returned port numbers. So, in that time, the client understands the + * server uses the v1 protocol. However, the old server tells the + * client port numbers after reading cpu_count, page_size, and option. + * So, we add the dummy number (the magic number and 0 option) to the + * first client message. 
+ */ + write(fd, V3_CPU, sizeof(V3_CPU)); + + buf[0] = 0; + + /* read a reply message */ + n = read(fd, buf, BUFSIZ); + + if (n < 0 || !buf[0]) { + /* the server uses the v1 protocol, so we'll use it */ + msg_handle->version = V1_PROTOCOL; + tracecmd_plog("Use the v1 protocol\n"); + } else { + if (memcmp(buf, "V3", n) != 0) + die("Cannot handle the protocol %s", buf); + /* OK, let's use v3 protocol */ + write(fd, V3_MAGIC, sizeof(V3_MAGIC)); + + n = read(fd, buf, BUFSIZ - 1); + if (n != 2 || memcmp(buf, "OK", 2) != 0) { + if (n < 0) + n = 0; + buf[n] = 0; + die("Cannot handle the protocol %s", buf); + } + } +} + +static int connect_vsock(char *vhost) +{ + char *cid; + char *port; + char *p; + int sd; + + host = strdup(vhost); + if (!host) + die("alloctating server"); + + cid = strtok_r(host, ":", &p); + port = strtok_r(NULL, "", &p); + + if (!port) + die("vsocket must have format of 'CID:PORT'"); + + sd = trace_vsock_open(atoi(cid), atoi(port)); + + return sd; +} + +static int connect_ip(char *thost) +{ + struct addrinfo *result; + int sfd; + char *server; + char *port; + char *p; + + if (!strchr(host, ':')) { + server = strdup("localhost"); + if (!server) + die("alloctating server"); + port = thost; + host = server; + } else { + host = strdup(thost); + if (!host) + die("alloctating server"); + server = strtok_r(host, ":", &p); + port = strtok_r(NULL, ":", &p); + } + + result = do_getaddrinfo(server, atoi(port), USE_TCP); + if (!result) + die("getaddrinfo: %s", gai_err); + + sfd = connect_addr(result); + + freeaddrinfo(result); + + if (sfd < 0) + die("Can not connect to %s:%s", server, port); + + return sfd; +} + +static struct tracecmd_msg_handle *setup_network(struct buffer_instance *instance) +{ + struct tracecmd_msg_handle *msg_handle = NULL; + enum port_type type = instance->port_type; + int sfd; + +again: + switch (type) { + case USE_VSOCK: + sfd = connect_vsock(host); + break; + default: + sfd = connect_ip(host); + } + + if (sfd < 0) + return NULL; + + if 
(msg_handle) { + msg_handle->fd = sfd; + } else { + msg_handle = tracecmd_msg_handle_alloc(sfd, 0); + if (!msg_handle) + die("Failed to allocate message handle"); + + msg_handle->cpu_count = local_cpu_count; + msg_handle->version = V3_PROTOCOL; + } + + switch (type) { + case USE_TCP: + msg_handle->flags |= TRACECMD_MSG_FL_USE_TCP; + break; + case USE_VSOCK: + msg_handle->flags |= TRACECMD_MSG_FL_USE_VSOCK; + break; + default: + break; + } + + if (msg_handle->version == V3_PROTOCOL) { + check_protocol_version(msg_handle); + if (msg_handle->version == V1_PROTOCOL) { + /* reconnect to the server for using the v1 protocol */ + close(sfd); + free(host); + goto again; + } + communicate_with_listener_v3(msg_handle, &instance->client_ports); + } + + if (msg_handle->version == V1_PROTOCOL) + communicate_with_listener_v1(msg_handle, instance); + + return msg_handle; +} + +static void add_options(struct tracecmd_output *handle, struct common_record_context *ctx); + +static struct tracecmd_output *create_net_output(struct common_record_context *ctx, + struct tracecmd_msg_handle *msg_handle) +{ + struct tracecmd_output *out; + + out = tracecmd_output_create(NULL); + if (!out) + return NULL; + if (ctx->file_version && tracecmd_output_set_version(out, ctx->file_version)) + goto error; + if (tracecmd_output_set_msg(out, msg_handle)) + goto error; + + if (ctx->compression) { + if (tracecmd_output_set_compression(out, ctx->compression)) + goto error; + } else if (ctx->file_version >= FILE_VERSION_COMPRESSION) { + tracecmd_output_set_compression(out, "any"); + } + + if (tracecmd_output_write_headers(out, listed_events)) + goto error; + + return out; +error: + tracecmd_output_close(out); + return NULL; +} + +static struct tracecmd_msg_handle * +setup_connection(struct buffer_instance *instance, struct common_record_context *ctx) +{ + struct tracecmd_msg_handle *msg_handle = NULL; + struct tracecmd_output *network_handle = NULL; + int ret; + + msg_handle = setup_network(instance); + if 
(!msg_handle) + die("Failed to make connection"); + + /* Now create the handle through this socket */ + if (msg_handle->version == V3_PROTOCOL) { + network_handle = create_net_output(ctx, msg_handle); + if (!network_handle) + goto error; + tracecmd_set_quiet(network_handle, quiet); + add_options(network_handle, ctx); + ret = tracecmd_write_cmdlines(network_handle); + if (ret) + goto error; + ret = tracecmd_write_cpus(network_handle, instance->cpu_count); + if (ret) + goto error; + ret = tracecmd_write_buffer_info(network_handle); + if (ret) + goto error; + ret = tracecmd_write_options(network_handle); + if (ret) + goto error; + ret = tracecmd_msg_finish_sending_data(msg_handle); + if (ret) + goto error; + } else { + network_handle = tracecmd_output_create_fd(msg_handle->fd); + if (!network_handle) + goto error; + if (tracecmd_output_set_version(network_handle, ctx->file_version)) + goto error; + + if (ctx->compression) { + if (tracecmd_output_set_compression(network_handle, ctx->compression)) + goto error; + } else if (ctx->file_version >= FILE_VERSION_COMPRESSION) { + tracecmd_output_set_compression(network_handle, "any"); + } + + if (tracecmd_output_write_headers(network_handle, listed_events)) + goto error; + tracecmd_set_quiet(network_handle, quiet); + } + + instance->network_handle = network_handle; + + /* OK, we are all set, let'r rip! 
*/ + return msg_handle; + +error: + if (msg_handle) + tracecmd_msg_handle_close(msg_handle); + if (network_handle) + tracecmd_output_close(network_handle); + return NULL; +} + +static void finish_network(struct tracecmd_msg_handle *msg_handle) +{ + if (msg_handle->version == V3_PROTOCOL) + tracecmd_msg_send_close_msg(msg_handle); + tracecmd_msg_handle_close(msg_handle); + free(host); +} + +static int open_guest_fifos(const char *guest, int **fds) +{ + char path[PATH_MAX]; + int i, fd, flags; + + for (i = 0; ; i++) { + snprintf(path, sizeof(path), GUEST_FIFO_FMT ".out", guest, i); + + /* O_NONBLOCK so we don't wait for writers */ + fd = open(path, O_RDONLY | O_NONBLOCK); + if (fd < 0) + break; + + /* Success, now clear O_NONBLOCK */ + flags = fcntl(fd, F_GETFL); + fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); + + *fds = realloc(*fds, i + 1); + (*fds)[i] = fd; + } + + return i; +} + +struct trace_mapping { + struct tep_event *kvm_entry; + struct tep_format_field *vcpu_id; + struct tep_format_field *common_pid; + int *pids; + int *map; + int max_cpus; +}; + +static void start_mapping_vcpus(struct trace_guest *guest) +{ + char *pids = NULL; + char *t; + int len = 0; + int s; + int i; + + if (!guest->task_pids) + return; + + guest->instance = tracefs_instance_create("map_guest_pids"); + if (!guest->instance) + return; + + for (i = 0; guest->task_pids[i] >= 0; i++) { + s = snprintf(NULL, 0, "%d ", guest->task_pids[i]); + t = realloc(pids, len + s + 1); + if (!t) { + free(pids); + pids = NULL; + break; + } + pids = t; + sprintf(pids + len, "%d ", guest->task_pids[i]); + len += s; + } + if (pids) { + tracefs_instance_file_write(guest->instance, "set_event_pid", pids); + free(pids); + } + tracefs_instance_file_write(guest->instance, "events/kvm/kvm_entry/enable", "1"); +} + +static int map_vcpus(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + struct trace_mapping *tmap = context; + unsigned long long val; + int type; + int pid; + int ret; + int 
i; + + /* Do we have junk in the buffer? */ + type = tep_data_type(event->tep, record); + if (type != tmap->kvm_entry->id) + return 0; + + ret = tep_read_number_field(tmap->common_pid, record->data, &val); + if (ret < 0) + return 0; + pid = (int)val; + + for (i = 0; tmap->pids[i] >= 0; i++) { + if (pid == tmap->pids[i]) + break; + } + /* Is this thread one we care about ? */ + if (tmap->pids[i] < 0) + return 0; + + ret = tep_read_number_field(tmap->vcpu_id, record->data, &val); + if (ret < 0) + return 0; + + cpu = (int)val; + + /* Sanity check, warn? */ + if (cpu >= tmap->max_cpus) + return 0; + + /* Already have this one? Should we check if it is the same? */ + if (tmap->map[cpu] >= 0) + return 0; + + tmap->map[cpu] = pid; + + /* Did we get them all */ + for (i = 0; i < tmap->max_cpus; i++) { + if (tmap->map[i] < 0) + break; + } + + return i == tmap->max_cpus; +} + +static void stop_mapping_vcpus(struct buffer_instance *instance, + struct trace_guest *guest) +{ + struct trace_mapping tmap = { }; + struct tep_handle *tep; + const char *systems[] = { "kvm", NULL }; + int i; + + if (!guest->instance) + return; + + tmap.pids = guest->task_pids; + tmap.max_cpus = instance->cpu_count; + + tmap.map = malloc(sizeof(*tmap.map) * tmap.max_cpus); + if (!tmap.map) + return; + + for (i = 0; i < tmap.max_cpus; i++) + tmap.map[i] = -1; + + tracefs_instance_file_write(guest->instance, "events/kvm/kvm_entry/enable", "0"); + + tep = tracefs_local_events_system(NULL, systems); + if (!tep) + goto out; + + tmap.kvm_entry = tep_find_event_by_name(tep, "kvm", "kvm_entry"); + if (!tmap.kvm_entry) + goto out_free; + + tmap.vcpu_id = tep_find_field(tmap.kvm_entry, "vcpu_id"); + if (!tmap.vcpu_id) + goto out_free; + + tmap.common_pid = tep_find_any_field(tmap.kvm_entry, "common_pid"); + if (!tmap.common_pid) + goto out_free; + + tracefs_iterate_raw_events(tep, guest->instance, NULL, 0, map_vcpus, &tmap); + + for (i = 0; i < tmap.max_cpus; i++) { + if (tmap.map[i] < 0) + break; + } + /* We 
found all the mapped CPUs */ + if (i == tmap.max_cpus) { + guest->cpu_pid = tmap.map; + guest->cpu_max = tmap.max_cpus; + tmap.map = NULL; + } + + out_free: + tep_free(tep); + out: + free(tmap.map); + tracefs_instance_destroy(guest->instance); + tracefs_instance_free(guest->instance); +} + +static int host_tsync(struct common_record_context *ctx, + struct buffer_instance *instance, + unsigned int tsync_port, char *proto) +{ + struct trace_guest *guest; + int guest_pid = -1; + int fd; + + if (!proto) + return -1; + + if (is_network(instance)) { + fd = connect_port(instance->name, tsync_port, + instance->port_type); + } else { + guest = trace_get_guest(instance->cid, NULL); + if (guest == NULL) + return -1; + + guest_pid = guest->pid; + start_mapping_vcpus(guest); + fd = trace_vsock_open(instance->cid, tsync_port); + } + + instance->tsync = tracecmd_tsync_with_guest(top_instance.trace_id, + instance->tsync_loop_interval, + fd, guest_pid, + instance->cpu_count, + proto, ctx->clock); + if (!is_network(instance)) + stop_mapping_vcpus(instance, guest); + + if (!instance->tsync) + return -1; + + return 0; +} + +static void connect_to_agent(struct common_record_context *ctx, + struct buffer_instance *instance) +{ + struct tracecmd_tsync_protos *protos = NULL; + int sd, ret, nr_fifos, nr_cpus, page_size; + struct tracecmd_msg_handle *msg_handle; + enum tracecmd_time_sync_role role; + char *tsync_protos_reply = NULL; + unsigned int tsync_port = 0; + unsigned int *ports; + int i, *fds = NULL; + bool use_fifos = false; + + if (!no_fifos) { + nr_fifos = open_guest_fifos(instance->name, &fds); + use_fifos = nr_fifos > 0; + } + + if (ctx->instance->result) { + role = TRACECMD_TIME_SYNC_ROLE_CLIENT; + sd = connect_addr(ctx->instance->result); + if (sd < 0) + die("Failed to connect to host %s:%u", + instance->name, instance->port); + } else { + role = TRACECMD_TIME_SYNC_ROLE_HOST; + sd = trace_vsock_open(instance->cid, instance->port); + if (sd < 0) + die("Failed to connect to 
vsocket @%u:%u", + instance->cid, instance->port); + } + + msg_handle = tracecmd_msg_handle_alloc(sd, 0); + if (!msg_handle) + die("Failed to allocate message handle"); + + if (!instance->clock) + instance->clock = tracefs_get_clock(NULL); + + if (instance->tsync_loop_interval >= 0) + tracecmd_tsync_proto_getall(&protos, instance->clock, role); + + ret = tracecmd_msg_send_trace_req(msg_handle, instance->argc, + instance->argv, use_fifos, + top_instance.trace_id, protos); + if (ret < 0) + die("Failed to send trace request"); + + if (protos) { + free(protos->names); + free(protos); + } + ret = tracecmd_msg_recv_trace_resp(msg_handle, &nr_cpus, &page_size, + &ports, &use_fifos, + &instance->trace_id, + &tsync_protos_reply, &tsync_port); + if (ret < 0) + die("Failed to receive trace response %d", ret); + if (tsync_protos_reply && tsync_protos_reply[0]) { + if (tsync_proto_is_supported(tsync_protos_reply)) { + printf("Negotiated %s time sync protocol with guest %s\n", + tsync_protos_reply, + instance->name); + instance->cpu_count = nr_cpus; + host_tsync(ctx, instance, tsync_port, tsync_protos_reply); + } else + warning("Failed to negotiate timestamps synchronization with the guest"); + } + free(tsync_protos_reply); + + if (use_fifos) { + if (nr_cpus != nr_fifos) { + warning("number of FIFOs (%d) for guest %s differs " + "from number of virtual CPUs (%d)", + nr_fifos, instance->name, nr_cpus); + nr_cpus = nr_cpus < nr_fifos ? 
nr_cpus : nr_fifos; + } + free(ports); + instance->fds = fds; + } else { + for (i = 0; i < nr_fifos; i++) + close(fds[i]); + free(fds); + instance->client_ports = ports; + } + + instance->use_fifos = use_fifos; + instance->cpu_count = nr_cpus; + + /* the msg_handle now points to the guest fd */ + instance->msg_handle = msg_handle; +} + +static void setup_guest(struct buffer_instance *instance) +{ + struct tracecmd_msg_handle *msg_handle = instance->msg_handle; + const char *output_file = instance->output_file; + char *file; + int fd; + + /* Create a place to store the guest meta data */ + file = trace_get_guest_file(output_file, instance->name); + if (!file) + die("Failed to allocate memory"); + + free(instance->output_file); + instance->output_file = file; + + fd = open(file, O_CREAT|O_WRONLY|O_TRUNC, 0644); + if (fd < 0) + die("Failed to open %s", file); + + /* Start reading tracing metadata */ + if (tracecmd_msg_read_data(msg_handle, fd)) + die("Failed receiving metadata"); + close(fd); +} + +static void setup_agent(struct buffer_instance *instance, + struct common_record_context *ctx) +{ + struct tracecmd_output *network_handle; + + network_handle = create_net_output(ctx, instance->msg_handle); + add_options(network_handle, ctx); + tracecmd_write_cmdlines(network_handle); + tracecmd_write_cpus(network_handle, instance->cpu_count); + tracecmd_write_buffer_info(network_handle); + tracecmd_write_options(network_handle); + tracecmd_write_meta_strings(network_handle); + tracecmd_msg_finish_sending_data(instance->msg_handle); + instance->network_handle = network_handle; +} + +void start_threads(enum trace_type type, struct common_record_context *ctx) +{ + struct buffer_instance *instance; + int total_cpu_count = 0; + int i = 0; + int ret; + + for_all_instances(instance) { + /* Start the connection now to find out how many CPUs we need */ + if (is_guest(instance)) + connect_to_agent(ctx, instance); + total_cpu_count += instance->cpu_count; + } + + /* make a thread for 
every CPU we have */ + pids = calloc(total_cpu_count * (buffers + 1), sizeof(*pids)); + if (!pids) + die("Failed to allocate pids for %d cpus", total_cpu_count); + + for_all_instances(instance) { + int *brass = NULL; + int x, pid; + + if (is_agent(instance)) { + setup_agent(instance, ctx); + } else if (is_guest(instance)) { + setup_guest(instance); + } else if (host) { + instance->msg_handle = setup_connection(instance, ctx); + if (!instance->msg_handle) + die("Failed to make connection"); + } + + for (x = 0; x < instance->cpu_count; x++) { + if (type & TRACE_TYPE_STREAM) { + brass = pids[i].brass; + ret = pipe(brass); + if (ret < 0) + die("pipe"); + pids[i].stream = trace_stream_init(instance, x, + brass[0], + instance->cpu_count, + hooks, handle_init, + ctx->global); + if (!pids[i].stream) + die("Creating stream for %d", i); + } else + pids[i].brass[0] = -1; + pids[i].cpu = x; + pids[i].instance = instance; + /* Make sure all output is flushed before forking */ + fflush(stdout); + pid = pids[i++].pid = create_recorder(instance, x, type, brass); + if (brass) + close(brass[1]); + if (pid > 0) + add_filter_pid(instance, pid, 1); + } + } + recorder_threads = i; +} + +static void touch_file(const char *file) +{ + int fd; + + fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) + die("could not create file %s\n", file); + close(fd); +} + +static void append_buffer(struct tracecmd_output *handle, + struct buffer_instance *instance, + char **temp_files) +{ + int cpu_count = instance->cpu_count; + int i; + + /* + * Since we can record remote and virtual machines in the same file + * as the host, the buffers may no longer have matching number of + * CPU data as the host. 
For backward compatibility for older + * trace-cmd versions, which will blindly read the number of CPUs + * for each buffer instance as there are for the host, if there are + * fewer CPUs on the remote machine than on the host, an "empty" + * CPU is needed for each CPU that the host has that the remote does + * not. If there are more CPUs on the remote, older executables will + * simply ignore them (which is OK, we only need to guarantee that + * old executables don't crash). + */ + if (instance->cpu_count < local_cpu_count) + cpu_count = local_cpu_count; + + for (i = 0; i < cpu_count; i++) { + temp_files[i] = get_temp_file(instance, i); + if (i >= instance->cpu_count) + touch_file(temp_files[i]); + } + + tracecmd_append_buffer_cpu_data(handle, tracefs_instance_get_name(instance->tracefs), + cpu_count, temp_files); + + for (i = 0; i < instance->cpu_count; i++) { + if (i >= instance->cpu_count) + delete_temp_file(instance, i); + put_temp_file(temp_files[i]); + } +} + +static void +add_guest_info(struct tracecmd_output *handle, struct buffer_instance *instance) +{ + struct trace_guest *guest; + const char *name; + char *buf, *p; + int size; + int pid; + int i; + + if (is_network(instance)) { + name = instance->name; + } else { + guest = trace_get_guest(instance->cid, NULL); + if (!guest) + return; + name = guest->name; + } + + size = strlen(name) + 1; + size += sizeof(long long); /* trace_id */ + size += sizeof(int); /* cpu count */ + size += instance->cpu_count * 2 * sizeof(int); /* cpu,pid pair */ + + buf = calloc(1, size); + if (!buf) + return; + p = buf; + strcpy(p, name); + p += strlen(name) + 1; + + memcpy(p, &instance->trace_id, sizeof(long long)); + p += sizeof(long long); + + memcpy(p, &instance->cpu_count, sizeof(int)); + p += sizeof(int); + for (i = 0; i < instance->cpu_count; i++) { + pid = -1; + if (!is_network(instance)) { + if (i < guest->cpu_max) + pid = guest->cpu_pid[i]; + } + memcpy(p, &i, sizeof(int)); + p += sizeof(int); + memcpy(p, &pid, 
sizeof(int)); + p += sizeof(int); + } + + tracecmd_add_option(handle, TRACECMD_OPTION_GUEST, size, buf); + free(buf); +} + +static void +add_pid_maps(struct tracecmd_output *handle, struct buffer_instance *instance) +{ + struct pid_addr_maps *maps = instance->pid_maps; + struct trace_seq s; + int i; + + trace_seq_init(&s); + while (maps) { + if (!maps->nr_lib_maps) { + maps = maps->next; + continue; + } + trace_seq_reset(&s); + trace_seq_printf(&s, "%x %x %s\n", + maps->pid, maps->nr_lib_maps, maps->proc_name); + for (i = 0; i < maps->nr_lib_maps; i++) + trace_seq_printf(&s, "%llx %llx %s\n", + maps->lib_maps[i].start, + maps->lib_maps[i].end, + maps->lib_maps[i].lib_name); + trace_seq_terminate(&s); + tracecmd_add_option(handle, TRACECMD_OPTION_PROCMAPS, + s.len + 1, s.buffer); + maps = maps->next; + } + trace_seq_destroy(&s); +} + +static void +add_trace_id(struct tracecmd_output *handle, struct buffer_instance *instance) +{ + tracecmd_add_option(handle, TRACECMD_OPTION_TRACEID, + sizeof(long long), &instance->trace_id); +} + +static void +add_buffer_stat(struct tracecmd_output *handle, struct buffer_instance *instance) +{ + struct trace_seq s; + int i; + + trace_seq_init(&s); + trace_seq_printf(&s, "\nBuffer: %s\n\n", + tracefs_instance_get_name(instance->tracefs)); + tracecmd_add_option(handle, TRACECMD_OPTION_CPUSTAT, + s.len+1, s.buffer); + trace_seq_destroy(&s); + + for (i = 0; i < instance->cpu_count; i++) + tracecmd_add_option(handle, TRACECMD_OPTION_CPUSTAT, + instance->s_save[i].len+1, + instance->s_save[i].buffer); +} + +static void add_option_hooks(struct tracecmd_output *handle) +{ + struct hook_list *hook; + int len; + + for (hook = hooks; hook; hook = hook->next) { + len = strlen(hook->hook); + tracecmd_add_option(handle, TRACECMD_OPTION_HOOK, + len + 1, hook->hook); + } +} + +static void add_uname(struct tracecmd_output *handle) +{ + struct utsname buf; + char *str; + int len; + int ret; + + ret = uname(&buf); + /* if this fails for some reason, 
just ignore it */ + if (ret < 0) + return; + + len = strlen(buf.sysname) + strlen(buf.nodename) + + strlen(buf.release) + strlen(buf.machine) + 4; + str = malloc(len); + if (!str) + return; + sprintf(str, "%s %s %s %s", buf.sysname, buf.nodename, buf.release, buf.machine); + tracecmd_add_option(handle, TRACECMD_OPTION_UNAME, len, str); + free(str); +} + +static void add_version(struct tracecmd_output *handle) +{ + char *str; + int len; + + len = asprintf(&str, "%s %s", VERSION_STRING, VERSION_GIT); + if (len < 0) + return; + + tracecmd_add_option(handle, TRACECMD_OPTION_VERSION, len+1, str); + free(str); +} + +static void print_stat(struct buffer_instance *instance) +{ + int cpu; + + if (quiet) + return; + + if (!is_top_instance(instance)) + printf("\nBuffer: %s\n\n", + tracefs_instance_get_name(instance->tracefs)); + + for (cpu = 0; cpu < instance->cpu_count; cpu++) + trace_seq_do_printf(&instance->s_print[cpu]); +} + +static char *get_trace_clock(bool selected) +{ + struct buffer_instance *instance; + + for_all_instances(instance) { + if (is_guest(instance)) + continue; + break; + } + + if (selected) + return tracefs_get_clock(instance ? instance->tracefs : NULL); + else + return tracefs_instance_file_read(instance ? instance->tracefs : NULL, + "trace_clock", NULL); +} + +enum { + DATA_FL_NONE = 0, + DATA_FL_DATE = 1, + DATA_FL_OFFSET = 2, + DATA_FL_GUEST = 4, +}; + +static void add_options(struct tracecmd_output *handle, struct common_record_context *ctx) +{ + int type = 0; + char *clocks; + + if (ctx->date2ts) { + if (ctx->data_flags & DATA_FL_DATE) + type = TRACECMD_OPTION_DATE; + else if (ctx->data_flags & DATA_FL_OFFSET) + type = TRACECMD_OPTION_OFFSET; + } + + if (type) + tracecmd_add_option(handle, type, strlen(ctx->date2ts)+1, ctx->date2ts); + + clocks = get_trace_clock(false); + tracecmd_add_option(handle, TRACECMD_OPTION_TRACECLOCK, + clocks ? 
strlen(clocks)+1 : 0, clocks); + add_option_hooks(handle); + add_uname(handle); + add_version(handle); + if (!no_top_instance()) + add_trace_id(handle, &top_instance); + free(clocks); +} + +static void write_guest_file(struct buffer_instance *instance) +{ + struct tracecmd_output *handle; + int cpu_count = instance->cpu_count; + char *file; + char **temp_files; + int i, fd; + + file = instance->output_file; + fd = open(file, O_RDWR); + if (fd < 0) + die("error opening %s", file); + + handle = tracecmd_get_output_handle_fd(fd); + if (!handle) + die("error writing to %s", file); + if (instance->flags & BUFFER_FL_TSC2NSEC) + tracecmd_set_out_clock(handle, TSCNSEC_CLOCK); + temp_files = malloc(sizeof(*temp_files) * cpu_count); + if (!temp_files) + die("failed to allocate temp_files for %d cpus", + cpu_count); + + for (i = 0; i < cpu_count; i++) { + temp_files[i] = get_temp_file(instance, i); + if (!temp_files[i]) + die("failed to allocate memory"); + } + + if (tracecmd_write_cpu_data(handle, cpu_count, temp_files, NULL) < 0) + die("failed to write CPU data"); + tracecmd_output_close(handle); + + for (i = 0; i < cpu_count; i++) + put_temp_file(temp_files[i]); + free(temp_files); +} + +static struct tracecmd_output *create_output(struct common_record_context *ctx) +{ + struct tracecmd_output *out; + + if (!ctx->output) + return NULL; + + out = tracecmd_output_create(ctx->output); + if (!out) + goto error; + if (ctx->file_version && tracecmd_output_set_version(out, ctx->file_version)) + goto error; + + if (ctx->compression) { + if (tracecmd_output_set_compression(out, ctx->compression)) + goto error; + } else if (ctx->file_version >= FILE_VERSION_COMPRESSION) { + tracecmd_output_set_compression(out, "any"); + } + + if (tracecmd_output_write_headers(out, listed_events)) + goto error; + + return out; +error: + if (out) + tracecmd_output_close(out); + unlink(ctx->output); + return NULL; +} + +static void record_data(struct common_record_context *ctx) +{ + struct 
tracecmd_output *handle; + struct buffer_instance *instance; + bool local = false; + int max_cpu_count = local_cpu_count; + char **temp_files; + int i; + + for_all_instances(instance) { + if (is_guest(instance)) + write_guest_file(instance); + else if (host && instance->msg_handle) + finish_network(instance->msg_handle); + else + local = true; + } + + if (!local) + return; + + if (latency) { + handle = tracecmd_create_file_latency(ctx->output, local_cpu_count, + ctx->file_version, ctx->compression); + tracecmd_set_quiet(handle, quiet); + } else { + if (!local_cpu_count) + return; + + /* Allocate enough temp files to handle each instance */ + for_all_instances(instance) { + if (instance->msg_handle) + continue; + if (instance->cpu_count > max_cpu_count) + max_cpu_count = instance->cpu_count; + } + + temp_files = malloc(sizeof(*temp_files) * max_cpu_count); + if (!temp_files) + die("Failed to allocate temp_files for %d cpus", + local_cpu_count); + + for (i = 0; i < max_cpu_count; i++) + temp_files[i] = get_temp_file(&top_instance, i); + + /* + * If top_instance was not used, we still need to create + * empty trace.dat files for it. + */ + if (no_top_instance() || top_instance.msg_handle) { + for (i = 0; i < local_cpu_count; i++) + touch_file(temp_files[i]); + } + + handle = create_output(ctx); + if (!handle) + die("Error creating output file"); + tracecmd_set_quiet(handle, quiet); + + add_options(handle, ctx); + + /* Only record the top instance under TRACECMD_OPTION_CPUSTAT*/ + if (!no_top_instance() && !top_instance.msg_handle) { + struct trace_seq *s = top_instance.s_save; + + for (i = 0; i < local_cpu_count; i++) + tracecmd_add_option(handle, TRACECMD_OPTION_CPUSTAT, + s[i].len+1, s[i].buffer); + } + + if (buffers) { + i = 0; + for_each_instance(instance) { + int cpus = instance->cpu_count != local_cpu_count ? 
+ instance->cpu_count : 0; + + if (instance->msg_handle) + continue; + tracecmd_add_buffer_info(handle, + tracefs_instance_get_name(instance->tracefs), + cpus); + add_buffer_stat(handle, instance); + } + } + + if (!no_top_instance() && !top_instance.msg_handle) + print_stat(&top_instance); + + for_all_instances(instance) { + add_pid_maps(handle, instance); + } + + for_all_instances(instance) { + if (is_guest(instance)) + add_guest_info(handle, instance); + } + + if (ctx->tsc2nsec.mult) { + add_tsc2nsec(handle, &ctx->tsc2nsec); + tracecmd_set_out_clock(handle, TSCNSEC_CLOCK); + } + if (tracecmd_write_cmdlines(handle)) + die("Writing cmdlines"); + + tracecmd_append_cpu_data(handle, local_cpu_count, temp_files); + + for (i = 0; i < max_cpu_count; i++) + put_temp_file(temp_files[i]); + + if (buffers) { + i = 0; + for_each_instance(instance) { + if (instance->msg_handle) + continue; + print_stat(instance); + append_buffer(handle, instance, temp_files); + } + } + + free(temp_files); + } + if (!handle) + die("could not write to file"); + tracecmd_output_close(handle); +} + +enum filter_type { + FUNC_FILTER, + FUNC_NOTRACE, +}; + +static int filter_command(struct tracefs_instance *instance, const char *cmd) +{ + return tracefs_instance_file_append(instance, "set_ftrace_filter", cmd); +} + +static int write_func_filter(enum filter_type type, struct buffer_instance *instance, + struct func_list **list) +{ + struct func_list *item, *cmds = NULL; + const char *file; + int ret = -1; + int (*filter_function)(struct tracefs_instance *instance, const char *filter, + const char *module, unsigned int flags); + + if (!*list) + return 0; + + switch (type) { + case FUNC_FILTER: + filter_function = tracefs_function_filter; + file = "set_ftrace_filter"; + break; + case FUNC_NOTRACE: + filter_function = tracefs_function_notrace; + file = "set_ftrace_notrace"; + break; + } + + ret = filter_function(instance->tracefs, NULL, NULL, + TRACEFS_FL_RESET | TRACEFS_FL_CONTINUE); + if (ret < 0) + 
return ret; + + while (*list) { + item = *list; + *list = item->next; + /* Do commands separately at the end */ + if (type == FUNC_FILTER && strstr(item->func, ":")) { + item->next = cmds; + cmds = item; + continue; + } + ret = filter_function(instance->tracefs, item->func, item->mod, + TRACEFS_FL_CONTINUE); + if (ret < 0) + goto failed; + free(item); + } + ret = filter_function(instance->tracefs, NULL, NULL, 0); + + /* Now add any commands */ + while (cmds) { + item = cmds; + cmds = item->next; + ret = filter_command(instance->tracefs, item->func); + if (ret < 0) + goto failed; + free(item); + } + return ret; + failed: + die("Failed to write %s to %s.\n" + "Perhaps this function is not available for tracing.\n" + "run 'trace-cmd list -f %s' to see if it is.", + item->func, file, item->func); + return ret; +} + +static int write_func_file(struct buffer_instance *instance, + const char *file, struct func_list **list) +{ + struct func_list *item; + const char *prefix = ":mod:"; + char *path; + int fd; + int ret = -1; + + if (!*list) + return 0; + + path = tracefs_instance_get_file(instance->tracefs, file); + + fd = open(path, O_WRONLY | O_TRUNC); + if (fd < 0) + goto free; + + while (*list) { + item = *list; + *list = item->next; + ret = write(fd, item->func, strlen(item->func)); + if (ret < 0) + goto failed; + if (item->mod) { + ret = write(fd, prefix, strlen(prefix)); + if (ret < 0) + goto failed; + ret = write(fd, item->mod, strlen(item->mod)); + if (ret < 0) + goto failed; + } + ret = write(fd, " ", 1); + if (ret < 0) + goto failed; + free(item); + } + close(fd); + ret = 0; + free: + tracefs_put_tracing_file(path); + return ret; + failed: + die("Failed to write %s to %s.\n" + "Perhaps this function is not available for tracing.\n" + "run 'trace-cmd list -f %s' to see if it is.", + item->func, file, item->func); + return ret; +} + +static int functions_filtered(struct buffer_instance *instance) +{ + char buf[1] = { '#' }; + char *path; + int fd; + + path = 
tracefs_instance_get_file(instance->tracefs, "set_ftrace_filter"); + fd = open(path, O_RDONLY); + tracefs_put_tracing_file(path); + if (fd < 0) { + if (is_top_instance(instance)) + warning("Can not set set_ftrace_filter"); + else + warning("Can not set set_ftrace_filter for %s", + tracefs_instance_get_name(instance->tracefs)); + return 0; + } + + /* + * If functions are not filtered, than the first character + * will be '#'. Make sure it is not an '#' and also not space. + */ + read(fd, buf, 1); + close(fd); + + if (buf[0] == '#' || isspace(buf[0])) + return 0; + return 1; +} + +static void set_funcs(struct buffer_instance *instance) +{ + int set_notrace = 0; + int ret; + + if (is_guest(instance)) + return; + + ret = write_func_filter(FUNC_FILTER, instance, &instance->filter_funcs); + if (ret < 0) + die("set_ftrace_filter does not exist. Can not filter functions"); + + /* graph tracing currently only works for top instance */ + if (is_top_instance(instance)) { + ret = write_func_file(instance, "set_graph_function", &graph_funcs); + if (ret < 0) + die("set_graph_function does not exist."); + if (instance->plugin && strcmp(instance->plugin, "function_graph") == 0) { + ret = write_func_file(instance, "set_graph_notrace", + &instance->notrace_funcs); + if (!ret) + set_notrace = 1; + } + if (!set_notrace) { + ret = write_func_filter(FUNC_NOTRACE, instance, + &instance->notrace_funcs); + if (ret < 0) + die("set_ftrace_notrace does not exist. 
Can not filter functions"); + } + } else + write_func_filter(FUNC_NOTRACE, instance, &instance->notrace_funcs); + + /* make sure we are filtering functions */ + if (func_stack && is_top_instance(instance)) { + if (!functions_filtered(instance)) + die("Function stack trace set, but functions not filtered"); + save_option(instance, FUNC_STACK_TRACE); + } + clear_function_filters = 1; +} + +static void add_func(struct func_list **list, const char *mod, const char *func) +{ + struct func_list *item; + + item = malloc(sizeof(*item)); + if (!item) + die("Failed to allocate function descriptor"); + item->func = func; + item->mod = mod; + item->next = *list; + *list = item; +} + +static int find_ts(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + unsigned long long *ts = (unsigned long long *)context; + struct tep_format_field *field; + + if (!ts) + return -1; + + field = tep_find_field(event, "buf"); + if (field && strcmp(STAMP"\n", record->data + field->offset) == 0) { + *ts = record->ts; + return 1; + } + + return 0; +} + +static unsigned long long find_time_stamp(struct tep_handle *tep, + struct tracefs_instance *instance) +{ + unsigned long long ts = 0; + + if (!tracefs_iterate_raw_events(tep, instance, NULL, 0, find_ts, &ts)) + return ts; + + return 0; +} + + +static char *read_top_file(char *file, int *psize) +{ + return tracefs_instance_file_read(top_instance.tracefs, file, psize); +} + +static struct tep_handle *get_ftrace_tep(void) +{ + const char *systems[] = {"ftrace", NULL}; + struct tep_handle *tep; + char *buf; + int size; + int ret; + + tep = tracefs_local_events_system(NULL, systems); + if (!tep) + return NULL; + tep_set_file_bigendian(tep, tracecmd_host_bigendian()); + buf = read_top_file("events/header_page", &size); + if (!buf) + goto error; + ret = tep_parse_header_page(tep, buf, size, sizeof(unsigned long)); + free(buf); + if (ret < 0) + goto error; + + return tep; + +error: + tep_free(tep); + return NULL; +} + +/* + 
* Try to write the date into the ftrace buffer and then + * read it back, mapping the timestamp to the date. + */ +static char *get_date_to_ts(void) +{ + struct tep_handle *tep; + unsigned long long min = -1ULL; + unsigned long long diff; + unsigned long long stamp; + unsigned long long min_stamp; + unsigned long long min_ts; + unsigned long long ts; + struct timespec start; + struct timespec end; + char *date2ts = NULL; + int tfd; + int i; + + /* Set up a tep to read the raw format */ + tep = get_ftrace_tep(); + if (!tep) { + warning("failed to alloc tep, --date ignored"); + return NULL; + } + tfd = tracefs_instance_file_open(NULL, "trace_marker", O_WRONLY); + if (tfd < 0) { + warning("Can not open 'trace_marker', --date ignored"); + goto out_pevent; + } + + for (i = 0; i < date2ts_tries; i++) { + tracecmd_disable_tracing(); + clear_trace_instances(); + tracecmd_enable_tracing(); + + clock_gettime(CLOCK_REALTIME, &start); + write(tfd, STAMP, 5); + clock_gettime(CLOCK_REALTIME, &end); + + tracecmd_disable_tracing(); + ts = find_time_stamp(tep, NULL); + if (!ts) + continue; + + diff = (unsigned long long)end.tv_sec * 1000000000LL; + diff += (unsigned long long)end.tv_nsec; + stamp = diff; + diff -= (unsigned long long)start.tv_sec * 1000000000LL; + diff -= (unsigned long long)start.tv_nsec; + + if (diff < min) { + min_ts = ts; + min_stamp = stamp - diff / 2; + min = diff; + } + } + + close(tfd); + + if (min == -1ULL) { + warning("Failed to make date offset, --date ignored"); + goto out_pevent; + } + + /* 16 hex chars + 0x + \0 */ + date2ts = malloc(19); + if (!date2ts) + goto out_pevent; + + /* + * The difference between the timestamp and the gtod is + * stored as an ASCII string in hex. 
+ */ + diff = min_stamp - min_ts; + snprintf(date2ts, 19, "0x%llx", diff/1000); + out_pevent: + tep_free(tep); + + return date2ts; +} + +static void set_buffer_size_instance(struct buffer_instance *instance) +{ + int buffer_size = instance->buffer_size; + char buf[BUFSIZ]; + char *path; + int ret; + int fd; + + if (is_guest(instance)) + return; + + if (!buffer_size) + return; + + if (buffer_size < 0) + die("buffer size must be positive"); + + snprintf(buf, BUFSIZ, "%d", buffer_size); + + path = tracefs_instance_get_file(instance->tracefs, "buffer_size_kb"); + fd = open(path, O_WRONLY); + if (fd < 0) { + warning("can't open %s", path); + goto out; + } + + ret = write(fd, buf, strlen(buf)); + if (ret < 0) + warning("Can't write to %s", path); + close(fd); + out: + tracefs_put_tracing_file(path); +} + +void set_buffer_size(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) + set_buffer_size_instance(instance); +} + +static int +process_event_trigger(char *path, struct event_iter *iter) +{ + const char *system = iter->system_dent->d_name; + const char *event = iter->event_dent->d_name; + struct stat st; + char *trigger = NULL; + char *file; + int ret; + + path = append_file(path, system); + file = append_file(path, event); + free(path); + + ret = stat(file, &st); + if (ret < 0 || !S_ISDIR(st.st_mode)) + goto out; + + trigger = append_file(file, "trigger"); + + ret = stat(trigger, &st); + if (ret < 0) + goto out; + + ret = clear_trigger(trigger); + out: + free(trigger); + free(file); + return ret; +} + +static void clear_instance_triggers(struct buffer_instance *instance) +{ + enum event_iter_type type; + struct event_iter *iter; + char *system; + char *path; + int retry = 0; + int ret; + + path = tracefs_instance_get_file(instance->tracefs, "events"); + if (!path) + die("malloc"); + + iter = trace_event_iter_alloc(path); + + system = NULL; + while ((type = trace_event_iter_next(iter, path, system))) { + + if (type == EVENT_ITER_SYSTEM) { + 
system = iter->system_dent->d_name; + continue; + } + + ret = process_event_trigger(path, iter); + if (ret > 0) + retry++; + } + + trace_event_iter_free(iter); + + if (retry) { + int i; + + /* Order matters for some triggers */ + for (i = 0; i < retry; i++) { + int tries = 0; + + iter = trace_event_iter_alloc(path); + system = NULL; + while ((type = trace_event_iter_next(iter, path, system))) { + + if (type == EVENT_ITER_SYSTEM) { + system = iter->system_dent->d_name; + continue; + } + + ret = process_event_trigger(path, iter); + if (ret > 0) + tries++; + } + trace_event_iter_free(iter); + if (!tries) + break; + } + } + + tracefs_put_tracing_file(path); +} + +static void +process_event_filter(char *path, struct event_iter *iter, enum event_process *processed) +{ + const char *system = iter->system_dent->d_name; + const char *event = iter->event_dent->d_name; + struct stat st; + char *filter = NULL; + char *file; + int ret; + + path = append_file(path, system); + file = append_file(path, event); + free(path); + + ret = stat(file, &st); + if (ret < 0 || !S_ISDIR(st.st_mode)) + goto out; + + filter = append_file(file, "filter"); + + ret = stat(filter, &st); + if (ret < 0) + goto out; + + clear_filter(filter); + out: + free(filter); + free(file); +} + +static void clear_instance_filters(struct buffer_instance *instance) +{ + struct event_iter *iter; + char *path; + char *system; + enum event_iter_type type; + enum event_process processed = PROCESSED_NONE; + + path = tracefs_instance_get_file(instance->tracefs, "events"); + if (!path) + die("malloc"); + + iter = trace_event_iter_alloc(path); + + processed = PROCESSED_NONE; + system = NULL; + while ((type = trace_event_iter_next(iter, path, system))) { + + if (type == EVENT_ITER_SYSTEM) { + system = iter->system_dent->d_name; + continue; + } + + process_event_filter(path, iter, &processed); + } + + trace_event_iter_free(iter); + + tracefs_put_tracing_file(path); +} + +static void clear_filters(void) +{ + struct 
buffer_instance *instance; + + for_all_instances(instance) + clear_instance_filters(instance); +} + +static void reset_clock(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) + tracefs_instance_file_write(instance->tracefs, + "trace_clock", "local"); +} + +static void reset_cpu_mask(void) +{ + struct buffer_instance *instance; + int cpus = tracecmd_count_cpus(); + int fullwords = (cpus - 1) / 32; + int bits = (cpus - 1) % 32 + 1; + int len = (fullwords + 1) * 9; + char buf[len + 1]; + + buf[0] = '\0'; + + sprintf(buf, "%x", (unsigned int)((1ULL << bits) - 1)); + while (fullwords-- > 0) + strcat(buf, ",ffffffff"); + + for_all_instances(instance) + tracefs_instance_file_write(instance->tracefs, + "tracing_cpumask", buf); +} + +static void reset_event_pid(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) + add_event_pid(instance, ""); +} + +static void clear_triggers(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) + clear_instance_triggers(instance); +} + +static void clear_instance_error_log(struct buffer_instance *instance) +{ + char *file; + + if (!tracefs_file_exists(instance->tracefs, "error_log")) + return; + + file = tracefs_instance_get_file(instance->tracefs, "error_log"); + if (!file) + return; + write_file(file, " "); + tracefs_put_tracing_file(file); +} + +static void clear_error_log(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) + clear_instance_error_log(instance); +} + +static void clear_all_dynamic_events(void) +{ + /* Clear event probes first, as they may be attached to other dynamic event */ + tracefs_dynevent_destroy_all(TRACEFS_DYNEVENT_EPROBE, true); + tracefs_dynevent_destroy_all(TRACEFS_DYNEVENT_ALL, true); +} + +static void clear_func_filters(void) +{ + struct buffer_instance *instance; + char *path; + int i; + const char * const files[] = { "set_ftrace_filter", + "set_ftrace_notrace", + "set_graph_function", + "set_graph_notrace", + 
NULL }; + + for_all_instances(instance) { + for (i = 0; files[i]; i++) { + path = tracefs_instance_get_file(instance->tracefs, files[i]); + clear_func_filter(path); + tracefs_put_tracing_file(path); + } + } +} + +static void make_instances(void) +{ + struct buffer_instance *instance; + + for_each_instance(instance) { + if (is_guest(instance)) + continue; + if (instance->name && !instance->tracefs) { + instance->tracefs = tracefs_instance_create(instance->name); + /* Don't delete instances that already exist */ + if (instance->tracefs && !tracefs_instance_is_new(instance->tracefs)) + instance->flags |= BUFFER_FL_KEEP; + } + } +} + +void tracecmd_remove_instances(void) +{ + struct buffer_instance *instance; + + for_each_instance(instance) { + /* Only delete what we created */ + if (is_guest(instance) || (instance->flags & BUFFER_FL_KEEP)) + continue; + if (instance->tracing_on_fd > 0) { + close(instance->tracing_on_fd); + instance->tracing_on_fd = 0; + } + tracefs_instance_destroy(instance->tracefs); + } +} + +static void check_plugin(const char *plugin) +{ + char *buf; + char *str; + char *tok; + + /* + * nop is special. We may want to just trace + * trace_printks, that are in the kernel. 
+ */ + if (strcmp(plugin, "nop") == 0) + return; + + buf = read_top_file("available_tracers", NULL); + if (!buf) + die("No plugins available"); + + str = buf; + while ((tok = strtok(str, " "))) { + str = NULL; + if (strcmp(tok, plugin) == 0) + goto out; + } + die ("Plugin '%s' does not exist", plugin); + out: + if (!quiet) + fprintf(stderr, " plugin '%s'\n", plugin); + free(buf); +} + +static void check_function_plugin(void) +{ + const char *plugin; + + /* We only care about the top_instance */ + if (no_top_instance()) + return; + + plugin = top_instance.plugin; + if (!plugin) + return; + + if (plugin && strncmp(plugin, "function", 8) == 0 && + func_stack && !top_instance.filter_funcs) + die("Must supply function filtering with --func-stack\n"); +} + +static int __check_doing_something(struct buffer_instance *instance) +{ + return is_guest(instance) || (instance->flags & BUFFER_FL_PROFILE) || + instance->plugin || instance->events || instance->get_procmap; +} + +static void check_doing_something(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) { + if (__check_doing_something(instance)) + return; + } + + die("no event or plugin was specified... aborting"); +} + +static void +update_plugin_instance(struct buffer_instance *instance, + enum trace_type type) +{ + const char *plugin = instance->plugin; + + if (is_guest(instance)) + return; + + if (!plugin) + return; + + check_plugin(plugin); + + /* + * Latency tracers just save the trace and kill + * the threads. 
+ */ + if (strcmp(plugin, "irqsoff") == 0 || + strcmp(plugin, "preemptoff") == 0 || + strcmp(plugin, "preemptirqsoff") == 0 || + strcmp(plugin, "wakeup") == 0 || + strcmp(plugin, "wakeup_rt") == 0) { + latency = 1; + if (host) + die("Network tracing not available with latency tracer plugins"); + if (type & TRACE_TYPE_STREAM) + die("Streaming is not available with latency tracer plugins"); + } else if (type == TRACE_TYPE_RECORD) { + if (latency) + die("Can not record latency tracer and non latency trace together"); + } + + if (fset < 0 && (strcmp(plugin, "function") == 0 || + strcmp(plugin, "function_graph") == 0)) + die("function tracing not configured on this kernel"); + + if (type != TRACE_TYPE_EXTRACT) + set_plugin_instance(instance, plugin); +} + +static void update_plugins(enum trace_type type) +{ + struct buffer_instance *instance; + + for_all_instances(instance) + update_plugin_instance(instance, type); +} + +static void allocate_seq(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) { + instance->s_save = malloc(sizeof(struct trace_seq) * instance->cpu_count); + instance->s_print = malloc(sizeof(struct trace_seq) * instance->cpu_count); + if (!instance->s_save || !instance->s_print) + die("Failed to allocate instance info"); + } +} + +/* Find the overrun output, and add it to the print seq */ +static void add_overrun(int cpu, struct trace_seq *src, struct trace_seq *dst) +{ + const char overrun_str[] = "overrun: "; + const char commit_overrun_str[] = "commit overrun: "; + const char *p; + int overrun; + int commit_overrun; + + p = strstr(src->buffer, overrun_str); + if (!p) { + /* Warn? 
*/ + trace_seq_printf(dst, "CPU %d: no overrun found?\n", cpu); + return; + } + + overrun = atoi(p + strlen(overrun_str)); + + p = strstr(p + 9, commit_overrun_str); + if (p) + commit_overrun = atoi(p + strlen(commit_overrun_str)); + else + commit_overrun = -1; + + if (!overrun && !commit_overrun) + return; + + trace_seq_printf(dst, "CPU %d:", cpu); + + if (overrun) + trace_seq_printf(dst, " %d events lost", overrun); + + if (commit_overrun) + trace_seq_printf(dst, " %d events lost due to commit overrun", + commit_overrun); + + trace_seq_putc(dst, '\n'); +} + +static void record_stats(void) +{ + struct buffer_instance *instance; + struct trace_seq *s_save; + struct trace_seq *s_print; + int cpu; + + for_all_instances(instance) { + if (is_guest(instance)) + continue; + + s_save = instance->s_save; + s_print = instance->s_print; + for (cpu = 0; cpu < instance->cpu_count; cpu++) { + trace_seq_init(&s_save[cpu]); + trace_seq_init(&s_print[cpu]); + trace_seq_printf(&s_save[cpu], "CPU: %d\n", cpu); + tracecmd_stat_cpu_instance(instance, &s_save[cpu], cpu); + add_overrun(cpu, &s_save[cpu], &s_print[cpu]); + } + } +} + +static void print_stats(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) + print_stat(instance); +} + +static void destroy_stats(void) +{ + struct buffer_instance *instance; + int cpu; + + for_all_instances(instance) { + if (is_guest(instance)) + continue; + + for (cpu = 0; cpu < instance->cpu_count; cpu++) { + trace_seq_destroy(&instance->s_save[cpu]); + trace_seq_destroy(&instance->s_print[cpu]); + } + } +} + +static void list_event(const char *event) +{ + struct tracecmd_event_list *list; + + list = malloc(sizeof(*list)); + if (!list) + die("Failed to allocate list for event"); + list->next = listed_events; + list->glob = event; + listed_events = list; +} + +#define ALL_EVENTS "*/*" + +static void record_all_events(void) +{ + struct tracecmd_event_list *list; + + while (listed_events) { + list = listed_events; + listed_events 
= list->next; + free(list); + } + list = malloc(sizeof(*list)); + if (!list) + die("Failed to allocate list for all events"); + list->next = NULL; + list->glob = ALL_EVENTS; + listed_events = list; +} + +static int recording_all_events(void) +{ + return listed_events && strcmp(listed_events->glob, ALL_EVENTS) == 0; +} + +static void add_trigger(struct event_list *event, const char *trigger) +{ + int ret; + + if (event->trigger) { + event->trigger = realloc(event->trigger, + strlen(event->trigger) + strlen("\n") + + strlen(trigger) + 1); + strcat(event->trigger, "\n"); + strcat(event->trigger, trigger); + } else { + ret = asprintf(&event->trigger, "%s", trigger); + if (ret < 0) + die("Failed to allocate event trigger"); + } +} + +static int test_stacktrace_trigger(struct buffer_instance *instance) +{ + char *path; + int ret = 0; + int fd; + + path = tracefs_instance_get_file(instance->tracefs, + "events/sched/sched_switch/trigger"); + + clear_trigger(path); + + fd = open(path, O_WRONLY); + if (fd < 0) + goto out; + + ret = write(fd, "stacktrace", 10); + if (ret != 10) + ret = 0; + else + ret = 1; + close(fd); + out: + tracefs_put_tracing_file(path); + + return ret; +} + +static int +profile_add_event(struct buffer_instance *instance, const char *event_str, int stack) +{ + struct event_list *event; + char buf[BUFSIZ]; + char *p; + + strcpy(buf, "events/"); + strncpy(buf + 7, event_str, BUFSIZ - 7); + buf[BUFSIZ-1] = 0; + + if ((p = strstr(buf, ":"))) { + *p = '/'; + p++; + } + + if (!trace_check_file_exists(instance, buf)) + return -1; + + /* Only add event if it isn't already added */ + for (event = instance->events; event; event = event->next) { + if (p && strcmp(event->event, p) == 0) + break; + if (strcmp(event->event, event_str) == 0) + break; + } + + if (!event) { + event = malloc(sizeof(*event)); + if (!event) + die("Failed to allocate event"); + memset(event, 0, sizeof(*event)); + event->event = event_str; + add_event(instance, event); + } + + if 
(!recording_all_events()) + list_event(event_str); + + if (stack) { + if (!event->trigger || !strstr(event->trigger, "stacktrace")) + add_trigger(event, "stacktrace"); + } + + return 0; +} + +int tracecmd_add_event(const char *event_str, int stack) +{ + return profile_add_event(first_instance, event_str, stack); +} + +static void enable_profile(struct buffer_instance *instance) +{ + int stacktrace = 0; + int i; + char *trigger_events[] = { + "sched:sched_switch", + "sched:sched_wakeup", + NULL, + }; + char *events[] = { + "exceptions:page_fault_user", + "irq:irq_handler_entry", + "irq:irq_handler_exit", + "irq:softirq_entry", + "irq:softirq_exit", + "irq:softirq_raise", + "sched:sched_process_exec", + "raw_syscalls", + NULL, + }; + + if (!instance->plugin) { + if (trace_check_file_exists(instance, "max_graph_depth")) { + instance->plugin = "function_graph"; + set_max_graph_depth(instance, "1"); + } else + warning("Kernel does not support max_graph_depth\n" + " Skipping user/kernel profiling"); + } + + if (test_stacktrace_trigger(instance)) + stacktrace = 1; + else + /* + * The stacktrace trigger is not implemented with this + * kernel, then we need to default to the stack trace option. + * This is less efficient but still works. 
+ */ + save_option(instance, "stacktrace"); + + + for (i = 0; trigger_events[i]; i++) + profile_add_event(instance, trigger_events[i], stacktrace); + + for (i = 0; events[i]; i++) + profile_add_event(instance, events[i], 0); +} + +static struct event_list * +create_hook_event(struct buffer_instance *instance, + const char *system, const char *event) +{ + struct event_list *event_list; + char *event_name; + int len; + + if (!system) + system = "*"; + + len = strlen(event); + len += strlen(system) + 2; + + event_name = malloc(len); + if (!event_name) + die("Failed to allocate %s/%s", system, event); + sprintf(event_name, "%s:%s", system, event); + + event_list = malloc(sizeof(*event_list)); + if (!event_list) + die("Failed to allocate event list for %s", event_name); + memset(event_list, 0, sizeof(*event_list)); + event_list->event = event_name; + add_event(instance, event_list); + + list_event(event_name); + + return event_list; +} + +static void add_hook(struct buffer_instance *instance, const char *arg) +{ + struct event_list *event; + struct hook_list *hook; + + hook = tracecmd_create_event_hook(arg); + if (!hook) + die("Failed to create event hook %s", arg); + + hook->instance = instance; + hook->next = hooks; + hooks = hook; + + /* Make sure the event is enabled */ + event = create_hook_event(instance, hook->start_system, hook->start_event); + create_hook_event(instance, hook->end_system, hook->end_event); + + if (hook->stack) { + if (!event->trigger || !strstr(event->trigger, "stacktrace")) + add_trigger(event, "stacktrace"); + } +} + +void update_first_instance(struct buffer_instance *instance, int topt) +{ + if (topt || instance == &top_instance) + first_instance = &top_instance; + else + first_instance = buffer_instances; +} + +void init_top_instance(void) +{ + if (!top_instance.tracefs) + top_instance.tracefs = tracefs_instance_create(NULL); + top_instance.cpu_count = tracecmd_count_cpus(); + top_instance.flags = BUFFER_FL_KEEP; + top_instance.trace_id = 
tracecmd_generate_traceid(); + init_instance(&top_instance); +} + +enum { + OPT_compression = 237, + OPT_file_ver = 238, + OPT_verbose = 239, + OPT_tsc2nsec = 240, + OPT_fork = 241, + OPT_tsyncinterval = 242, + OPT_user = 243, + OPT_procmap = 244, + OPT_quiet = 245, + OPT_debug = 246, + OPT_no_filter = 247, + OPT_max_graph_depth = 248, + OPT_tsoffset = 249, + OPT_bycomm = 250, + OPT_stderr = 251, + OPT_profile = 252, + OPT_nosplice = 253, + OPT_funcstack = 254, + OPT_date = 255, + OPT_module = 256, + OPT_nofifos = 257, + OPT_cmdlines_size = 258, + OPT_poll = 259, + OPT_name = 260, +}; + +void trace_stop(int argc, char **argv) +{ + int topt = 0; + struct buffer_instance *instance = &top_instance; + + init_top_instance(); + + for (;;) { + int c; + + c = getopt(argc-1, argv+1, "hatB:"); + if (c == -1) + break; + + switch (c) { + case 'h': + usage(argv); + break; + case 'B': + instance = allocate_instance(optarg); + if (!instance) + die("Failed to create instance"); + add_instance(instance, local_cpu_count); + break; + case 'a': + add_all_instances(); + break; + case 't': + /* Force to use top instance */ + topt = 1; + instance = &top_instance; + break; + default: + usage(argv); + } + } + update_first_instance(instance, topt); + tracecmd_disable_tracing(); + exit(0); +} + +void trace_restart(int argc, char **argv) +{ + int topt = 0; + struct buffer_instance *instance = &top_instance; + + init_top_instance(); + + for (;;) { + int c; + + c = getopt(argc-1, argv+1, "hatB:"); + if (c == -1) + break; + switch (c) { + case 'h': + usage(argv); + break; + case 'B': + instance = allocate_instance(optarg); + if (!instance) + die("Failed to create instance"); + add_instance(instance, local_cpu_count); + break; + case 'a': + add_all_instances(); + break; + case 't': + /* Force to use top instance */ + topt = 1; + instance = &top_instance; + break; + default: + usage(argv); + } + + } + update_first_instance(instance, topt); + tracecmd_enable_tracing(); + exit(0); +} + +void 
trace_reset(int argc, char **argv) +{ + int c; + int topt = 0; + struct buffer_instance *instance = &top_instance; + + init_top_instance(); + + /* if last arg is -a, then -b and -d apply to all instances */ + int last_specified_all = 0; + struct buffer_instance *inst; /* iterator */ + + while ((c = getopt(argc-1, argv+1, "hab:B:td")) >= 0) { + + switch (c) { + case 'h': + usage(argv); + break; + case 'b': + { + int size = atoi(optarg); + /* Min buffer size is 1 */ + if (size <= 1) + size = 1; + if (last_specified_all) { + for_each_instance(inst) { + inst->buffer_size = size; + } + } else { + instance->buffer_size = size; + } + break; + } + case 'B': + last_specified_all = 0; + instance = allocate_instance(optarg); + if (!instance) + die("Failed to create instance"); + add_instance(instance, local_cpu_count); + /* -d will remove keep */ + instance->flags |= BUFFER_FL_KEEP; + break; + case 't': + /* Force to use top instance */ + last_specified_all = 0; + topt = 1; + instance = &top_instance; + break; + case 'a': + last_specified_all = 1; + add_all_instances(); + for_each_instance(inst) { + inst->flags |= BUFFER_FL_KEEP; + } + break; + case 'd': + if (last_specified_all) { + for_each_instance(inst) { + inst->flags &= ~BUFFER_FL_KEEP; + } + } else { + if (is_top_instance(instance)) + die("Can not delete top level buffer"); + instance->flags &= ~BUFFER_FL_KEEP; + } + break; + } + } + update_first_instance(instance, topt); + tracecmd_disable_all_tracing(1); + set_buffer_size(); + clear_filters(); + clear_triggers(); + clear_all_dynamic_events(); + clear_error_log(); + /* set clock to "local" */ + reset_clock(); + reset_event_pid(); + reset_max_latency_instance(); + reset_cpu_mask(); + tracecmd_remove_instances(); + clear_func_filters(); + /* restore tracing_on to 1 */ + tracecmd_enable_tracing(); + exit(0); +} + +static void init_common_record_context(struct common_record_context *ctx, + enum trace_cmd curr_cmd) +{ + memset(ctx, 0, sizeof(*ctx)); + ctx->instance = 
&top_instance; + ctx->curr_cmd = curr_cmd; + local_cpu_count = tracecmd_count_cpus(); + ctx->file_version = tracecmd_default_file_version(); + init_top_instance(); +} + +#define IS_EXTRACT(ctx) ((ctx)->curr_cmd == CMD_extract) +#define IS_START(ctx) ((ctx)->curr_cmd == CMD_start) +#define IS_CMDSET(ctx) ((ctx)->curr_cmd == CMD_set) +#define IS_STREAM(ctx) ((ctx)->curr_cmd == CMD_stream) +#define IS_PROFILE(ctx) ((ctx)->curr_cmd == CMD_profile) +#define IS_RECORD(ctx) ((ctx)->curr_cmd == CMD_record) +#define IS_RECORD_AGENT(ctx) ((ctx)->curr_cmd == CMD_record_agent) + +static void add_argv(struct buffer_instance *instance, char *arg, bool prepend) +{ + instance->argv = realloc(instance->argv, + (instance->argc + 1) * sizeof(char *)); + if (!instance->argv) + die("Can not allocate instance args"); + if (prepend) { + memmove(instance->argv + 1, instance->argv, + instance->argc * sizeof(*instance->argv)); + instance->argv[0] = arg; + } else { + instance->argv[instance->argc] = arg; + } + instance->argc++; +} + +static void add_arg(struct buffer_instance *instance, + int c, const char *opts, + struct option *long_options, char *optarg) +{ + char *ptr, *arg; + int i, ret; + + /* Short or long arg */ + if (!(c & 0x80)) { + ptr = strchr(opts, c); + if (!ptr) + return; /* Not found? */ + ret = asprintf(&arg, "-%c", c); + if (ret < 0) + die("Can not allocate argument"); + add_argv(instance, arg, false); + if (ptr[1] == ':') { + arg = strdup(optarg); + if (!arg) + die("Can not allocate arguments"); + add_argv(instance, arg, false); + } + return; + } + for (i = 0; long_options[i].name; i++) { + if (c != long_options[i].val) + continue; + ret = asprintf(&arg, "--%s", long_options[i].name); + if (ret < 0) + die("Can not allocate argument"); + add_argv(instance, arg, false); + if (long_options[i].has_arg) { + arg = strdup(optarg); + if (!arg) + die("Can not allocate arguments"); + add_argv(instance, arg, false); + } + return; + } + /* Not found? 
*/ +} + +static inline void cmd_check_die(struct common_record_context *ctx, + enum trace_cmd id, char *cmd, char *param) +{ + if (ctx->curr_cmd == id) + die("%s has no effect with the command %s\n" + "Did you mean 'record'?", param, cmd); +} + +static inline void remove_instances(struct buffer_instance *instances) +{ + struct buffer_instance *del; + + while (instances) { + del = instances; + instances = instances->next; + free(del->name); + tracefs_instance_destroy(del->tracefs); + tracefs_instance_free(del->tracefs); + free(del); + } +} + +static inline void +check_instance_die(struct buffer_instance *instance, char *param) +{ + if (instance->delete) + die("Instance %s is marked for deletion, invalid option %s", + tracefs_instance_get_name(instance->tracefs), param); +} + +static bool clock_is_supported(struct tracefs_instance *instance, const char *clock) +{ + char *all_clocks = NULL; + char *ret = NULL; + + all_clocks = tracefs_instance_file_read(instance, "trace_clock", NULL); + if (!all_clocks) + return false; + + ret = strstr(all_clocks, clock); + if (ret && (ret == all_clocks || ret[-1] == ' ' || ret[-1] == '[')) { + switch (ret[strlen(clock)]) { + case ' ': + case '\0': + case ']': + case '\n': + break; + default: + ret = NULL; + } + } else { + ret = NULL; + } + free(all_clocks); + + return ret != NULL; +} + +#ifdef PERF +static int get_tsc_nsec(int *shift, int *mult) +{ + static int cpu_shift, cpu_mult; + static int supported; + int cpus = tracecmd_count_cpus(); + struct trace_perf perf; + int i; + + if (supported) + goto out; + + supported = -1; + if (trace_perf_init(&perf, 1, 0, getpid())) + return -1; + if (trace_perf_open(&perf)) + return -1; + cpu_shift = perf.mmap->time_shift; + cpu_mult = perf.mmap->time_mult; + for (i = 1; i < cpus; i++) { + trace_perf_close(&perf); + if (trace_perf_init(&perf, 1, i, getpid())) + break; + if (trace_perf_open(&perf)) + break; + if (perf.mmap->time_shift != cpu_shift || + perf.mmap->time_mult != cpu_mult) { + 
warning("Found different TSC multiplier and shift for CPU %d: %d;%d instead of %d;%d", + i, perf.mmap->time_mult, perf.mmap->time_shift, cpu_mult, cpu_shift); + break; + } + } + trace_perf_close(&perf); + if (i < cpus) + return -1; + + if (cpu_shift || cpu_mult) + supported = 1; +out: + if (supported < 0) + return -1; + + if (shift) + *shift = cpu_shift; + if (mult) + *mult = cpu_mult; + + return 0; +} +#else +static int get_tsc_nsec(int *shift, int *mult) +{ + return -1; +} +#endif + +bool trace_tsc2nsec_is_supported(void) +{ + return get_tsc_nsec(NULL, NULL) == 0; +} + +static void parse_record_options(int argc, + char **argv, + enum trace_cmd curr_cmd, + struct common_record_context *ctx) +{ + const char *plugin = NULL; + const char *option; + struct event_list *event = NULL; + struct event_list *last_event = NULL; + struct addrinfo *result; + char *pids; + char *pid; + char *sav; + int name_counter = 0; + int negative = 0; + struct buffer_instance *instance, *del_list = NULL; + int do_children = 0; + int fpids_count = 0; + + init_common_record_context(ctx, curr_cmd); + + if (IS_CMDSET(ctx)) + keep = 1; + + for (;;) { + int option_index = 0; + int ret; + int c; + const char *opts; + static struct option long_options[] = { + {"date", no_argument, NULL, OPT_date}, + {"func-stack", no_argument, NULL, OPT_funcstack}, + {"nosplice", no_argument, NULL, OPT_nosplice}, + {"nofifos", no_argument, NULL, OPT_nofifos}, + {"profile", no_argument, NULL, OPT_profile}, + {"stderr", no_argument, NULL, OPT_stderr}, + {"by-comm", no_argument, NULL, OPT_bycomm}, + {"ts-offset", required_argument, NULL, OPT_tsoffset}, + {"max-graph-depth", required_argument, NULL, OPT_max_graph_depth}, + {"cmdlines-size", required_argument, NULL, OPT_cmdlines_size}, + {"no-filter", no_argument, NULL, OPT_no_filter}, + {"debug", no_argument, NULL, OPT_debug}, + {"quiet", no_argument, NULL, OPT_quiet}, + {"help", no_argument, NULL, '?'}, + {"proc-map", no_argument, NULL, OPT_procmap}, + {"user", 
required_argument, NULL, OPT_user}, + {"module", required_argument, NULL, OPT_module}, + {"tsync-interval", required_argument, NULL, OPT_tsyncinterval}, + {"fork", no_argument, NULL, OPT_fork}, + {"tsc2nsec", no_argument, NULL, OPT_tsc2nsec}, + {"poll", no_argument, NULL, OPT_poll}, + {"name", required_argument, NULL, OPT_name}, + {"verbose", optional_argument, NULL, OPT_verbose}, + {"compression", required_argument, NULL, OPT_compression}, + {"file-version", required_argument, NULL, OPT_file_ver}, + {NULL, 0, NULL, 0} + }; + + if (IS_EXTRACT(ctx)) + opts = "+haf:Fp:co:O:sr:g:l:n:P:N:tb:B:ksiT"; + else + opts = "+hae:f:FA:p:cC:dDGo:O:s:r:V:vg:l:n:P:N:tb:R:B:ksSiTm:M:H:q"; + c = getopt_long (argc-1, argv+1, opts, long_options, &option_index); + if (c == -1) + break; + + /* + * If the current instance is to record a guest, then save + * all the arguments for this instance. + */ + if (c != 'B' && c != 'A' && c != OPT_name && is_guest(ctx->instance)) { + add_arg(ctx->instance, c, opts, long_options, optarg); + if (c == 'C') + ctx->instance->flags |= BUFFER_FL_HAS_CLOCK; + continue; + } + + switch (c) { + case 'h': + usage(argv); + break; + case 'a': + cmd_check_die(ctx, CMD_set, *(argv+1), "-a"); + if (IS_EXTRACT(ctx)) { + add_all_instances(); + } else { + ctx->record_all = 1; + record_all_events(); + } + break; + case 'e': + check_instance_die(ctx->instance, "-e"); + ctx->events = 1; + event = malloc(sizeof(*event)); + if (!event) + die("Failed to allocate event %s", optarg); + memset(event, 0, sizeof(*event)); + event->event = optarg; + add_event(ctx->instance, event); + event->neg = negative; + event->filter = NULL; + last_event = event; + + if (!ctx->record_all) + list_event(optarg); + break; + case 'f': + if (!last_event) + die("filter must come after event"); + if (last_event->filter) { + last_event->filter = + realloc(last_event->filter, + strlen(last_event->filter) + + strlen("&&()") + + strlen(optarg) + 1); + strcat(last_event->filter, "&&("); + 
strcat(last_event->filter, optarg); + strcat(last_event->filter, ")"); + } else { + ret = asprintf(&last_event->filter, "(%s)", optarg); + if (ret < 0) + die("Failed to allocate filter %s", optarg); + } + break; + + case 'R': + if (!last_event) + die("trigger must come after event"); + add_trigger(event, optarg); + break; + + case OPT_name: + if (!ctx->instance) + die("No instance defined for name option\n"); + if (!is_guest(ctx->instance)) + die(" --name is only used for -A options\n"); + free(ctx->instance->name); + ctx->instance->name = strdup(optarg); + if (!ctx->instance->name) + die("Failed to allocate name"); + break; + + case 'A': { + char *name = NULL; + int cid = -1, port = -1; + + if (!IS_RECORD(ctx)) + die("-A is only allowed for record operations"); + + name = parse_guest_name(optarg, &cid, &port, &result); + if (cid == -1 && !result) + die("guest %s not found", optarg); + if (port == -1) + port = TRACE_AGENT_DEFAULT_PORT; + if (!name || !*name) { + ret = asprintf(&name, "unnamed-%d", name_counter++); + if (ret < 0) + name = NULL; + } else { + /* Needs to be allocate */ + name = strdup(name); + } + if (!name) + die("Failed to allocate guest name"); + + ctx->instance = allocate_instance(name); + if (!ctx->instance) + die("Failed to allocate instance"); + + if (result) { + ctx->instance->flags |= BUFFER_FL_NETWORK; + ctx->instance->port_type = USE_TCP; + } + + ctx->instance->flags |= BUFFER_FL_GUEST; + ctx->instance->result = result; + ctx->instance->cid = cid; + ctx->instance->port = port; + ctx->instance->name = name; + add_instance(ctx->instance, 0); + ctx->data_flags |= DATA_FL_GUEST; + break; + } + case 'F': + test_set_event_pid(ctx->instance); + filter_task = 1; + break; + case 'G': + cmd_check_die(ctx, CMD_set, *(argv+1), "-G"); + ctx->global = 1; + break; + case 'P': + check_instance_die(ctx->instance, "-P"); + test_set_event_pid(ctx->instance); + pids = strdup(optarg); + if (!pids) + die("strdup"); + pid = strtok_r(pids, ",", &sav); + while 
(pid) { + fpids_count += add_filter_pid(ctx->instance, + atoi(pid), 0); + pid = strtok_r(NULL, ",", &sav); + ctx->instance->nr_process_pids++; + } + ctx->instance->process_pids = ctx->instance->filter_pids; + free(pids); + break; + case 'c': + check_instance_die(ctx->instance, "-c"); + test_set_event_pid(ctx->instance); + do_children = 1; + if (!ctx->instance->have_event_fork) { +#ifdef NO_PTRACE + die("-c invalid: ptrace not supported"); +#endif + do_ptrace = 1; + ctx->instance->ptrace_child = 1; + + } else { + save_option(ctx->instance, "event-fork"); + } + if (ctx->instance->have_func_fork) + save_option(ctx->instance, "function-fork"); + break; + case 'C': + check_instance_die(ctx->instance, "-C"); + if (strcmp(optarg, TSCNSEC_CLOCK) == 0) { + ret = get_tsc_nsec(&ctx->tsc2nsec.shift, + &ctx->tsc2nsec.mult); + if (ret) + die("TSC to nanosecond is not supported"); + ctx->instance->flags |= BUFFER_FL_TSC2NSEC; + ctx->instance->clock = TSC_CLOCK; + } else { + ctx->instance->clock = optarg; + } + if (!clock_is_supported(NULL, ctx->instance->clock)) + die("Clock %s is not supported", ctx->instance->clock); + ctx->instance->clock = strdup(ctx->instance->clock); + if (!ctx->instance->clock) + die("Failed allocation"); + ctx->instance->flags |= BUFFER_FL_HAS_CLOCK; + if (!ctx->clock && !is_guest(ctx->instance)) + ctx->clock = ctx->instance->clock; + break; + case 'v': + negative = 1; + break; + case 'l': + add_func(&ctx->instance->filter_funcs, + ctx->instance->filter_mod, optarg); + ctx->filtered = 1; + break; + case 'n': + check_instance_die(ctx->instance, "-n"); + add_func(&ctx->instance->notrace_funcs, + ctx->instance->filter_mod, optarg); + ctx->filtered = 1; + break; + case 'g': + check_instance_die(ctx->instance, "-g"); + add_func(&graph_funcs, ctx->instance->filter_mod, optarg); + ctx->filtered = 1; + break; + case 'p': + check_instance_die(ctx->instance, "-p"); + if (ctx->instance->plugin) + die("only one plugin allowed"); + for (plugin = optarg; 
isspace(*plugin); plugin++) + ; + ctx->instance->plugin = plugin; + for (optarg += strlen(optarg) - 1; + optarg > plugin && isspace(*optarg); optarg--) + ; + optarg++; + optarg[0] = '\0'; + break; + case 'D': + ctx->total_disable = 1; + /* fall through */ + case 'd': + ctx->disable = 1; + break; + case 'o': + cmd_check_die(ctx, CMD_set, *(argv+1), "-o"); + if (IS_RECORD_AGENT(ctx)) + die("-o incompatible with agent recording"); + if (host) + die("-o incompatible with -N"); + if (IS_START(ctx)) + die("start does not take output\n" + "Did you mean 'record'?"); + if (IS_STREAM(ctx)) + die("stream does not take output\n" + "Did you mean 'record'?"); + if (ctx->output) + die("only one output file allowed"); + ctx->output = optarg; + + if (IS_PROFILE(ctx)) { + int fd; + + /* pipe the output to this file instead of stdout */ + save_stdout = dup(1); + close(1); + fd = open(optarg, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) + die("can't write to %s", optarg); + if (fd != 1) { + dup2(fd, 1); + close(fd); + } + } + break; + case 'O': + check_instance_die(ctx->instance, "-O"); + option = optarg; + save_option(ctx->instance, option); + break; + case 'T': + check_instance_die(ctx->instance, "-T"); + save_option(ctx->instance, "stacktrace"); + break; + case 'H': + cmd_check_die(ctx, CMD_set, *(argv+1), "-H"); + check_instance_die(ctx->instance, "-H"); + add_hook(ctx->instance, optarg); + ctx->events = 1; + break; + case 's': + cmd_check_die(ctx, CMD_set, *(argv+1), "-s"); + if (IS_EXTRACT(ctx)) { + if (optarg) + usage(argv); + recorder_flags |= TRACECMD_RECORD_SNAPSHOT; + break; + } + if (!optarg) + usage(argv); + sleep_time = atoi(optarg); + break; + case 'S': + cmd_check_die(ctx, CMD_set, *(argv+1), "-S"); + ctx->manual = 1; + /* User sets events for profiling */ + if (!event) + ctx->events = 0; + break; + case 'r': + cmd_check_die(ctx, CMD_set, *(argv+1), "-r"); + rt_prio = atoi(optarg); + break; + case 'N': + cmd_check_die(ctx, CMD_set, *(argv+1), "-N"); + if 
(!IS_RECORD(ctx)) + die("-N only available with record"); + if (IS_RECORD_AGENT(ctx)) + die("-N incompatible with agent recording"); + if (ctx->output) + die("-N incompatible with -o"); + host = optarg; + break; + case 'V': + cmd_check_die(ctx, CMD_set, *(argv+1), "-V"); + if (!IS_RECORD(ctx)) + die("-V only available with record"); + if (IS_RECORD_AGENT(ctx)) + die("-V incompatible with agent recording"); + if (ctx->output) + die("-V incompatible with -o"); + host = optarg; + ctx->instance->port_type = USE_VSOCK; + break; + case 'm': + if (max_kb) + die("-m can only be specified once"); + if (!IS_RECORD(ctx)) + die("only record take 'm' option"); + max_kb = atoi(optarg); + break; + case 'M': + check_instance_die(ctx->instance, "-M"); + ctx->instance->cpumask = alloc_mask_from_hex(ctx->instance, optarg); + break; + case 't': + cmd_check_die(ctx, CMD_set, *(argv+1), "-t"); + if (IS_EXTRACT(ctx)) + ctx->topt = 1; /* Extract top instance also */ + else + ctx->instance->port_type = USE_TCP; + break; + case 'b': + check_instance_die(ctx->instance, "-b"); + ctx->instance->buffer_size = atoi(optarg); + break; + case 'B': + ctx->instance = allocate_instance(optarg); + if (!ctx->instance) + die("Failed to create instance"); + ctx->instance->delete = negative; + negative = 0; + if (ctx->instance->delete) { + ctx->instance->next = del_list; + del_list = ctx->instance; + } else + add_instance(ctx->instance, local_cpu_count); + if (IS_PROFILE(ctx)) + ctx->instance->flags |= BUFFER_FL_PROFILE; + break; + case 'k': + cmd_check_die(ctx, CMD_set, *(argv+1), "-k"); + keep = 1; + break; + case 'i': + ignore_event_not_found = 1; + break; + case OPT_user: + ctx->user = strdup(optarg); + if (!ctx->user) + die("Failed to allocate user name"); + break; + case OPT_procmap: + cmd_check_die(ctx, CMD_start, *(argv+1), "--proc-map"); + cmd_check_die(ctx, CMD_set, *(argv+1), "--proc-map"); + check_instance_die(ctx->instance, "--proc-map"); + ctx->instance->get_procmap = 1; + break; + case 
OPT_date: + cmd_check_die(ctx, CMD_set, *(argv+1), "--date"); + ctx->date = 1; + if (ctx->data_flags & DATA_FL_OFFSET) + die("Can not use both --date and --ts-offset"); + ctx->data_flags |= DATA_FL_DATE; + break; + case OPT_funcstack: + func_stack = 1; + break; + case OPT_nosplice: + cmd_check_die(ctx, CMD_set, *(argv+1), "--nosplice"); + recorder_flags |= TRACECMD_RECORD_NOSPLICE; + break; + case OPT_nofifos: + cmd_check_die(ctx, CMD_set, *(argv+1), "--nofifos"); + no_fifos = true; + break; + case OPT_profile: + cmd_check_die(ctx, CMD_set, *(argv+1), "--profile"); + check_instance_die(ctx->instance, "--profile"); + handle_init = trace_init_profile; + ctx->instance->flags |= BUFFER_FL_PROFILE; + ctx->events = 1; + break; + case OPT_stderr: + /* if -o was used (for profile), ignore this */ + if (save_stdout >= 0) + break; + save_stdout = dup(1); + close(1); + dup2(2, 1); + break; + case OPT_bycomm: + cmd_check_die(ctx, CMD_set, *(argv+1), "--by-comm"); + trace_profile_set_merge_like_comms(); + break; + case OPT_tsoffset: + cmd_check_die(ctx, CMD_set, *(argv+1), "--ts-offset"); + ctx->date2ts = strdup(optarg); + if (ctx->data_flags & DATA_FL_DATE) + die("Can not use both --date and --ts-offset"); + ctx->data_flags |= DATA_FL_OFFSET; + break; + case OPT_max_graph_depth: + check_instance_die(ctx->instance, "--max-graph-depth"); + free(ctx->instance->max_graph_depth); + ctx->instance->max_graph_depth = strdup(optarg); + if (!ctx->instance->max_graph_depth) + die("Could not allocate option"); + break; + case OPT_cmdlines_size: + ctx->saved_cmdlines_size = atoi(optarg); + break; + case OPT_no_filter: + cmd_check_die(ctx, CMD_set, *(argv+1), "--no-filter"); + no_filter = true; + break; + case OPT_debug: + tracecmd_set_debug(true); + break; + case OPT_module: + check_instance_die(ctx->instance, "--module"); + if (ctx->instance->filter_mod) + add_func(&ctx->instance->filter_funcs, + ctx->instance->filter_mod, "*"); + ctx->instance->filter_mod = optarg; + ctx->filtered = 0; + 
break; + case OPT_tsyncinterval: + cmd_check_die(ctx, CMD_set, *(argv+1), "--tsync-interval"); + ctx->tsync_loop_interval = atoi(optarg); + break; + case OPT_fork: + if (!IS_START(ctx)) + die("--fork option used for 'start' command only"); + fork_process = true; + break; + case OPT_tsc2nsec: + ret = get_tsc_nsec(&ctx->tsc2nsec.shift, + &ctx->tsc2nsec.mult); + if (ret) + die("TSC to nanosecond is not supported"); + ctx->instance->flags |= BUFFER_FL_TSC2NSEC; + break; + case OPT_poll: + cmd_check_die(ctx, CMD_set, *(argv+1), "--poll"); + recorder_flags |= TRACECMD_RECORD_POLL; + break; + case OPT_compression: + cmd_check_die(ctx, CMD_start, *(argv+1), "--compression"); + cmd_check_die(ctx, CMD_set, *(argv+1), "--compression"); + cmd_check_die(ctx, CMD_extract, *(argv+1), "--compression"); + cmd_check_die(ctx, CMD_stream, *(argv+1), "--compression"); + cmd_check_die(ctx, CMD_profile, *(argv+1), "--compression"); + if (strcmp(optarg, "any") && strcmp(optarg, "none") && + !tracecmd_compress_is_supported(optarg, NULL)) + die("Compression algorithm %s is not supported", optarg); + ctx->compression = strdup(optarg); + break; + case OPT_file_ver: + if (ctx->curr_cmd != CMD_record && ctx->curr_cmd != CMD_record_agent) + die("--file_version has no effect with the command %s\n", + *(argv+1)); + ctx->file_version = atoi(optarg); + if (ctx->file_version < FILE_VERSION_MIN || + ctx->file_version > FILE_VERSION_MAX) + die("Unsupported file version %d, " + "supported versions are from %d to %d", + ctx->file_version, FILE_VERSION_MIN, FILE_VERSION_MAX); + break; + case OPT_quiet: + case 'q': + quiet = true; + break; + case OPT_verbose: + if (trace_set_verbose(optarg) < 0) + die("invalid verbose level %s", optarg); + break; + default: + usage(argv); + } + } + + remove_instances(del_list); + + /* If --date is specified, prepend it to all guest VM flags */ + if (ctx->date) { + struct buffer_instance *instance; + + for_all_instances(instance) { + if (is_guest(instance)) + 
add_argv(instance, "--date", true); + } + } + + if (!ctx->filtered && ctx->instance->filter_mod) + add_func(&ctx->instance->filter_funcs, + ctx->instance->filter_mod, "*"); + + if (do_children && !filter_task && !fpids_count) + die(" -c can only be used with -F (or -P with event-fork support)"); + + if ((argc - optind) >= 2) { + if (IS_EXTRACT(ctx)) + die("Command extract does not take any commands\n" + "Did you mean 'record'?"); + ctx->run_command = 1; + } + if (ctx->user && !ctx->run_command) + warning("--user %s is ignored, no command is specified", + ctx->user); + + if (top_instance.get_procmap) { + /* use ptrace to get procmap on the command exit */ + if (ctx->run_command) { + do_ptrace = 1; + } else if (!top_instance.nr_filter_pids) { + warning("--proc-map is ignored for top instance, " + "no command or filtered PIDs are specified."); + top_instance.get_procmap = 0; + } + } + + for_all_instances(instance) { + if (instance->get_procmap && !instance->nr_filter_pids) { + warning("--proc-map is ignored for instance %s, " + "no filtered PIDs are specified.", + tracefs_instance_get_name(instance->tracefs)); + instance->get_procmap = 0; + } + } +} + +static enum trace_type get_trace_cmd_type(enum trace_cmd cmd) +{ + const static struct { + enum trace_cmd cmd; + enum trace_type ttype; + } trace_type_per_command[] = { + {CMD_record, TRACE_TYPE_RECORD}, + {CMD_stream, TRACE_TYPE_STREAM}, + {CMD_extract, TRACE_TYPE_EXTRACT}, + {CMD_profile, TRACE_TYPE_STREAM}, + {CMD_start, TRACE_TYPE_START}, + {CMD_record_agent, TRACE_TYPE_RECORD}, + {CMD_set, TRACE_TYPE_SET} + }; + + for (int i = 0; i < ARRAY_SIZE(trace_type_per_command); i++) { + if (trace_type_per_command[i].cmd == cmd) + return trace_type_per_command[i].ttype; + } + + die("Trace type UNKNOWN for the given cmd_fun"); +} + +static void finalize_record_trace(struct common_record_context *ctx) +{ + struct buffer_instance *instance; + + if (keep) + return; + + update_reset_files(); + update_reset_triggers(); + if 
(clear_function_filters) + clear_func_filters(); + + set_plugin("nop"); + + tracecmd_remove_instances(); + + /* If tracing_on was enabled before we started, set it on now */ + for_all_instances(instance) { + if (instance->flags & BUFFER_FL_KEEP) + write_tracing_on(instance, + instance->tracing_on_init_val); + if (is_agent(instance)) { + tracecmd_msg_send_close_resp_msg(instance->msg_handle); + tracecmd_output_close(instance->network_handle); + } + } + + if (host) + tracecmd_output_close(ctx->instance->network_handle); +} + +static bool has_local_instances(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) { + if (is_guest(instance)) + continue; + if (host && instance->msg_handle) + continue; + return true; + } + return false; +} + +static void set_tsync_params(struct common_record_context *ctx) +{ + struct buffer_instance *instance; + int shift, mult; + bool force_tsc = false; + char *clock = NULL; + + if (!ctx->clock) { + /* + * If no clock is configured && + * KVM time sync protocol is available && + * there is information of each guest PID process && + * tsc-x86 clock is supported && + * TSC to nsec multiplier and shift are available: + * force using the x86-tsc clock for this host-guest tracing session + * and store TSC to nsec multiplier and shift. 
+ */ + if (tsync_proto_is_supported("kvm") && + trace_have_guests_pid() && + clock_is_supported(NULL, TSC_CLOCK) && + !get_tsc_nsec(&shift, &mult) && mult) { + clock = strdup(TSC_CLOCK); + if (!clock) + die("Cannot not allocate clock"); + ctx->tsc2nsec.mult = mult; + ctx->tsc2nsec.shift = shift; + force_tsc = true; + } else { /* Use the current clock of the first host instance */ + clock = get_trace_clock(true); + } + } else { + clock = strdup(ctx->clock); + if (!clock) + die("Cannot not allocate clock"); + } + + if (!clock && !ctx->tsync_loop_interval) + goto out; + for_all_instances(instance) { + if (clock && !(instance->flags & BUFFER_FL_HAS_CLOCK)) { + /* use the same clock in all tracing peers */ + if (is_guest(instance)) { + if (!instance->clock) { + instance->clock = strdup(clock); + if (!instance->clock) + die("Can not allocate instance clock"); + } + add_argv(instance, (char *)instance->clock, true); + add_argv(instance, "-C", true); + if (ctx->tsc2nsec.mult) + instance->flags |= BUFFER_FL_TSC2NSEC; + } else if (force_tsc && !instance->clock) { + instance->clock = strdup(clock); + if (!instance->clock) + die("Can not allocate instance clock"); + } + } + instance->tsync_loop_interval = ctx->tsync_loop_interval; + } +out: + free(clock); +} + +static void record_trace(int argc, char **argv, + struct common_record_context *ctx) +{ + enum trace_type type = get_trace_cmd_type(ctx->curr_cmd); + struct buffer_instance *instance; + struct filter_pids *pid; + + /* + * If top_instance doesn't have any plugins or events, then + * remove it from being processed. 
+ */ + if (!__check_doing_something(&top_instance) && !filter_task) + first_instance = buffer_instances; + else + ctx->topt = 1; + + update_first_instance(ctx->instance, ctx->topt); + if (!IS_CMDSET(ctx)) { + check_doing_something(); + check_function_plugin(); + } + + if (!ctx->output) + ctx->output = DEFAULT_INPUT_FILE; + + if (ctx->data_flags & DATA_FL_GUEST) + set_tsync_params(ctx); + + make_instances(); + + /* Save the state of tracing_on before starting */ + for_all_instances(instance) { + instance->output_file = strdup(ctx->output); + if (!instance->output_file) + die("Failed to allocate output file name for instance"); + if (!ctx->manual && instance->flags & BUFFER_FL_PROFILE) + enable_profile(instance); + + instance->tracing_on_init_val = read_tracing_on(instance); + /* Some instances may not be created yet */ + if (instance->tracing_on_init_val < 0) + instance->tracing_on_init_val = 1; + } + + if (ctx->events) + expand_event_list(); + + page_size = getpagesize(); + + if (!is_guest(ctx->instance)) + fset = set_ftrace(ctx->instance, !ctx->disable, ctx->total_disable); + if (!IS_CMDSET(ctx)) + tracecmd_disable_all_tracing(1); + + for_all_instances(instance) + set_clock(ctx, instance); + + + /* Record records the date first */ + if (ctx->date && + ((IS_RECORD(ctx) && has_local_instances()) || IS_RECORD_AGENT(ctx))) + ctx->date2ts = get_date_to_ts(); + + for_all_instances(instance) { + set_funcs(instance); + set_mask(instance); + } + + if (ctx->events) { + for_all_instances(instance) + enable_events(instance); + } + + set_saved_cmdlines_size(ctx); + set_buffer_size(); + update_plugins(type); + set_options(); + + for_all_instances(instance) { + if (instance->max_graph_depth) { + set_max_graph_depth(instance, instance->max_graph_depth); + free(instance->max_graph_depth); + instance->max_graph_depth = NULL; + } + } + + allocate_seq(); + + if (type & (TRACE_TYPE_RECORD | TRACE_TYPE_STREAM)) { + signal(SIGINT, finish); + if (!latency) + start_threads(type, ctx); + } 
+ + if (ctx->run_command) { + run_cmd(type, ctx->user, (argc - optind) - 1, &argv[optind + 1]); + } else if (ctx->instance && is_agent(ctx->instance)) { + update_task_filter(); + tracecmd_enable_tracing(); + tracecmd_msg_wait_close(ctx->instance->msg_handle); + } else { + bool pwait = false; + bool wait_indefinitely = false; + + update_task_filter(); + + if (!IS_CMDSET(ctx)) + tracecmd_enable_tracing(); + + if (type & (TRACE_TYPE_START | TRACE_TYPE_SET)) + exit(0); + + /* We don't ptrace ourself */ + if (do_ptrace) { + for_all_instances(instance) { + for (pid = instance->filter_pids; pid; pid = pid->next) { + if (!pid->exclude && instance->ptrace_child) { + ptrace_attach(instance, pid->pid); + pwait = true; + } + } + } + } + /* sleep till we are woken with Ctrl^C */ + printf("Hit Ctrl^C to stop recording\n"); + for_all_instances(instance) { + /* If an instance is not tracing individual processes + * or there is an error while waiting for a process to + * exit, fallback to waiting indefinitely. + */ + if (!instance->nr_process_pids || + trace_wait_for_processes(instance)) + wait_indefinitely = true; + } + while (!finished && wait_indefinitely) + trace_or_sleep(type, pwait); + } + + tell_guests_to_stop(ctx); + tracecmd_disable_tracing(); + if (!latency) + stop_threads(type); + + record_stats(); + + if (!latency) + wait_threads(); + + if (IS_RECORD(ctx)) { + record_data(ctx); + delete_thread_data(); + } else + print_stats(); + + if (!keep) + tracecmd_disable_all_tracing(0); + + destroy_stats(); + finalize_record_trace(ctx); +} + +/* + * This function contains common code for the following commands: + * record, start, stream, profile. 
+ */ +static void record_trace_command(int argc, char **argv, + struct common_record_context *ctx) +{ + tracecmd_tsync_init(); + record_trace(argc, argv, ctx); +} + +void trace_start(int argc, char **argv) +{ + struct common_record_context ctx; + + parse_record_options(argc, argv, CMD_start, &ctx); + record_trace_command(argc, argv, &ctx); + exit(0); +} + +void trace_set(int argc, char **argv) +{ + struct common_record_context ctx; + + parse_record_options(argc, argv, CMD_set, &ctx); + record_trace_command(argc, argv, &ctx); + exit(0); +} + +void trace_extract(int argc, char **argv) +{ + struct common_record_context ctx; + struct buffer_instance *instance; + enum trace_type type; + + parse_record_options(argc, argv, CMD_extract, &ctx); + + type = get_trace_cmd_type(ctx.curr_cmd); + + update_first_instance(ctx.instance, 1); + check_function_plugin(); + + if (!ctx.output) + ctx.output = DEFAULT_INPUT_FILE; + + /* Save the state of tracing_on before starting */ + for_all_instances(instance) { + instance->output_file = strdup(ctx.output); + if (!instance->output_file) + die("Failed to allocate output file name for instance"); + + if (!ctx.manual && instance->flags & BUFFER_FL_PROFILE) + enable_profile(ctx.instance); + + instance->tracing_on_init_val = read_tracing_on(instance); + /* Some instances may not be created yet */ + if (instance->tracing_on_init_val < 0) + instance->tracing_on_init_val = 1; + } + + /* Extracting data records all events in the system. 
*/ + if (!ctx.record_all) + record_all_events(); + + if (ctx.events) + expand_event_list(); + + page_size = getpagesize(); + update_plugins(type); + set_options(); + + for_all_instances(instance) { + if (instance->max_graph_depth) { + set_max_graph_depth(instance, instance->max_graph_depth); + free(instance->max_graph_depth); + instance->max_graph_depth = NULL; + } + } + + allocate_seq(); + flush_threads(); + record_stats(); + + if (!keep) + tracecmd_disable_all_tracing(0); + + /* extract records the date after extraction */ + if (ctx.date) { + /* + * We need to start tracing, don't let other traces + * screw with our trace_marker. + */ + tracecmd_disable_all_tracing(1); + ctx.date2ts = get_date_to_ts(); + } + + record_data(&ctx); + delete_thread_data(); + destroy_stats(); + finalize_record_trace(&ctx); + exit(0); +} + +void trace_stream(int argc, char **argv) +{ + struct common_record_context ctx; + + parse_record_options(argc, argv, CMD_stream, &ctx); + record_trace_command(argc, argv, &ctx); + exit(0); +} + +void trace_profile(int argc, char **argv) +{ + struct common_record_context ctx; + + parse_record_options(argc, argv, CMD_profile, &ctx); + + handle_init = trace_init_profile; + ctx.events = 1; + + /* + * If no instances were set, then enable profiling on the top instance. 
+ */ + if (!buffer_instances) + top_instance.flags |= BUFFER_FL_PROFILE; + + record_trace_command(argc, argv, &ctx); + do_trace_profile(); + exit(0); +} + +void trace_record(int argc, char **argv) +{ + struct common_record_context ctx; + + parse_record_options(argc, argv, CMD_record, &ctx); + record_trace_command(argc, argv, &ctx); + exit(0); +} + +int trace_record_agent(struct tracecmd_msg_handle *msg_handle, + int cpus, int *fds, + int argc, char **argv, + bool use_fifos, + unsigned long long trace_id, const char *host) +{ + struct common_record_context ctx; + char **argv_plus; + + /* Reset optind for getopt_long */ + optind = 1; + /* + * argc is the number of elements in argv, but we need to convert + * argc and argv into "trace-cmd", "record", argv. + * where argc needs to grow by two. + */ + argv_plus = calloc(argc + 2, sizeof(char *)); + if (!argv_plus) + die("Failed to allocate record arguments"); + + argv_plus[0] = "trace-cmd"; + argv_plus[1] = "record"; + memmove(argv_plus + 2, argv, argc * sizeof(char *)); + argc += 2; + + parse_record_options(argc, argv_plus, CMD_record_agent, &ctx); + if (ctx.run_command) + return -EINVAL; + + ctx.instance->fds = fds; + ctx.instance->use_fifos = use_fifos; + ctx.instance->flags |= BUFFER_FL_AGENT; + ctx.instance->msg_handle = msg_handle; + ctx.instance->host = host; + msg_handle->version = V3_PROTOCOL; + top_instance.trace_id = trace_id; + record_trace(argc, argv, &ctx); + + free(argv_plus); + return 0; +} diff --git a/tracecmd/trace-restore.c b/tracecmd/trace-restore.c new file mode 100644 index 00000000..5bf29c52 --- /dev/null +++ b/tracecmd/trace-restore.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#define _LARGEFILE64_SOURCE +#include <dirent.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <stdarg.h> +#include <sys/types.h> +#include <sys/stat.h> +#include 
<sys/wait.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <signal.h> +#include <unistd.h> +#include <ctype.h> +#include <errno.h> + +#include "trace-local.h" + +static struct tracecmd_output *create_output(const char *file, + const char *tracing_dir, const char *kallsyms) +{ + struct tracecmd_output *out; + + out = tracecmd_output_create(file); + if (!out) + goto error; + + if (tracing_dir && tracecmd_output_set_trace_dir(out, tracing_dir)) + goto error; + if (kallsyms && tracecmd_output_set_kallsyms(out, kallsyms)) + goto error; + if (tracecmd_output_write_headers(out, NULL)) + goto error; + return out; +error: + if (out) + tracecmd_output_close(out); + unlink(file); + return NULL; +} + +void trace_restore (int argc, char **argv) +{ + struct tracecmd_output *handle; + const char *output_file = DEFAULT_INPUT_FILE; + const char *output = NULL; + const char *input = NULL; + const char *tracing_dir = NULL; + const char *kallsyms = NULL; + struct stat st1; + struct stat st2; + int first_arg; + int create_only = 0; + int args; + int c; + + if (argc < 2) + usage(argv); + + if (strcmp(argv[1], "restore") != 0) + usage(argv); + + while ((c = getopt(argc-1, argv+1, "+hco:i:t:k:")) >= 0) { + switch (c) { + case 'h': + usage(argv); + break; + case 'c': + if (input) + die("-c and -i are incompatible"); + create_only = 1; + /* make output default to partial */ + output_file = "trace-partial.dat"; + break; + + case 't': + tracing_dir = optarg; + break; + case 'k': + kallsyms = optarg; + break; + case 'o': + if (output) + die("only one output file allowed"); + output = optarg; + break; + + case 'i': + if (input) + die("only one input file allowed"); + if (create_only) + die("-c and -i are incompatible"); + input = optarg; + break; + + default: + usage(argv); + } + } + + if (!output) + output = output_file; + + if ((argc - optind) <= 1) { + if (!create_only) { + warning("No data files found"); + usage(argv); + } + + handle = create_output(output, tracing_dir, kallsyms); + 
if (!handle) + die("Unabled to create output file %s", output); + if (tracecmd_write_cmdlines(handle) < 0) + die("Failed to write command lines"); + tracecmd_output_close(handle); + exit(0); + } + first_arg = optind + 1; + args = argc - first_arg; + printf("first = %d %s args=%d\n", first_arg, argv[first_arg], args); + + /* Make sure input and output are not the same file */ + if (input && output) { + if (stat(input, &st1) < 0) + die("%s:", input); + /* output exists? otherwise we don't care */ + if (stat(output, &st2) == 0) { + if (st1.st_ino == st2.st_ino && + st1.st_dev == st2.st_dev) + die("input and output file are the same"); + } + } + + if (input) { + struct tracecmd_input *ihandle; + + ihandle = tracecmd_alloc(input, 0); + if (!ihandle) + die("error reading file %s", input); + /* make sure headers are ok */ + if (tracecmd_read_headers(ihandle, TRACECMD_FILE_CMD_LINES) < 0) + die("error reading file %s headers", input); + + handle = tracecmd_copy(ihandle, output, TRACECMD_FILE_CMD_LINES, 0, NULL); + tracecmd_close(ihandle); + } else { + handle = tracecmd_output_create(output); + tracecmd_output_write_headers(handle, NULL); + } + + if (!handle) + die("error writing to %s", output); + + if (tracecmd_append_cpu_data(handle, args, &argv[first_arg]) < 0) + die("failed to append data"); + + return; +} diff --git a/tracecmd/trace-setup-guest.c b/tracecmd/trace-setup-guest.c new file mode 100644 index 00000000..f20b48e2 --- /dev/null +++ b/tracecmd/trace-setup-guest.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 VMware Inc, Slavomir Kaslev <kaslevs@vmware.com> + * + */ + +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <grp.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "trace-local.h" +#include "trace-msg.h" + +static int make_dir(const char *path, mode_t mode) +{ + char buf[PATH_MAX+2], *p; + + strncpy(buf, path, sizeof(buf)); + if 
(buf[PATH_MAX]) + return -E2BIG; + + for (p = buf; *p; p++) { + p += strspn(p, "/"); + p += strcspn(p, "/"); + *p = '\0'; + if (mkdir(buf, mode) < 0 && errno != EEXIST) + return -errno; + *p = '/'; + } + + return 0; +} + +static int make_fifo(const char *path, mode_t mode) +{ + struct stat st; + + if (!stat(path, &st)) { + if (S_ISFIFO(st.st_mode)) + return 0; + return -EEXIST; + } + + if (mkfifo(path, mode)) + return -errno; + return 0; +} + +static int make_guest_dir(const char *guest) +{ + char path[PATH_MAX]; + + snprintf(path, sizeof(path), GUEST_DIR_FMT, guest); + return make_dir(path, 0750); +} + +static int make_guest_fifo(const char *guest, int cpu, mode_t mode) +{ + static const char *exts[] = {".in", ".out"}; + char path[PATH_MAX]; + int i, ret = 0; + + for (i = 0; i < ARRAY_SIZE(exts); i++) { + snprintf(path, sizeof(path), GUEST_FIFO_FMT "%s", + guest, cpu, exts[i]); + ret = make_fifo(path, mode); + if (ret < 0) + break; + } + + return ret; +} + +static int make_guest_fifos(const char *guest, int nr_cpus, mode_t mode) +{ + int i, ret = 0; + mode_t mask; + + mask = umask(0); + for (i = 0; i < nr_cpus; i++) { + ret = make_guest_fifo(guest, i, mode); + if (ret < 0) + break; + } + umask(mask); + + return ret; +} + +static int get_guest_cpu_count(const char *guest) +{ + const char *cmd_fmt = "virsh vcpucount --maximum '%s' 2>/dev/null"; + int nr_cpus = -1; + char cmd[1024]; + FILE *f; + + snprintf(cmd, sizeof(cmd), cmd_fmt, guest); + f = popen(cmd, "r"); + if (!f) + return -errno; + + fscanf(f, "%d", &nr_cpus); + pclose(f); + + return nr_cpus; +} + +static int attach_guest_fifos(const char *guest, int nr_cpus) +{ + const char *cmd_fmt = + "virsh attach-device --config '%s' '%s' >/dev/null 2>/dev/null"; + const char *xml_fmt = + "<channel type='pipe'>\n" + " <source path='%s'/>\n" + " <target type='virtio' name='%s%d'/>\n" + "</channel>"; + char tmp_path[PATH_MAX], path[PATH_MAX]; + char cmd[PATH_MAX], xml[PATH_MAX]; + int i, fd, ret = 0; + +#ifdef 
__ANDROID__ + strcpy(tmp_path, "/data/local/tmp/pipexmlXXXXXX"); +#else /* !__ANDROID__ */ + strcpy(tmp_path, "/tmp/pipexmlXXXXXX"); +#endif /* __ANDROID__ */ + + fd = mkstemp(tmp_path); + if (fd < 0) + return fd; + + for (i = 0; i < nr_cpus; i++) { + snprintf(path, sizeof(path), GUEST_FIFO_FMT, guest, i); + snprintf(xml, sizeof(xml), xml_fmt, path, GUEST_PIPE_NAME, i); + pwrite(fd, xml, strlen(xml), 0); + + snprintf(cmd, sizeof(cmd), cmd_fmt, guest, tmp_path); + errno = 0; + if (system(cmd) != 0) { + ret = -errno; + break; + } + } + + close(fd); + unlink(tmp_path); + + return ret; +} + +static void do_setup_guest(const char *guest, int nr_cpus, + mode_t mode, gid_t gid, bool attach) +{ + gid_t save_egid; + int ret; + + if (gid != -1) { + save_egid = getegid(); + ret = setegid(gid); + if (ret < 0) + die("failed to set effective group ID"); + } + + ret = make_guest_dir(guest); + if (ret < 0) + die("failed to create guest directory for %s", guest); + + ret = make_guest_fifos(guest, nr_cpus, mode); + if (ret < 0) + die("failed to create FIFOs for %s", guest); + + if (attach) { + ret = attach_guest_fifos(guest, nr_cpus); + if (ret < 0) + die("failed to attach FIFOs to %s", guest); + } + + if (gid != -1) { + ret = setegid(save_egid); + if (ret < 0) + die("failed to restore effective group ID"); + } +} + +void trace_setup_guest(int argc, char **argv) +{ + bool attach = false; + struct group *group; + mode_t mode = 0660; + int nr_cpus = -1; + gid_t gid = -1; + char *guest; + + if (argc < 2) + usage(argv); + + if (strcmp(argv[1], "setup-guest") != 0) + usage(argv); + + for (;;) { + int c, option_index = 0; + static struct option long_options[] = { + {"help", no_argument, NULL, '?'}, + {NULL, 0, NULL, 0} + }; + + c = getopt_long(argc-1, argv+1, "+hc:p:g:a", + long_options, &option_index); + if (c == -1) + break; + switch (c) { + case 'h': + usage(argv); + break; + case 'c': + nr_cpus = atoi(optarg); + break; + case 'p': + mode = strtol(optarg, NULL, 8); + break; + case 'g': 
+ group = getgrnam(optarg); + if (!group) + die("group %s does not exist", optarg); + gid = group->gr_gid; + break; + case 'a': + attach = true; + break; + default: + usage(argv); + } + } + + if (optind != argc-2) + usage(argv); + + guest = argv[optind+1]; + + if (nr_cpus <= 0) + nr_cpus = get_guest_cpu_count(guest); + + if (nr_cpus <= 0) + die("invalid number of cpus for guest %s", guest); + + do_setup_guest(guest, nr_cpus, mode, gid, attach); +} diff --git a/tracecmd/trace-show.c b/tracecmd/trace-show.c new file mode 100644 index 00000000..eb328527 --- /dev/null +++ b/tracecmd/trace-show.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#include <stdlib.h> +#include <getopt.h> +#include <errno.h> + +#include "tracefs.h" +#include "trace-local.h" + +enum { + OPT_tracing_on = 255, + OPT_current_tracer = 254, + OPT_buffer_size_kb = 253, + OPT_buffer_total_size_kb = 252, + OPT_ftrace_filter = 251, + OPT_ftrace_notrace = 250, + OPT_ftrace_pid = 249, + OPT_graph_function = 248, + OPT_graph_notrace = 247, + OPT_cpumask = 246, +}; + +void trace_show(int argc, char **argv) +{ + const char *buffer = NULL; + const char *file = "trace"; + const char *cpu = NULL; + struct buffer_instance *instance = &top_instance; + char cpu_path[128]; + char *path; + int snap = 0; + int pipe = 0; + int show_name = 0; + int option_index = 0; + int stop = 0; + int c; + static struct option long_options[] = { + {"tracing_on", no_argument, NULL, OPT_tracing_on}, + {"current_tracer", no_argument, NULL, OPT_current_tracer}, + {"buffer_size", no_argument, NULL, OPT_buffer_size_kb}, + {"buffer_total_size", no_argument, NULL, OPT_buffer_total_size_kb}, + {"ftrace_filter", no_argument, NULL, OPT_ftrace_filter}, + {"ftrace_notrace", no_argument, NULL, OPT_ftrace_notrace}, + {"ftrace_pid", no_argument, NULL, OPT_ftrace_pid}, + {"graph_function", no_argument, NULL, OPT_graph_function}, + {"graph_notrace", 
no_argument, NULL, OPT_graph_notrace}, + {"cpumask", no_argument, NULL, OPT_cpumask}, + {"help", no_argument, NULL, '?'}, + {NULL, 0, NULL, 0} + }; + + init_top_instance(); + + while ((c = getopt_long(argc-1, argv+1, "B:c:fsp", + long_options, &option_index)) >= 0) { + switch (c) { + case 'h': + usage(argv); + break; + case 'B': + if (buffer) + die("Can only show one buffer at a time"); + buffer = optarg; + instance = allocate_instance(optarg); + if (!instance) + die("Failed to create instance"); + break; + case 'c': + if (cpu) + die("Can only show one CPU at a time"); + cpu = optarg; + break; + case 'f': + show_name = 1; + break; + case 's': + snap = 1; + if (pipe) + die("Can not have -s and -p together"); + break; + case 'p': + pipe = 1; + if (snap) + die("Can not have -s and -p together"); + break; + case OPT_tracing_on: + show_instance_file(instance, "tracing_on"); + stop = 1; + break; + case OPT_current_tracer: + show_instance_file(instance, "current_tracer"); + stop = 1; + break; + case OPT_buffer_size_kb: + show_instance_file(instance, "buffer_size_kb"); + stop = 1; + break; + case OPT_buffer_total_size_kb: + show_instance_file(instance, "buffer_total_size_kb"); + stop = 1; + break; + case OPT_ftrace_filter: + show_instance_file(instance, "set_ftrace_filter"); + stop = 1; + break; + case OPT_ftrace_notrace: + show_instance_file(instance, "set_ftrace_notrace"); + stop = 1; + break; + case OPT_ftrace_pid: + show_instance_file(instance, "set_ftrace_pid"); + stop = 1; + break; + case OPT_graph_function: + show_instance_file(instance, "set_graph_function"); + stop = 1; + break; + case OPT_graph_notrace: + show_instance_file(instance, "set_graph_notrace"); + stop = 1; + break; + case OPT_cpumask: + show_instance_file(instance, "tracing_cpumask"); + stop = 1; + break; + default: + usage(argv); + } + } + if (stop) + exit(0); + if (pipe) + file = "trace_pipe"; + else if (snap) + file = "snapshot"; + + if (cpu) { + char *endptr; + long val; + + errno = 0; + val = 
strtol(cpu, &endptr, 0); + if (errno || cpu == endptr) + die("Invalid CPU index '%s'", cpu); + snprintf(cpu_path, 128, "per_cpu/cpu%ld/%s", val, file); + file = cpu_path; + } + + if (buffer) { + int ret; + + ret = asprintf(&path, "instances/%s/%s", buffer, file); + if (ret < 0) + die("Failed to allocate instance path %s", file); + file = path; + } + + if (show_name) { + char *name; + name = tracefs_get_tracing_file(file); + printf("%s\n", name); + tracefs_put_tracing_file(name); + } + show_file(file); + if (buffer) + free(path); + + return; +} diff --git a/tracecmd/trace-snapshot.c b/tracecmd/trace-snapshot.c new file mode 100644 index 00000000..34630b4f --- /dev/null +++ b/tracecmd/trace-snapshot.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2013 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "tracefs.h" +#include "trace-local.h" + +static void write_file(const char *name, char *val) +{ + char *path; + int fd; + ssize_t n; + + path = tracefs_get_tracing_file(name); + fd = open(path, O_WRONLY); + if (fd < 0) + die("writing %s", path); + + n = write(fd, val, strlen(val)); + if (n < 0) + die("failed to write to %s\n", path); + + tracefs_put_tracing_file(path); + close(fd); +} + +void trace_snapshot (int argc, char **argv) +{ + const char *buffer = NULL; + const char *file = "snapshot"; + struct stat st; + char *name; + char cpu_path[128]; + int take_snap = 0; + int reset_snap = 0; + int free_snap = 0; + int cpu = -1; + int ret; + int c; + + if (argc < 2) + usage(argv); + + if (strcmp(argv[1], "snapshot") != 0) + usage(argv); + + while ((c = getopt(argc-1, argv+1, "srfB:c:")) >= 0) { + switch (c) { + case 'h': + usage(argv); + break; + case 's': + take_snap = 1; + if (free_snap) + die("can't take snapshot and free it at the same time"); + 
break; + case 'f': + free_snap = 1; + if (take_snap) + die("can't take snapshot and free it at the same time"); + break; + case 'r': + reset_snap = 1; + break; + case 'B': + if (buffer) + die("Can only do one buffer at a time"); + buffer = optarg; + break; + case 'c': + if (cpu >= 0) + die("Can only do one CPU (or all) at a time"); + cpu = atoi(optarg); + break; + default: + usage(argv); + } + } + + if (cpu >= 0) { + snprintf(cpu_path, 128, "per_cpu/cpu%d/%s", cpu, file); + file = cpu_path; + } + + name = tracefs_get_tracing_file(file); + ret = stat(name, &st); + if (ret < 0) + die("Snapshot feature is not supported by this kernel"); + tracefs_put_tracing_file(name); + + if (!reset_snap && !take_snap && !free_snap) { + show_file(file); + exit(0); + } + + if (reset_snap) + write_file(file, "2"); + + if (free_snap) + write_file(file, "0"); + + if (take_snap) + write_file(file, "1"); +} diff --git a/tracecmd/trace-split.c b/tracecmd/trace-split.c new file mode 100644 index 00000000..83c5402c --- /dev/null +++ b/tracecmd/trace-split.c @@ -0,0 +1,556 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#define _LARGEFILE64_SOURCE +#include <dirent.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <libgen.h> +#include <getopt.h> +#include <stdarg.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <unistd.h> +#include <ctype.h> +#include <errno.h> + +#include "trace-local.h" + +static unsigned int page_size; +static const char *default_input_file = DEFAULT_INPUT_FILE; +static const char *input_file; + +enum split_types { + SPLIT_NONE, + /* The order of these must be reverse of the case statement in the options */ + SPLIT_SECONDS, + SPLIT_MSECS, + SPLIT_USECS, + SPLIT_EVENTS, + SPLIT_PAGES, + SPLIT_NR_TYPES, +}; + +struct cpu_data { + unsigned long long ts; + unsigned long long offset; + struct 
tep_record *record; + int cpu; + int fd; + int index; + void *commit; + void *page; + char *file; +}; + +static int create_type_len(struct tep_handle *pevent, int time, int len) +{ + static int bigendian = -1; + char *ptr; + int test; + + if (bigendian < 0) { + test = 0x4321; + ptr = (char *)&test; + if (*ptr == 0x21) + bigendian = 0; + else + bigendian = 1; + } + + if (tep_is_file_bigendian(pevent)) + time |= (len << 27); + else + time = (time << 5) | len; + + return tep_read_number(pevent, &time, 4); +} + +static int write_record(struct tracecmd_input *handle, + struct tep_record *record, + struct cpu_data *cpu_data, + enum split_types type) +{ + unsigned long long diff; + struct tep_handle *pevent; + void *page; + int len = 0; + char *ptr; + int index = 0; + int time; + + page = cpu_data->page; + + pevent = tracecmd_get_tep(handle); + + ptr = page + cpu_data->index; + + diff = record->ts - cpu_data->ts; + if (diff > (1 << 27)) { + /* Add a time stamp */ + len = RINGBUF_TYPE_TIME_EXTEND; + time = (unsigned int)(diff & ((1ULL << 27) - 1)); + time = create_type_len(pevent, time, len); + *(unsigned *)ptr = time; + ptr += 4; + time = (unsigned int)(diff >> 27); + *(unsigned *)ptr = tep_read_number(pevent, &time, 4); + cpu_data->ts = record->ts; + cpu_data->index += 8; + return 0; + } + + if (record->size && (record->size <= 28 * 4)) + len = record->size / 4; + + time = (unsigned)diff; + time = create_type_len(pevent, time, len); + + memcpy(ptr, &time, 4); + ptr += 4; + index = 4; + + if (!len) { + len = record->size + 4; + if ((len + 4) > record->record_size) + die("Bad calculation of record len (expect:%d actual:%d)", + record->record_size, len + 4); + *(unsigned *)ptr = tep_read_number(pevent, &len, 4); + ptr += 4; + index += 4; + } + + len = (record->size + 3) & ~3; + index += len; + + memcpy(ptr, record->data, len); + + cpu_data->index += index; + cpu_data->ts = record->ts; + + return 1; +} + +static void write_page(struct tep_handle *pevent, + struct cpu_data 
*cpu_data, int long_size) +{ + if (long_size == 8) { + unsigned long long index = cpu_data->index - 16; + *(unsigned long long *)cpu_data->commit = + tep_read_number(pevent, &index, 8); + } else { + unsigned int index = cpu_data->index - 12; + *(unsigned int *)cpu_data->commit = + tep_read_number(pevent, &index, 4); + } + write(cpu_data->fd, cpu_data->page, page_size); +} + +static struct tep_record *read_record(struct tracecmd_input *handle, + int percpu, int *cpu) +{ + if (percpu) + return tracecmd_read_data(handle, *cpu); + + return tracecmd_read_next_data(handle, cpu); +} + +static void set_cpu_time(struct tracecmd_input *handle, + int percpu, unsigned long long start, int cpu, int cpus) +{ + if (percpu) { + tracecmd_set_cpu_to_timestamp(handle, cpu, start); + return; + } + + for (cpu = 0; cpu < cpus; cpu++) + tracecmd_set_cpu_to_timestamp(handle, cpu, start); + return; +} + +static int parse_cpu(struct tracecmd_input *handle, + struct cpu_data *cpu_data, + unsigned long long start, + unsigned long long end, + int count_limit, int percpu, int cpu, + enum split_types type) +{ + struct tep_record *record; + struct tep_handle *pevent; + void *ptr; + int page_size; + int long_size = 0; + int cpus; + int count = 0; + int pages = 0; + + cpus = tracecmd_cpus(handle); + + long_size = tracecmd_long_size(handle); + page_size = tracecmd_page_size(handle); + pevent = tracecmd_get_tep(handle); + + /* Force new creation of first page */ + if (percpu) { + cpu_data[cpu].index = page_size + 1; + cpu_data[cpu].page = NULL; + } else { + for (cpu = 0; cpu < cpus; cpu++) { + cpu_data[cpu].index = page_size + 1; + cpu_data[cpu].page = NULL; + } + } + + /* + * Get the cpu pointers up to the start of the + * start time stamp. 
+ */ + + record = read_record(handle, percpu, &cpu); + + if (start) { + set_cpu_time(handle, percpu, start, cpu, cpus); + while (record && record->ts < start) { + tracecmd_free_record(record); + record = read_record(handle, percpu, &cpu); + } + } else if (record) + start = record->ts; + + while (record && (!end || record->ts <= end)) { + if (cpu_data[cpu].index + record->record_size > page_size) { + + if (type == SPLIT_PAGES && ++pages > count_limit) + break; + + if (cpu_data[cpu].page) + write_page(pevent, &cpu_data[cpu], long_size); + else { + cpu_data[cpu].page = malloc(page_size); + if (!cpu_data[cpu].page) + die("Failed to allocate page"); + } + + memset(cpu_data[cpu].page, 0, page_size); + ptr = cpu_data[cpu].page; + + *(unsigned long long*)ptr = + tep_read_number(pevent, &(record->ts), 8); + cpu_data[cpu].ts = record->ts; + ptr += 8; + cpu_data[cpu].commit = ptr; + ptr += long_size; + cpu_data[cpu].index = 8 + long_size; + } + + cpu_data[cpu].offset = record->offset; + + if (write_record(handle, record, &cpu_data[cpu], type)) { + tracecmd_free_record(record); + record = read_record(handle, percpu, &cpu); + + /* if we hit the end of the cpu, clear the offset */ + if (!record) { + if (percpu) + cpu_data[cpu].offset = 0; + else + for (cpu = 0; cpu < cpus; cpu++) + cpu_data[cpu].offset = 0; + } + + switch (type) { + case SPLIT_NONE: + break; + case SPLIT_SECONDS: + if (record && + record->ts > + (start + (unsigned long long)count_limit * 1000000000ULL)) { + tracecmd_free_record(record); + record = NULL; + } + break; + case SPLIT_MSECS: + if (record && + record->ts > + (start + (unsigned long long)count_limit * 1000000ULL)) { + tracecmd_free_record(record); + record = NULL; + } + break; + case SPLIT_USECS: + if (record && + record->ts > + (start + (unsigned long long)count_limit * 1000ULL)) { + tracecmd_free_record(record); + record = NULL; + } + break; + case SPLIT_EVENTS: + if (++count >= count_limit) { + tracecmd_free_record(record); + record = NULL; + } + 
break; + default: + break; + } + } + } + + if (record) + tracecmd_free_record(record); + + if (percpu) { + if (cpu_data[cpu].page) { + write_page(pevent, &cpu_data[cpu], long_size); + free(cpu_data[cpu].page); + cpu_data[cpu].page = NULL; + } + } else { + for (cpu = 0; cpu < cpus; cpu++) { + if (cpu_data[cpu].page) { + write_page(pevent, &cpu_data[cpu], long_size); + free(cpu_data[cpu].page); + cpu_data[cpu].page = NULL; + } + } + } + + return 0; +} + +static double parse_file(struct tracecmd_input *handle, + const char *output_file, + unsigned long long start, + unsigned long long end, int percpu, int only_cpu, + int count, enum split_types type) +{ + unsigned long long current; + struct tracecmd_output *ohandle; + struct cpu_data *cpu_data; + struct tep_record *record; + char **cpu_list; + char *output; + char *base; + char *file; + char *dir; + int cpus; + int cpu; + int fd; + + output = strdup(output_file); + dir = dirname(output); + base = basename(output); + + ohandle = tracecmd_copy(handle, output_file, TRACECMD_FILE_CMD_LINES, 0, NULL); + + cpus = tracecmd_cpus(handle); + cpu_data = malloc(sizeof(*cpu_data) * cpus); + if (!cpu_data) + die("Failed to allocate cpu_data for %d cpus", cpus); + + for (cpu = 0; cpu < cpus; cpu++) { + int ret; + + ret = asprintf(&file, "%s/.tmp.%s.%d", dir, base, cpu); + if (ret < 0) + die("Failed to allocate file for %s %s %d", dir, base, cpu); + fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, 0644); + cpu_data[cpu].cpu = cpu; + cpu_data[cpu].fd = fd; + cpu_data[cpu].file = file; + cpu_data[cpu].offset = 0; + if (start) + tracecmd_set_cpu_to_timestamp(handle, cpu, start); + } + + if (only_cpu >= 0) { + parse_cpu(handle, cpu_data, start, end, count, + 1, only_cpu, type); + } else if (percpu) { + for (cpu = 0; cpu < cpus; cpu++) + parse_cpu(handle, cpu_data, start, + end, count, percpu, cpu, type); + } else + parse_cpu(handle, cpu_data, start, + end, count, percpu, -1, type); + + cpu_list = malloc(sizeof(*cpu_list) * 
cpus); + if (!cpu_list) + die("Failed to allocate cpu_list for %d cpus", cpus); + for (cpu = 0; cpu < cpus; cpu ++) + cpu_list[cpu] = cpu_data[cpu].file; + + tracecmd_set_out_clock(ohandle, tracecmd_get_trace_clock(handle)); + if (tracecmd_append_cpu_data(ohandle, cpus, cpu_list) < 0) + die("Failed to append tracing data\n"); + + current = end; + for (cpu = 0; cpu < cpus; cpu++) { + /* Set the tracecmd cursor to the next set of records */ + if (cpu_data[cpu].offset) { + record = tracecmd_read_at(handle, cpu_data[cpu].offset, NULL); + if (record && (!current || record->ts > current)) + current = record->ts + 1; + tracecmd_free_record(record); + } + unlink(cpu_data[cpu].file); + free(cpu_data[cpu].file); + } + free(cpu_data); + free(cpu_list); + free(output); + tracecmd_output_close(ohandle); + + return current; +} + +void trace_split (int argc, char **argv) +{ + struct tracecmd_input *handle; + unsigned long long start_ns = 0, end_ns = 0; + unsigned long long current; + double start, end; + char *endptr; + char *output = NULL; + char *output_file; + enum split_types split_type = SPLIT_NONE; + enum split_types type = SPLIT_NONE; + int count; + int repeat = 0; + int percpu = 0; + int cpu = -1; + int ac; + int c; + + if (strcmp(argv[1], "split") != 0) + usage(argv); + + while ((c = getopt(argc-1, argv+1, "+ho:i:s:m:u:e:p:rcC:")) >= 0) { + switch (c) { + case 'h': + usage(argv); + break; + case 'p': + type++; + case 'e': + type++; + case 'u': + type++; + case 'm': + type++; + case 's': + type++; + if (split_type != SPLIT_NONE) + die("Only one type of split is allowed"); + count = atoi(optarg); + if (count <= 0) + die("Units must be greater than 0"); + split_type = type; + + /* Spliting by pages only makes sense per cpu */ + if (type == SPLIT_PAGES) + percpu = 1; + break; + case 'r': + repeat = 1; + break; + case 'c': + percpu = 1; + break; + case 'C': + cpu = atoi(optarg); + break; + case 'o': + if (output) + die("only one output file allowed"); + output = 
strdup(optarg); + break; + case 'i': + input_file = optarg; + break; + default: + usage(argv); + } + } + + ac = (argc - optind); + + if (ac >= 2) { + optind++; + start = strtod(argv[optind], &endptr); + if (ac > 3) + usage(argv); + + /* Make sure a true start value was entered */ + if (*endptr != 0) + die("Start value not floating point: %s", argv[optind]); + + start_ns = (unsigned long long)(start * 1000000000.0); + optind++; + if (ac == 3) { + end = strtod(argv[optind], &endptr); + + /* Make sure a true end value was entered */ + if (*endptr != 0) + die("End value not floating point: %s", + argv[optind]); + + end_ns = (unsigned long long)(end * 1000000000.0); + if (end_ns < start_ns) + die("Error: end is less than start"); + } + } + + if (!input_file) + input_file = default_input_file; + + handle = tracecmd_open(input_file, 0); + if (!handle) + die("error reading %s", input_file); + + if (tracecmd_get_file_state(handle) == TRACECMD_FILE_CPU_LATENCY) + die("trace-cmd split does not work with latency traces\n"); + + page_size = tracecmd_page_size(handle); + + if (!output) + output = strdup(input_file); + + if (!repeat) { + output = realloc(output, strlen(output) + 3); + strcat(output, ".1"); + } + + current = start_ns; + output_file = malloc(strlen(output) + 50); + if (!output_file) + die("Failed to allocate for %s", output); + c = 1; + + do { + if (repeat) + sprintf(output_file, "%s.%04d", output, c++); + else + strcpy(output_file, output); + + current = parse_file(handle, output_file, start_ns, end_ns, + percpu, cpu, count, type); + if (!repeat) + break; + start_ns = 0; + } while (current && (!end_ns || current < end_ns)); + + free(output); + free(output_file); + + tracecmd_close(handle); + + return; +} diff --git a/tracecmd/trace-stack.c b/tracecmd/trace-stack.c new file mode 100644 index 00000000..80364949 --- /dev/null +++ b/tracecmd/trace-stack.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven 
Rostedt <srostedt@redhat.com> + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <getopt.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> + +#include "tracefs.h" +#include "trace-local.h" + +#define PROC_FILE "/proc/sys/kernel/stack_tracer_enabled" + +enum stack_type { + STACK_START, + STACK_STOP, + STACK_RESET, + STACK_REPORT +}; + +static void test_available(void) +{ + struct stat buf; + int fd; + + fd = stat(PROC_FILE, &buf); + if (fd < 0) + die("stack tracer not configured on running kernel"); +} + +/* NOTE: this implementation only accepts new_status in the range [0..9]. */ +static void change_stack_tracer_status(unsigned new_status) +{ + char buf[1]; + int status; + int ret; + int fd; + int n; + + if (new_status > 9) { + warning("invalid status %d\n", new_status); + return; + } + + ret = tracecmd_stack_tracer_status(&status); + if (ret < 0) + die("error reading %s", PROC_FILE); + + if (ret > 0 && status == new_status) + return; /* nothing to do */ + + fd = open(PROC_FILE, O_WRONLY); + if (fd < 0) + die("writing %s", PROC_FILE); + + buf[0] = new_status + '0'; + + n = write(fd, buf, 1); + if (n < 0) + die("writing into %s", PROC_FILE); + close(fd); +} + +static void start_trace(void) +{ + change_stack_tracer_status(1); +} + +static void stop_trace(void) +{ + change_stack_tracer_status(0); +} + +static void reset_trace(void) +{ + char *path; + char buf[1]; + int fd; + int n; + + path = tracefs_get_tracing_file("stack_max_size"); + fd = open(path, O_WRONLY); + if (fd < 0) + die("writing %s", path); + + buf[0] = '0'; + n = write(fd, buf, 1); + if (n < 0) + die("writing into %s", path); + tracefs_put_tracing_file(path); + close(fd); +} + +static void read_trace(void) +{ + char *buf = NULL; + int status; + char *path; + FILE *fp; + size_t n; + int r; + + if (tracecmd_stack_tracer_status(&status) <= 0) + die("Invalid stack tracer 
state"); + + if (status > 0) + printf("(stack tracer running)\n"); + else + printf("(stack tracer not running)\n"); + + path = tracefs_get_tracing_file("stack_trace"); + fp = fopen(path, "r"); + if (!fp) + die("reading to '%s'", path); + tracefs_put_tracing_file(path); + + while ((r = getline(&buf, &n, fp)) >= 0) { + /* + * Skip any line that starts with a '#'. + * Those talk about how to enable stack tracing + * within the debugfs system. We don't care about that. + */ + if (buf[0] != '#') + printf("%s", buf); + + free(buf); + buf = NULL; + } + + fclose(fp); +} + +enum { + OPT_verbose = 252, + OPT_reset = 253, + OPT_stop = 254, + OPT_start = 255, +}; + +void trace_stack (int argc, char **argv) +{ + enum stack_type trace_type = STACK_REPORT; + int c; + + if (argc < 2) + usage(argv); + + if (strcmp(argv[1], "stack") != 0) + usage(argv); + + for (;;) { + int option_index = 0; + static struct option long_options[] = { + {"start", no_argument, NULL, OPT_start}, + {"stop", no_argument, NULL, OPT_stop}, + {"reset", no_argument, NULL, OPT_reset}, + {"help", no_argument, NULL, '?'}, + {"verbose", optional_argument, NULL, OPT_verbose}, + {NULL, 0, NULL, 0} + }; + + c = getopt_long (argc-1, argv+1, "+h?", + long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 'h': + usage(argv); + break; + case OPT_start: + trace_type = STACK_START; + break; + case OPT_stop: + trace_type = STACK_STOP; + break; + case OPT_reset: + trace_type = STACK_RESET; + break; + case OPT_verbose: + if (trace_set_verbose(optarg) < 0) + die("invalid verbose level %s", optarg); + break; + default: + usage(argv); + } + } + + test_available(); + + switch (trace_type) { + case STACK_START: + start_trace(); + break; + case STACK_STOP: + stop_trace(); + break; + case STACK_RESET: + reset_trace(); + break; + default: + read_trace(); + break; + } + + return; +} diff --git a/tracecmd/trace-stat.c b/tracecmd/trace-stat.c new file mode 100644 index 00000000..a5fb777b --- /dev/null +++ 
b/tracecmd/trace-stat.c @@ -0,0 +1,926 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2014 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#include <sys/types.h> +#include <sys/stat.h> +#include <stdlib.h> +#include <stdio.h> +#include <getopt.h> +#include <unistd.h> +#include <fcntl.h> +#include <ctype.h> + +#include "tracefs.h" +#include "trace-local.h" + +#ifndef BUFSIZ +#define BUFSIZ 1024 +#endif + +static inline int is_top_instance(struct buffer_instance *instance) +{ + return instance == &top_instance; +} + +static int get_instance_file_fd(struct buffer_instance *instance, + const char *file) +{ + char *path; + int fd; + + path = tracefs_instance_get_file(instance->tracefs, file); + fd = open(path, O_RDONLY); + tracefs_put_tracing_file(path); + + return fd; +} + +char *strstrip(char *str) +{ + char *s; + + if (!str) + return NULL; + + s = str + strlen(str) - 1; + while (s >= str && isspace(*s)) + s--; + s++; + *s = '\0'; + + for (s = str; *s && isspace(*s); s++) + ; + + return s; +} + +/* FIXME: append_file() is duplicated and could be consolidated */ +char *append_file(const char *dir, const char *name) +{ + char *file; + int ret; + + ret = asprintf(&file, "%s/%s", dir, name); + if (ret < 0) + die("Failed to allocate %s/%s", dir, name); + + return file; +} + +static char *get_fd_content(int fd, const char *file) +{ + char *str = NULL; + int cnt = 0; + int ret; + + for (;;) { + str = realloc(str, BUFSIZ * ++cnt); + if (!str) + die("malloc"); + ret = read(fd, str + BUFSIZ * (cnt - 1), BUFSIZ); + if (ret < 0) + die("reading %s\n", file); + if (ret < BUFSIZ) + break; + } + str[BUFSIZ * (cnt-1) + ret] = 0; + + return str; +} + +char *get_file_content(const char *file) +{ + char *str; + int fd; + + fd = open(file, O_RDONLY); + if (fd < 0) + return NULL; + + str = get_fd_content(fd, file); + close(fd); + + return str; +} + +static char *get_instance_file_content(struct buffer_instance *instance, + const char *file) +{ + char *str = NULL; 
+ int fd; + + fd = get_instance_file_fd(instance, file); + if (fd < 0) + return NULL; + + str = get_fd_content(fd, file); + + close(fd); + return str; +} + +static void report_file(struct buffer_instance *instance, + char *name, char *def_value, char *description) +{ + char *str; + char *cont; + + if (!tracefs_file_exists(instance->tracefs, name)) + return; + str = get_instance_file_content(instance, name); + if (!str) + return; + cont = strstrip(str); + if (cont[0] && strcmp(cont, def_value) != 0) + printf("\n%s%s\n", description, cont); + + free(str); +} + +static int report_instance(const char *name, void *data) +{ + bool *first = (bool *)data; + + if (*first) { + *first = false; + printf("\nInstances:\n"); + } + printf(" %s\n", name); + return 0; +} + +static void report_instances(void) +{ + bool first = true; + + tracefs_instances_walk(report_instance, &first); +} + +struct event_iter *trace_event_iter_alloc(const char *path) +{ + struct event_iter *iter; + + iter = malloc(sizeof(*iter)); + if (!iter) + die("Failed to allocate event_iter for path %s", path); + memset(iter, 0, sizeof(*iter)); + + iter->system_dir = opendir(path); + if (!iter->system_dir) + die("opendir"); + + return iter; +} + +enum event_iter_type +trace_event_iter_next(struct event_iter *iter, const char *path, const char *system) +{ + struct dirent *dent; + + if (system && !iter->event_dir) { + char *event; + struct stat st; + + event = append_file(path, system); + + stat(event, &st); + if (!S_ISDIR(st.st_mode)) { + free(event); + goto do_system; + } + + iter->event_dir = opendir(event); + if (!iter->event_dir) + die("opendir %s", event); + free(event); + } + + if (iter->event_dir) { + while ((dent = readdir(iter->event_dir))) { + const char *name = dent->d_name; + + if (strcmp(name, ".") == 0 || + strcmp(name, "..") == 0) + continue; + + iter->event_dent = dent; + return EVENT_ITER_EVENT; + } + closedir(iter->event_dir); + iter->event_dir = NULL; + } + + do_system: + while ((dent = 
readdir(iter->system_dir))) { + const char *name = dent->d_name; + + if (strcmp(name, ".") == 0 || + strcmp(name, "..") == 0) + continue; + + iter->system_dent = dent; + + return EVENT_ITER_SYSTEM; + } + + return EVENT_ITER_NONE; +} + +void trace_event_iter_free(struct event_iter *iter) +{ + if (!iter) + return; + + if (iter->event_dir) + closedir(iter->event_dir); + + closedir(iter->system_dir); + free(iter); +} + +static void reset_event_iter(struct event_iter *iter) +{ + if (iter->event_dir) { + closedir(iter->event_dir); + iter->event_dir = NULL; + } + + rewinddir(iter->system_dir); +} + +static int process_individual_events(const char *path, struct event_iter *iter) +{ + struct stat st; + const char *system = iter->system_dent->d_name; + char *file; + char *enable = NULL; + char *str; + int ret = 0; + + file = append_file(path, system); + + stat(file, &st); + if (!S_ISDIR(st.st_mode)) + goto out; + + enable = append_file(file, "enable"); + str = get_file_content(enable); + if (!str) + goto out; + + if (*str != '1' && *str != '0') + ret = 1; + free(str); + + out: + free(enable); + free(file); + + return ret; +} + +static void +process_event_enable(char *path, const char *system, const char *name, + enum event_process *processed) +{ + struct stat st; + char *enable = NULL; + char *file; + char *str; + + if (system) + path = append_file(path, system); + + file = append_file(path, name); + + if (system) + free(path); + + stat(file, &st); + if (!S_ISDIR(st.st_mode)) + goto out; + + enable = append_file(file, "enable"); + str = get_file_content(enable); + if (!str) + goto out; + + if (*str == '1') { + if (!system) { + if (!*processed) + printf(" Individual systems:\n"); + printf( " %s\n", name); + *processed = PROCESSED_SYSTEM; + } else { + if (!*processed) { + printf(" Individual events:\n"); + *processed = PROCESSED_SYSTEM; + } + if (*processed == PROCESSED_SYSTEM) { + printf(" %s\n", system); + *processed = PROCESSED_EVENT; + } + printf( " %s\n", name); + } + } + 
free(str); + + out: + free(enable); + free(file); +} + +static void report_events(struct buffer_instance *instance) +{ + struct event_iter *iter; + char *str; + char *cont; + char *path; + char *system; + enum event_iter_type type; + enum event_process processed = PROCESSED_NONE; + enum event_process processed_part = PROCESSED_NONE; + + str = get_instance_file_content(instance, "events/enable"); + if (!str) + return; + + cont = strstrip(str); + + printf("\nEvents:\n"); + + switch(*cont) { + case '1': + printf(" All enabled\n"); + free(str); + return; + case '0': + printf(" All disabled\n"); + free(str); + return; + } + + free(str); + + path = tracefs_instance_get_file(instance->tracefs, "events"); + if (!path) + die("malloc"); + + iter = trace_event_iter_alloc(path); + + while (trace_event_iter_next(iter, path, NULL)) { + process_event_enable(path, NULL, iter->system_dent->d_name, &processed); + } + + reset_event_iter(iter); + + system = NULL; + while ((type = trace_event_iter_next(iter, path, system))) { + + if (type == EVENT_ITER_SYSTEM) { + + /* Only process systems that are not fully enabled */ + if (!process_individual_events(path, iter)) + continue; + + system = iter->system_dent->d_name; + if (processed_part) + processed_part = PROCESSED_SYSTEM; + continue; + } + + process_event_enable(path, iter->system_dent->d_name, + iter->event_dent->d_name, &processed_part); + } + + trace_event_iter_free(iter); + + if (!processed && !processed_part) + printf(" (none enabled)\n"); + + tracefs_put_tracing_file(path); +} + +static void +process_event_filter(char *path, struct event_iter *iter, enum event_process *processed) +{ + const char *system = iter->system_dent->d_name; + const char *event = iter->event_dent->d_name; + struct stat st; + char *filter = NULL; + char *file; + char *str; + char *cont; + + path = append_file(path, system); + file = append_file(path, event); + free(path); + + stat(file, &st); + if (!S_ISDIR(st.st_mode)) + goto out; + + filter = 
append_file(file, "filter"); + str = get_file_content(filter); + if (!str) + goto out; + + cont = strstrip(str); + + if (strcmp(cont, "none") == 0) { + free(str); + goto out; + } + + if (!*processed) + printf("\nFilters:\n"); + printf( " %s:%s \"%s\"\n", system, event, cont); + *processed = PROCESSED_SYSTEM; + free(str); + + out: + free(filter); + free(file); +} + +static void report_event_filters(struct buffer_instance *instance) +{ + struct event_iter *iter; + char *path; + char *system; + enum event_iter_type type; + enum event_process processed = PROCESSED_NONE; + + path = tracefs_instance_get_file(instance->tracefs, "events"); + if (!path) + die("malloc"); + + iter = trace_event_iter_alloc(path); + + processed = PROCESSED_NONE; + system = NULL; + while ((type = trace_event_iter_next(iter, path, system))) { + + if (type == EVENT_ITER_SYSTEM) { + system = iter->system_dent->d_name; + continue; + } + + process_event_filter(path, iter, &processed); + } + + trace_event_iter_free(iter); + + tracefs_put_tracing_file(path); +} + +static void +process_event_trigger(char *path, struct event_iter *iter, enum event_process *processed) +{ + const char *system = iter->system_dent->d_name; + const char *event = iter->event_dent->d_name; + struct stat st; + char *trigger = NULL; + char *file; + char *str; + char *cont; + + path = append_file(path, system); + file = append_file(path, event); + free(path); + + stat(file, &st); + if (!S_ISDIR(st.st_mode)) + goto out; + + trigger = append_file(file, "trigger"); + str = get_file_content(trigger); + if (!str) + goto out; + + cont = strstrip(str); + + if (cont[0] == '#') { + free(str); + goto out; + } + + if (!*processed) + printf("\nTriggers:\n"); + printf( " %s:%s \"%s\"\n", system, event, cont); + *processed = PROCESSED_SYSTEM; + free(str); + + out: + free(trigger); + free(file); +} + +static void report_event_triggers(struct buffer_instance *instance) +{ + struct event_iter *iter; + char *path; + char *system; + enum 
event_iter_type type; + enum event_process processed = PROCESSED_NONE; + + path = tracefs_instance_get_file(instance->tracefs, "events"); + if (!path) + die("malloc"); + + iter = trace_event_iter_alloc(path); + + processed = PROCESSED_NONE; + system = NULL; + while ((type = trace_event_iter_next(iter, path, system))) { + + if (type == EVENT_ITER_SYSTEM) { + system = iter->system_dent->d_name; + continue; + } + + process_event_trigger(path, iter, &processed); + } + + trace_event_iter_free(iter); + + tracefs_put_tracing_file(path); +} + +enum func_states { + FUNC_STATE_START, + FUNC_STATE_SKIP, + FUNC_STATE_PRINT, +}; + +static void list_functions(const char *path, char *string) +{ + enum func_states state; + struct stat st; + char *str; + int ret = 0; + int len; + int i; + int first = 0; + + /* Ignore if it does not exist. */ + ret = stat(path, &st); + if (ret < 0) + return; + + str = get_file_content(path); + if (!str) + return; + + len = strlen(str); + + state = FUNC_STATE_START; + + /* Skip all lines that start with '#' */ + for (i = 0; i < len; i++) { + + if (state == FUNC_STATE_PRINT) + putchar(str[i]); + + if (str[i] == '\n') { + state = FUNC_STATE_START; + continue; + } + + if (state == FUNC_STATE_SKIP) + continue; + + if (state == FUNC_STATE_START && str[i] == '#') { + state = FUNC_STATE_SKIP; + continue; + } + + if (!first) { + printf("\n%s:\n", string); + first = 1; + } + + if (state != FUNC_STATE_PRINT) { + state = FUNC_STATE_PRINT; + printf(" "); + putchar(str[i]); + } + } + free(str); +} + +static void report_graph_funcs(struct buffer_instance *instance) +{ + char *path; + + path = tracefs_instance_get_file(instance->tracefs, "set_graph_function"); + if (!path) + die("malloc"); + + list_functions(path, "Function Graph Filter"); + + tracefs_put_tracing_file(path); + + path = tracefs_instance_get_file(instance->tracefs, "set_graph_notrace"); + if (!path) + die("malloc"); + + list_functions(path, "Function Graph No Trace"); + + 
tracefs_put_tracing_file(path); +} + +static void report_ftrace_filters(struct buffer_instance *instance) +{ + char *path; + + path = tracefs_instance_get_file(instance->tracefs, "set_ftrace_filter"); + if (!path) + die("malloc"); + + list_functions(path, "Function Filter"); + + tracefs_put_tracing_file(path); + + path = tracefs_instance_get_file(instance->tracefs, "set_ftrace_notrace"); + if (!path) + die("malloc"); + + list_functions(path, "Function No Trace"); + + tracefs_put_tracing_file(path); +} + +static void report_buffers(struct buffer_instance *instance) +{ +#define FILE_SIZE 100 + char *str; + char *cont; + char file[FILE_SIZE]; + int cpu; + + str = get_instance_file_content(instance, "buffer_size_kb"); + if (!str) + return; + + cont = strstrip(str); + + /* If it's not expanded yet, just skip */ + if (strstr(cont, "expanded") != NULL) + goto out; + + if (strcmp(cont, "X") != 0) { + printf("\nBuffer size in kilobytes (per cpu):\n"); + printf(" %s\n", str); + goto total; + } + + /* Read the sizes of each CPU buffer */ + for (cpu = 0; ; cpu++) { + + snprintf(file, FILE_SIZE, "per_cpu/cpu%d/buffer_size_kb", cpu); + str = get_instance_file_content(instance, file); + if (!str) + break; + + cont = strstrip(str); + if (!cpu) + putchar('\n'); + + printf("CPU %d buffer size (kb): %s\n", cpu, cont); + free(str); + } + + total: + free(str); + + str = get_instance_file_content(instance, "buffer_total_size_kb"); + if (!str) + return; + + cont = strstrip(str); + printf("\nBuffer total size in kilobytes:\n"); + printf(" %s\n", str); + + out: + free(str); +} + +static void report_clock(struct buffer_instance *instance) +{ + struct tracefs_instance *tracefs = instance ? 
instance->tracefs : NULL; + char *clock; + + clock = tracefs_get_clock(tracefs); + + /* Default clock is "local", only show others */ + if (clock && strcmp(clock, "local") != 0) + printf("\nClock: %s\n", clock); + + free(clock); +} + +static void report_cpumask(struct buffer_instance *instance) +{ + char *str; + char *cont; + int cpus; + int n; + int i; + + str = get_instance_file_content(instance, "tracing_cpumask"); + if (!str) + return; + + cont = strstrip(str); + + /* check to make sure all CPUs on this machine are set */ + cpus = tracecmd_count_cpus(); + + for (i = strlen(cont) - 1; i >= 0 && cpus > 0; i--) { + if (cont[i] == ',') + continue; + + if (cont[i] == 'f') { + cpus -= 4; + continue; + } + + if (cpus >= 4) + break; + + if (cont[i] >= '0' && cont[i] <= '9') + n = cont[i] - '0'; + else + n = 10 + (cont[i] - 'a'); + + while (cpus > 0) { + if (!(n & 1)) + break; + n >>= 1; + cpus--; + } + break; + } + + /* If cpus is greater than zero, one isn't set */ + if (cpus > 0) + printf("\nCPU mask: %s\n", cont); + + free(str); +} + +static void report_probes(struct buffer_instance *instance, + const char *file, const char *string) +{ + char *str; + char *cont; + int newline; + int i; + + str = get_instance_file_content(instance, file); + if (!str) + return; + + cont = strstrip(str); + if (strlen(cont) == 0) + goto out; + + printf("\n%s:\n", string); + + newline = 1; + for (i = 0; cont[i]; i++) { + if (newline) + printf(" "); + putchar(cont[i]); + if (cont[i] == '\n') + newline = 1; + else + newline = 0; + } + putchar('\n'); + out: + free(str); +} + +static void report_kprobes(struct buffer_instance *instance) +{ + report_probes(instance, "kprobe_events", "Kprobe events"); +} + +static void report_uprobes(struct buffer_instance *instance) +{ + report_probes(instance, "uprobe_events", "Uprobe events"); +} + +static void report_traceon(struct buffer_instance *instance) +{ + char *str; + char *cont; + + str = get_instance_file_content(instance, "tracing_on"); + if 
(!str) + return; + + cont = strstrip(str); + + /* double newline as this is the last thing printed */ + if (strcmp(cont, "0") == 0) + printf("\nTracing is disabled\n\n"); + else + printf("\nTracing is enabled\n\n"); + + free(str); +} + +static void stat_instance(struct buffer_instance *instance, bool opt) +{ + if (instance != &top_instance) { + if (instance != first_instance) + printf("---------------\n"); + printf("Instance: %s\n", + tracefs_instance_get_name(instance->tracefs)); + } + + report_file(instance, "current_tracer", "nop", "Tracer: "); + report_events(instance); + report_event_filters(instance); + report_event_triggers(instance); + report_ftrace_filters(instance); + report_graph_funcs(instance); + report_buffers(instance); + report_clock(instance); + report_cpumask(instance); + report_file(instance, "tracing_max_latency", "0", "Max Latency: "); + report_kprobes(instance); + report_uprobes(instance); + report_file(instance, "set_event_pid", "", "Filtered event PIDs:\n"); + report_file(instance, "set_ftrace_pid", "no pid", + "Filtered function tracer PIDs:\n"); + if (opt) { + printf("\nOptions:\n"); + show_options(" ", instance); + } + report_traceon(instance); + report_file(instance, "error_log", "", "Error log:\n"); + if (instance == &top_instance) + report_instances(); +} + +void trace_stat (int argc, char **argv) +{ + struct buffer_instance *instance = &top_instance; + bool opt = false; + int topt = 0; + int status; + int c; + + init_top_instance(); + + for (;;) { + c = getopt(argc-1, argv+1, "htoB:"); + if (c == -1) + break; + switch (c) { + case 'h': + usage(argv); + break; + case 'B': + instance = allocate_instance(optarg); + if (!instance) + die("Failed to create instance"); + add_instance(instance, tracecmd_count_cpus()); + /* top instance requires direct access */ + if (!topt && is_top_instance(first_instance)) + first_instance = instance; + break; + case 't': + /* Force to use top instance */ + topt = 1; + instance = &top_instance; + break; + 
case 'o': + opt = 1; + break; + default: + usage(argv); + } + } + + update_first_instance(instance, topt); + + for_all_instances(instance) { + stat_instance(instance, opt); + } + + if (tracecmd_stack_tracer_status(&status) >= 0) { + if (status > 0) + printf("Stack tracing is enabled\n\n"); + } else { + printf("Error reading stack tracer status\n\n"); + } + + exit(0); +} diff --git a/tracecmd/trace-stream.c b/tracecmd/trace-stream.c new file mode 100644 index 00000000..ee310f3d --- /dev/null +++ b/tracecmd/trace-stream.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2014 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#include <stdio.h> +#include <unistd.h> +#include <fcntl.h> +#include <errno.h> + +#include <sys/time.h> +#include <sys/types.h> + +#include "trace-local.h" + +/* + * Stream runs for a single machine. We are going to cheat + * and use the trace-output and trace-input code to create + * our pevent. First just create a trace.dat file and then read + * it to create the pevent and handle. 
+ */ +struct tracecmd_input * +trace_stream_init(struct buffer_instance *instance, int cpu, int fd, int cpus, + struct hook_list *hooks, + tracecmd_handle_init_func handle_init, int global) +{ + struct tracecmd_input *trace_input; + struct tracecmd_output *trace_output; + static FILE *fp = NULL; + static int tfd; + static int ofd; + long flags; + + if (instance->handle) { + trace_input = instance->handle; + goto make_pipe; + } + + if (!fp) { + fp = tmpfile(); + if (!fp) + return NULL; + tfd = fileno(fp); + + ofd = dup(tfd); + trace_output = tracecmd_output_create_fd(ofd); + if (!trace_output) { + fclose(fp); + return NULL; + } + tracecmd_output_write_headers(trace_output, NULL); + tracecmd_output_free(trace_output); + } + + lseek(ofd, 0, SEEK_SET); + + trace_input = tracecmd_alloc_fd(ofd, 0); + if (!trace_input) { + close(ofd); + goto fail; + } + + if (tracecmd_read_headers(trace_input, TRACECMD_FILE_PRINTK) < 0) + goto fail_free_input; + + if (handle_init) + handle_init(trace_input, hooks, global); + + make_pipe: + /* Do not block on this pipe */ + flags = fcntl(fd, F_GETFL); + fcntl(fd, F_SETFL, flags | O_NONBLOCK); + + if (tracecmd_make_pipe(trace_input, cpu, fd, cpus) < 0) + goto fail_free_input; + + instance->handle = trace_input; + + return trace_input; + + fail_free_input: + tracecmd_close(trace_input); + fail: + fclose(fp); + + return NULL; +} + +int trace_stream_read(struct pid_record_data *pids, int nr_pids, struct timeval *tv) +{ + struct tep_record *record; + struct pid_record_data *pid; + struct pid_record_data *last_pid; + fd_set rfds; + int top_rfd = 0; + int nr_fd; + int ret; + int i; + + last_pid = NULL; + + again: + for (i = 0; i < nr_pids; i++) { + pid = &pids[i]; + + if (!pid->record) + pid->record = tracecmd_read_data(pid->instance->handle, pid->cpu); + record = pid->record; + if (!record && errno == EINVAL) + /* pipe has closed */ + pid->closed = 1; + + if (record && + (!last_pid || record->ts < last_pid->record->ts)) + last_pid = pid; + } + 
if (last_pid) { + trace_show_data(last_pid->instance->handle, last_pid->record); + tracecmd_free_record(last_pid->record); + last_pid->record = NULL; + return 1; + } + + nr_fd = 0; + FD_ZERO(&rfds); + + for (i = 0; i < nr_pids; i++) { + /* Do not process closed pipes */ + if (pids[i].closed) + continue; + nr_fd++; + if (pids[i].brass[0] > top_rfd) + top_rfd = pids[i].brass[0]; + + FD_SET(pids[i].brass[0], &rfds); + } + + if (!nr_fd) + return 0; + + ret = select(top_rfd + 1, &rfds, NULL, NULL, tv); + + if (ret > 0) + goto again; + + return ret; +} diff --git a/tracecmd/trace-usage.c b/tracecmd/trace-usage.c new file mode 100644 index 00000000..2cfa64f5 --- /dev/null +++ b/tracecmd/trace-usage.c @@ -0,0 +1,492 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <libgen.h> + +#include "trace-local.h" +#include "version.h" + +struct usage_help { + char *name; + char *short_help; + char *long_help; +}; + +static struct usage_help usage_help[] = { + { + "record", + "record a trace into a trace.dat file", + " %s record [-v][-e event [-f filter]][-p plugin][-F][-d][-D][-o file] \\\n" + " [-q][-s usecs][-O option ][-l func][-g func][-n func] \\\n" + " [-P pid][-N host:port][-t][-r prio][-b size][-B buf][command ...]\n" + " [-m max][-C clock]\n" + " -e run command with event enabled\n" + " -f filter for previous -e event\n" + " -R trigger for previous -e event\n" + " -p run command with plugin enabled\n" + " -F filter only on the given process\n" + " -P trace the given pid like -F for the command\n" + " -c also trace the children of -F (or -P if kernel supports it)\n" + " -C set the trace clock\n" + " -T do a stacktrace on all events\n" + " -l filter function name\n" + " -g set graph function\n" + " -n do not trace function\n" + " -m max size per CPU in kilobytes\n" + " -M set CPU mask to trace\n" + " -v will negate all -e (disable those events) and -B (delete those instances) after it\n" + " -d disable function 
tracer when running\n" + " -D Full disable of function tracing (for all users)\n" + " -o data output file [default trace.dat]\n" + " -O option to enable (or disable)\n" + " -r real time priority to run the capture threads\n" + " -s sleep interval between recording (in usecs) [default: 1000]\n" + " -S used with --profile, to enable only events in command line\n" + " -N host:port to connect to (see listen)\n" + " -V cid:port to connect to via vsocket (see listen)\n" + " -t used with -N, forces use of tcp in live trace\n" + " -b change kernel buffersize (in kilobytes per CPU)\n" + " -B create sub buffer and following events will be enabled here\n" + " -k do not reset the buffers after tracing.\n" + " -i do not fail if an event is not found\n" + " -q print no output to the screen\n" + " -G when profiling, set soft and hard irqs as global\n" + " --quiet print no output to the screen\n" + " --module filter module name\n" + " --by-comm used with --profile, merge events for related comms\n" + " --profile enable tracing options needed for report --profile\n" + " --func-stack perform a stack trace for function tracer\n" + " (use with caution)\n" + " --max-graph-depth limit function_graph depth\n" + " --cmdlines-size change kernel saved_cmdlines_size\n" + " --no-filter include trace-cmd threads in the trace\n" + " --proc-map save the traced processes address map into the trace.dat file\n" + " --user execute the specified [command ...] 
as given user\n" + " --tsc2nsec Convert the current clock to nanoseconds, using tsc multiplier and shift from the Linux" + " kernel's perf interface\n" + " --tsync-interval set the loop interval, in ms, for timestamps synchronization with guests:" + " If a negative number is specified, timestamps synchronization is disabled" + " If 0 is specified, no loop is performed - timestamps offset is calculated only twice," + " at the beginnig and at the end of the trace\n" + " --poll don't block while reading from the trace buffer\n" + " --name used with -A to give the agent a specific name\n" + " --file-version set the desired trace file version\n" + " --compression compress the trace output file, one of these strings can be passed:\n" + " any - auto select the best available compression algorithm\n" + " none - do not compress the trace file\n" + " name - the name of the desired compression algorithms\n" + " available algorithms can be listed with trace-cmd list -c\n" + }, + { + "set", + "set a ftrace configuration parameter", + " %s set [-v][-e event [-f filter]][-p plugin][-F][-d][-D] \\\n" + " [-q][-s usecs][-O option ][-l func][-g func][-n func] \\\n" + " [-P pid][-b size][-B buf][-m max][-C clock][command ...]\n" + " -e enable event\n" + " -f filter for previous -e event\n" + " -R trigger for previous -e event\n" + " -p set ftrace plugin\n" + " -P set PIDs to be traced\n" + " -c also trace the children of -F (or -P if kernel supports it)\n" + " -C set the trace clock\n" + " -T do a stacktrace on all events\n" + " -l filter function name\n" + " -g set graph function\n" + " -n do not trace function\n" + " -m max size per CPU in kilobytes\n" + " -M set CPU mask to trace\n" + " -v will negate all -e (disable those events) and -B (delete those instances) after it\n" + " -d disable function tracer when running\n" + " -D Full disable of function tracing (for all users)\n" + " -O option to enable (or disable)\n" + " -b change kernel buffersize (in kilobytes per CPU)\n" + " -B 
create sub buffer and following events will be enabled here\n" + " -i do not fail if an event is not found\n" + " -q print no output to the screen\n" + " --quiet print no output to the screen\n" + " --module filter module name\n" + " --func-stack perform a stack trace for function tracer\n" + " (use with caution)\n" + " --max-graph-depth limit function_graph depth\n" + " --cmdlines-size change kernel saved_cmdlines_size\n" + " --user execute the specified [command ...] as given user\n" + " --fork return immediately if a command is specified\n" + " --verbose 'level' Set the desired log level\n" + }, + { + "start", + "start tracing without recording into a file", + " %s start [-e event][-p plugin][-d][-O option ][-P pid]\n" + " Uses same options as record.\n" + " It only enables the tracing and exits\n" + "\n" + " --fork: If a command is specified, then return right after it forks\n" + " --verbose 'level' Set the desired log level\n" + }, + { + "extract", + "extract a trace from the kernel", + " %s extract [-p plugin][-O option][-o file][-B buf][-s][-a][-t]\n" + " Uses similar options as record, but only reads an existing trace.\n" + " -s : extract the snapshot instead of the main buffer\n" + " -B : extract a given buffer (more than one may be specified)\n" + " -a : extract all buffers (except top one)\n" + " -t : extract the top level buffer (useful with -B and -a)\n" + " --verbose 'level' Set the desired log level\n" + }, + { + "stop", + "stop the kernel from recording trace data", + " %s stop [-B buf [-B buf]..] [-a] [-t]\n" + " Stops the tracer from recording more data.\n" + " Used in conjunction with start\n" + " -B stop a given buffer (more than one may be specified)\n" + " -a stop all buffers (except top one)\n" + " -t stop the top level buffer (useful with -B or -a)\n" + }, + { + "restart", + "restart the kernel trace data recording", + " %s restart [-B buf [-B buf]..] 
[-a] [-t]\n" + " Restarts recording after a trace-cmd stop.\n" + " Used in conjunction with stop\n" + " -B restart a given buffer (more than one may be specified)\n" + " -a restart all buffers (except top one)\n" + " -t restart the top level buffer (useful with -B or -a)\n" + }, + { + "show", + "show the contents of the kernel tracing buffer", + " %s show [-p|-s][-c cpu][-B buf][options]\n" + " Basically, this is a cat of the trace file.\n" + " -p read the trace_pipe file instead\n" + " -s read the snapshot file instance\n" + " (Can't have both -p and -s)\n" + " -c just show the file associated with a given CPU\n" + " -B read from a tracing buffer instance.\n" + " -f display the file path that is being dumped\n" + " The following options shows the corresponding file name\n" + " and then exits.\n" + " --tracing_on\n" + " --current_tracer\n" + " --buffer_size (for buffer_size_kb)\n" + " --buffer_total_size (for buffer_total_size_kb)\n" + " --ftrace_filter (for set_ftrace_filter)\n" + " --ftrace_notrace (for set_ftrace_notrace)\n" + " --ftrace_pid (for set_ftrace_pid)\n" + " --graph_function (for set_graph_function)\n" + " --graph_notrace (for set_graph_notrace)\n" + " --cpumask (for tracing_cpumask)\n" + }, + { + "reset", + "disable all kernel tracing and clear the trace buffers", + " %s reset [-b size][-B buf][-a][-d][-t]\n" + " Disables the tracer (may reset trace file)\n" + " Used in conjunction with start\n" + " -b change the kernel buffer size (in kilobytes per CPU)\n" + " -d delete the previous specified instance\n" + " -B reset the given buffer instance (may specify multiple -B)\n" + " -a reset all instances (except top one)\n" + " -t reset the top level instance (useful with -B or -a)\n" + }, + { + "clear", + "clear the trace buffers", + " %s clear [-B buf][-a]\n" + " -B clear the given buffer (may specify multiple -B)\n" + " -a clear all existing buffers, including the top level one\n" + }, + { + "report", + "read out the trace stored in a trace.dat file", + 
" %s report [-i file] [--cpu cpu] [-e][-f][-l][-P][-L][-N][-R][-E]\\\n" + " [-r events][-n events][-F filter][-v][-V[1-6]][-T][-O option]\n" + " [-H [start_system:]start_event,start_match[,pid]/[end_system:]end_event,end_match[,flags]\n" + " [-G]\n" + " -i input file [default trace.dat]\n" + " -e show file endianess\n" + " -f show function mapping list\n" + " -P show printk list\n" + " -E show event files stored\n" + " -F filter to filter output on\n" + " -I filter out events with the HARDIRQ flag set\n" + " -S filter out events with the SOFTIRQ flag set\n" + " -t print out full timestamp. Do not truncate to 6 places.\n" + " -R raw format: ignore print format and only show field data\n" + " -r raw format the events that match the option\n" + " -v will negate all -F after it (Not show matches)\n" + " -T print out the filter strings created and exit\n" + " -V[level] verbose (shows plugins being loaded)\n" + " With optional level (see --verbose numbers)\n" + " -L load only local (~/.trace-cmd/plugins) plugins\n" + " -N do not load any plugins\n" + " -n ignore plugin handlers for events that match the option\n" + " -w show wakeup latencies\n" + " -l show latency format (default with latency tracers)\n" + " -O plugin option -O [plugin:]var[=val]\n" + " --cpu <cpu1,cpu2,...> - filter events according to the given cpu list.\n" + " A range of CPUs can be specified using 'cpuX-cpuY' notation.\n" + " --cpus - List the CPUs that have content in it then exit.\n" + " --check-events return whether all event formats can be parsed\n" + " --stat - show the buffer stats that were reported at the end of the record.\n" + " --uname - show uname of the record, if it was saved\n" + " --version - show version used to build the trace-cmd exec that created the file\n" + " --profile report stats on where tasks are blocked and such\n" + " -G when profiling, set soft and hard irqs as global\n" + " -H Allows users to hook two events together for timings\n" + " (used with --profile)\n" + " 
--by-comm used with --profile, merge events for related comms\n" + " --ts-offset will add amount to timestamp of all events of the\n" + " previous data file.\n" + " --ts2secs HZ, pass in the timestamp frequency (per second)\n" + " to convert the displayed timestamps to seconds\n" + " Affects the previous data file, unless there was no\n" + " previous data file, in which case it becomes default\n" + " --ts-diff Show the delta timestamp between events.\n" + " --ts-check Check to make sure no time stamp on any CPU goes backwards.\n" + " --nodate Ignore the --date processing of trace-cmd record.\n" + " --raw-ts Display raw timestamps, without any corrections.\n" + " --align-ts Display timestamps aligned to the first event.\n" + " --verbose[=level] Set the desired log level\n" + " 0 or none - no error messages\n" + " 1 or crit - only critical messages\n" + " 2 or err - 'crit' and error messages\n" + " 3 or warn - 'err' and warning messages\n" + " 4 or info - 'warn' and informational messages\n" + " 5 or debug - 'info' and debugging messages\n" + " 6 or all - same as debug\n" + }, + { + "stream", + "Start tracing and read the output directly", + " %s stream [-e event][-p plugin][-d][-O option ][-P pid]\n" + " Uses same options as record but does not write to files or the network.\n" + " --verbose 'level' Set the desired log level\n" + }, + { + "profile", + "Start profiling and read the output directly", + " %s profile [-e event][-p plugin][-d][-O option ][-P pid][-G][-S][-o output]\n" + " [-H [start_system:]start_event,start_match[,pid]/[end_system:]end_event,end_match[,flags]\n\n" + " Uses same options as record --profile.\n" + " -H Allows users to hook two events together for timings\n" + " --verbose 'level' Set the desired log level\n" + }, + { + "hist", + "show a histogram of the trace.dat information", + " %s hist [-i file][-P] [file]" + " -P ignore pids (compact all functions)\n" + }, + { + "stat", + "show the status of the running tracing (ftrace) system", + " %s 
stat [-B buf][-t][-o]" + " -B show the status of a instance buffer\n" + " -t show the top level status along with buffer specified by -B\n" + " -o list tracing options\n" + }, + { + "split", + "parse a trace.dat file into smaller file(s)", + " %s split [options] -o file [start [end]]\n" + " -o output file to write to (file.1, file.2, etc)\n" + " -s n split file up by n seconds\n" + " -m n split file up by n milliseconds\n" + " -u n split file up by n microseconds\n" + " -e n split file up by n events\n" + " -p n split file up by n pages\n" + " -r repeat from start to end\n" + " -c per cpu, that is -p 2 will be 2 pages for each CPU\n" + " if option is specified, it will split the file\n" + " up starting at start, and ending at end\n" + " start - decimal start time in seconds (ex: 75678.923853)\n" + " if left out, will start at beginning of file\n" + " end - decimal end time in seconds\n" + }, + { + "options", + "list the plugin options available for trace-cmd report", + " %s options\n" + }, + { + "listen", + "listen on a network socket for trace clients", + " %s listen -p port[-D][-o file][-d dir][-l logfile]\n" + " Creates a socket to listen for clients.\n" + " -p port number to listen on.\n" + " -D run in daemon mode.\n" + " -V listen on a vsocket instead.\n" + " -o file name to use for clients.\n" + " -d directory to store client files.\n" + " -l logfile to write messages to.\n" + " --verbose 'level' Set the desired log level\n" + }, + { + "agent", + "listen on a vsocket for trace clients", + " %s agent -p port[-D]\n" + " Creates a vsocket to listen for clients.\n" + " -N Connect to IP via TCP instead of vsockets\n" + " *** Insecure setting, only use on a trusted network ***\n" + " *** Only use if the client is totally trusted. 
***\n" + " -p port number to listen on.\n" + " -D run in daemon mode.\n" + " --verbose 'level' Set the desired log level\n" + }, + { + "setup-guest", + "create FIFOs for tracing guest VMs", + " %s setup-guest [-c cpus][-p perm][-g group][-a] guest\n" + " -c number of guest virtual CPUs\n" + " -p FIFOs permissions (default: 0660)\n" + " -g FIFOs group owner\n" + " -a Attach FIFOs to guest VM config\n" + }, + { + "list", + "list the available events, plugins or options", + " %s list [-e [regex]][-t][-o][-f [regex]]\n" + " -e list available events\n" + " -F show event format\n" + " --full show the print fmt with -F\n" + " -R show event triggers\n" + " -l show event filters\n" + " -t list available tracers\n" + " -o list available options\n" + " -f [regex] list available functions to filter on\n" + " -P list loaded plugin files (by path)\n" + " -O list plugin options\n" + " -B list defined buffer instances\n" + " -C list the defined clocks (and active one)\n" + " -c list the supported trace file compression algorithms\n" + }, + { + "restore", + "restore a crashed record", + " %s restore [-c][-o file][-i file] cpu-file [cpu-file ...]\n" + " -c create a partial trace.dat file only\n" + " -o output file\n" + " -i partial trace.dat file for input\n" + }, + { + "snapshot", + "take snapshot of running trace", + " %s snapshot [-s][-r][-f][-B buf][-c cpu]\n" + " -s take a snapshot of the trace buffer\n" + " -r reset current snapshot\n" + " -f free the snapshot buffer\n" + " without the above three options, display snapshot\n" + " -c operate on the snapshot buffer for the given CPU\n" + " -B operate on the snapshot buffer for a tracing buffer instance.\n" + }, + { + "stack", + "output, enable or disable kernel stack tracing", + " %s stack [--start][--stop][--reset]\n" + " --start enable the stack tracer\n" + " --stop disable the stack tracer\n" + " --reset reset the maximum stack found\n" + " --verbose 'level' Set the desired log level\n" + }, + { + "check-events", + "parse 
trace event formats", + " %s check-events [-N]\n" + " -N do not load any plugins\n" + " --verbose 'level' Set the desired log level\n" + }, + { + "dump", + "read out the meta data from a trace file", + " %s dump [options]\n" + " -i input file, default is trace.dat\n" + " -v validate a trace file\n" + " --all print all meta data from a trace file\n" + " --summary print a meta data summary\n" + " --head-page print header page information\n" + " --head-event print header event information\n" + " --ftrace-events print ftrace events format\n" + " --systems print recorded event systems\n" + " --events print format of recorded events\n" + " --kallsyms print information of the mapping of function addresses to the function names\n" + " --printk print trace_printk() format strings\n" + " --cmd-lines print information mapping a PID to a process name\n" + " --options print options\n" + " --flyrecord information of offset and count of recorded events per CPU\n" + " --clock trace clock, saved in the file\n" + " -h, --help show usage information\n" + " --verbose 'level' Set the desired log level\n" + }, + { + "convert", + "convert trace file to different version", + " %s convert [options]\n" + " -i input file, default is trace.dat\n" + " -o output file, mandatory parameter.\n" + " The output file can be specified also as last argument of the command\n" + " --file-version set the desired trace file version\n" + " --compression compress the trace output file, one of these strings can be passed:\n" + " any - auto select the best available compression algorithm\n" + " none - do not compress the trace file\n" + " name - the name of the desired compression algorithms\n" + " available algorithms can be listed with trace-cmd list -c\n" }, + { + NULL, NULL, NULL + } +}; + +static struct usage_help *find_help(char *cmd) +{ + struct usage_help *help; + + help = usage_help; + while (help->name) { + if (strcmp(cmd, help->name) == 0) + return help; + help++; + } + return NULL; +} + +void 
usage(char **argv) +{ + struct usage_help *help = NULL; + char *arg = argv[0]; + char *p; + + p = basename(arg); + + printf("\n" + "%s version %s (%s)\n\n" + "usage:\n", p, VERSION_STRING, VERSION_GIT); + + if (argv[1]) + help = find_help(argv[1]); + + if (help) { + printf(help->long_help, p); + goto out; + } + + printf(" %s [COMMAND] ...\n\n" + " commands:\n", p); + + help = usage_help; + while (help->name) { + printf(" %s - %s\n", help->name, help->short_help); + help++; + } + out: + printf("\n"); + exit(-1); +} + + +void trace_usage(int argc, char **argv) +{ + usage(argv); +} diff --git a/tracecmd/trace-vm.c b/tracecmd/trace-vm.c new file mode 100644 index 00000000..57dbef8d --- /dev/null +++ b/tracecmd/trace-vm.c @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com> + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <dirent.h> +#include <limits.h> +#include <unistd.h> +#include <errno.h> + +#include "trace-local.h" +#include "trace-msg.h" + +static struct trace_guest *guests; +static size_t guests_len; + +static struct trace_guest *get_guest_by_cid(unsigned int guest_cid) +{ + int i; + + if (!guests) + return NULL; + + for (i = 0; i < guests_len; i++) + if (guest_cid == guests[i].cid) + return guests + i; + return NULL; +} + +static struct trace_guest *get_guest_by_name(const char *name) +{ + int i; + + if (!guests) + return NULL; + + for (i = 0; i < guests_len; i++) + if (strcmp(name, guests[i].name) == 0) + return guests + i; + return NULL; +} + +bool trace_have_guests_pid(void) +{ + for (int i = 0; i < guests_len; i++) { + if (guests[i].pid < 0) + return false; + } + + return true; +} + +static struct trace_guest *add_guest(unsigned int cid, const char *name) +{ + guests = realloc(guests, 
(guests_len + 1) * sizeof(*guests)); + if (!guests) + die("allocating new guest"); + memset(&guests[guests_len], 0, sizeof(struct trace_guest)); + guests[guests_len].name = strdup(name); + if (!guests[guests_len].name) + die("allocating guest name"); + guests[guests_len].cid = cid; + guests[guests_len].pid = -1; + guests_len++; + + return &guests[guests_len - 1]; +} + +static struct tracefs_instance *start_trace_connect(void) +{ + struct tracefs_instance *open_instance; + + open_instance = tracefs_instance_create("vsock_find_pid"); + if (!open_instance) + return NULL; + + tracefs_event_enable(open_instance, "sched", "sched_waking"); + tracefs_event_enable(open_instance, "kvm", "kvm_exit"); + tracefs_trace_on(open_instance); + return open_instance; +} + +struct pids { + struct pids *next; + int pid; +}; + +struct trace_fields { + struct tep_event *sched_waking; + struct tep_event *kvm_exit; + struct tep_format_field *common_pid; + struct tep_format_field *sched_next; + struct pids *pids; + int found_pid; +}; + +static void free_pids(struct pids *pids) +{ + struct pids *next; + + while (pids) { + next = pids; + pids = pids->next; + free(next); + } +} + +static void add_pid(struct pids **pids, int pid) +{ + struct pids *new_pid; + + new_pid = malloc(sizeof(*new_pid)); + if (!new_pid) + return; + + new_pid->pid = pid; + new_pid->next = *pids; + *pids = new_pid; +} + +static bool match_pid(struct pids *pids, int pid) +{ + while (pids) { + if (pids->pid == pid) + return true; + pids = pids->next; + } + return false; +} + +static int callback(struct tep_event *event, struct tep_record *record, int cpu, + void *data) +{ + struct trace_fields *fields = data; + struct tep_handle *tep = event->tep; + unsigned long long val; + int flags; + int type; + int pid; + int ret; + + ret = tep_read_number_field(fields->common_pid, record->data, &val); + if (ret < 0) + return 0; + + flags = tep_data_flags(tep, record); + + /* Ignore events in interrupts */ + if (flags & 
(TRACE_FLAG_HARDIRQ | TRACE_FLAG_SOFTIRQ)) + return 0; + + /* + * First make sure that this event comes from a PID from + * this task (or a task woken by this task) + */ + pid = val; + if (!match_pid(fields->pids, pid)) + return 0; + + type = tep_data_type(tep, record); + + /* + * If this event is a kvm_exit, we have our PID + * and we can stop processing. + */ + if (type == fields->kvm_exit->id) { + fields->found_pid = pid; + return -1; + } + + if (type != fields->sched_waking->id) + return 0; + + ret = tep_read_number_field(fields->sched_next, record->data, &val); + if (ret < 0) + return 0; + + /* This is a task woken by our task or a chain of wake ups */ + add_pid(&fields->pids, (int)val); + return 0; +} + +static int find_tgid(int pid) +{ + FILE *fp; + char *path; + char *buf = NULL; + char *save; + size_t l = 0; + int tgid = -1; + + if (asprintf(&path, "/proc/%d/status", pid) < 0) + return -1; + + fp = fopen(path, "r"); + free(path); + if (!fp) + return -1; + + while (getline(&buf, &l, fp) > 0) { + char *tok; + + if (strncmp(buf, "Tgid:", 5) != 0) + continue; + tok = strtok_r(buf, ":", &save); + if (!tok) + continue; + tok = strtok_r(NULL, ":", &save); + if (!tok) + continue; + while (isspace(*tok)) + tok++; + tgid = strtol(tok, NULL, 0); + break; + } + free(buf); + fclose(fp); + + return tgid; +} + +static int stop_trace_connect(struct tracefs_instance *open_instance) +{ + const char *systems[] = { "kvm", "sched", NULL}; + struct tep_handle *tep; + struct trace_fields trace_fields; + int tgid = -1; + + if (!open_instance) + return -1; + + /* The connection is finished, stop tracing, we have what we want */ + tracefs_trace_off(open_instance); + tracefs_event_disable(open_instance, NULL, NULL); + + tep = tracefs_local_events_system(NULL, systems); + + trace_fields.sched_waking = tep_find_event_by_name(tep, "sched", "sched_waking"); + if (!trace_fields.sched_waking) + goto out; + trace_fields.kvm_exit = tep_find_event_by_name(tep, "kvm", "kvm_exit"); + if 
(!trace_fields.kvm_exit) + goto out; + trace_fields.common_pid = tep_find_common_field(trace_fields.sched_waking, + "common_pid"); + if (!trace_fields.common_pid) + goto out; + trace_fields.sched_next = tep_find_any_field(trace_fields.sched_waking, + "pid"); + if (!trace_fields.sched_next) + goto out; + + trace_fields.found_pid = -1; + trace_fields.pids = NULL; + add_pid(&trace_fields.pids, getpid()); + tracefs_iterate_raw_events(tep, open_instance, NULL, 0, callback, &trace_fields); + free_pids(trace_fields.pids); + out: + tracefs_instance_destroy(open_instance); + tracefs_instance_free(open_instance); + + if (trace_fields.found_pid > 0) + tgid = find_tgid(trace_fields.found_pid); + + return tgid; +} + +/* + * In order to find the guest that is associated to the given cid, + * trace the sched_waking and kvm_exit events, connect to the cid + * (doesn't matter what port, use -1 to not connect to anything) + * and find what task gets woken up from this code and calls kvm_exit, + * then that is the task that is running the guest. + * Then look at the /proc/<guest-pid>/status file to find the task group + * id (Tgid), and this is the PID of the task running all the threads. + */ +static void find_pid_by_cid(struct trace_guest *guest) +{ + struct tracefs_instance *instance; + int fd; + + instance = start_trace_connect(); + fd = trace_vsock_open(guest->cid, -1); + guest->pid = stop_trace_connect(instance); + /* Just in case! 
*/ + if (fd >= 0) + close(fd); +} + +struct trace_guest *trace_get_guest(unsigned int cid, const char *name) +{ + struct trace_guest *guest = NULL; + + if (name) { + guest = get_guest_by_name(name); + if (guest) + return guest; + } + + if (cid > 0) { + guest = get_guest_by_cid(cid); + if (!guest && name) { + guest = add_guest(cid, name); + if (guest) + find_pid_by_cid(guest); + } + } + return guest; +} + +#define VM_CID_CMD "virsh dumpxml" +#define VM_CID_LINE "<cid auto=" +#define VM_CID_ID "address='" +static void read_guest_cid(char *name) +{ + struct trace_guest *guest; + char *cmd = NULL; + char line[512]; + char *cid; + unsigned int cid_id = 0; + FILE *f; + + asprintf(&cmd, "%s %s", VM_CID_CMD, name); + f = popen(cmd, "r"); + free(cmd); + if (f == NULL) + return; + + while (fgets(line, sizeof(line), f) != NULL) { + if (!strstr(line, VM_CID_LINE)) + continue; + cid = strstr(line, VM_CID_ID); + if (!cid) + continue; + cid_id = strtol(cid + strlen(VM_CID_ID), NULL, 10); + if ((cid_id == INT_MIN || cid_id == INT_MAX) && errno == ERANGE) + continue; + guest = add_guest(cid_id, name); + if (guest) + find_pid_by_cid(guest); + break; + } + + /* close */ + pclose(f); +} + +#define VM_NAME_CMD "virsh list --name" +void read_qemu_guests(void) +{ + char name[256]; + FILE *f; + + f = popen(VM_NAME_CMD, "r"); + if (f == NULL) + return; + + while (fgets(name, sizeof(name), f) != NULL) { + if (name[0] == '\n') + continue; + if (name[strlen(name) - 1] == '\n') + name[strlen(name) - 1] = '\0'; + read_guest_cid(name); + } + + /* close */ + pclose(f); +} + +int get_guest_vcpu_pid(unsigned int guest_cid, unsigned int guest_vcpu) +{ + int i; + + if (!guests) + return -1; + + for (i = 0; i < guests_len; i++) { + if (guests[i].cpu_pid < 0 || guest_vcpu >= guests[i].cpu_max) + continue; + if (guest_cid == guests[i].cid) + return guests[i].cpu_pid[guest_vcpu]; + } + return -1; +} diff --git a/tracecmd/trace-vsock.c b/tracecmd/trace-vsock.c new file mode 100644 index 00000000..39294e7a 
--- /dev/null +++ b/tracecmd/trace-vsock.c @@ -0,0 +1,176 @@ +#include <unistd.h> +#include <errno.h> +#include <arpa/inet.h> +#include <sys/ioctl.h> +#include <linux/vm_sockets.h> + +#include "trace-cmd-private.h" + +int __hidden trace_vsock_open(unsigned int cid, unsigned int port) +{ + struct sockaddr_vm addr = { + .svm_family = AF_VSOCK, + .svm_cid = cid, + .svm_port = port, + }; + int sd; + + sd = socket(AF_VSOCK, SOCK_STREAM, 0); + if (sd < 0) + return -errno; + + if (connect(sd, (struct sockaddr *)&addr, sizeof(addr))) + return -errno; + + return sd; +} + +int __hidden trace_vsock_make(unsigned int port) +{ + struct sockaddr_vm addr = { + .svm_family = AF_VSOCK, + .svm_cid = VMADDR_CID_ANY, + .svm_port = port, + }; + int sd; + + sd = socket(AF_VSOCK, SOCK_STREAM, 0); + if (sd < 0) + return -errno; + + setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &(int){1}, sizeof(int)); + + if (bind(sd, (struct sockaddr *)&addr, sizeof(addr))) + return -errno; + + if (listen(sd, SOMAXCONN)) + return -errno; + + return sd; +} + +int __hidden trace_vsock_make_any(void) +{ + return trace_vsock_make(VMADDR_PORT_ANY); +} + +int __hidden trace_vsock_get_port(int sd, unsigned int *port) +{ + struct sockaddr_vm addr; + socklen_t addr_len = sizeof(addr); + + if (getsockname(sd, (struct sockaddr *)&addr, &addr_len)) + return -errno; + + if (addr.svm_family != AF_VSOCK) + return -EINVAL; + + if (port) + *port = addr.svm_port; + + return 0; +} + +int get_vsocket_params(int fd, unsigned int *lcid, unsigned int *rcid) +{ + struct sockaddr_vm addr; + socklen_t addr_len = sizeof(addr); + + memset(&addr, 0, sizeof(addr)); + if (getsockname(fd, (struct sockaddr *)&addr, &addr_len)) + return -1; + if (addr.svm_family != AF_VSOCK) + return -1; + *lcid = addr.svm_cid; + + memset(&addr, 0, sizeof(addr)); + addr_len = sizeof(addr); + if (getpeername(fd, (struct sockaddr *)&addr, &addr_len)) + return -1; + if (addr.svm_family != AF_VSOCK) + return -1; + *rcid = addr.svm_cid; + + return 0; +} + +int 
trace_vsock_print_connection(int fd) +{ + struct sockaddr_vm vm_addr; + socklen_t addr_len; + int cid, port; + + addr_len = sizeof(vm_addr); + if (getpeername(fd, (struct sockaddr *)&vm_addr, &addr_len)) + return -1; + if (vm_addr.svm_family != AF_VSOCK) + return -1; + cid = vm_addr.svm_cid; + port = vm_addr.svm_port; + if (tracecmd_get_debug()) + tracecmd_debug("Connected to @%u:%u fd:%d\n", cid, port, fd); + else + tracecmd_plog("Connected to @%u:%u\n", cid, port); + return 0; +} + +static int try_splice_read_vsock(void) +{ + int ret, sd, brass[2]; + + sd = socket(AF_VSOCK, SOCK_STREAM, 0); + if (sd < 0) + return -errno; + + ret = pipe(brass); + if (ret < 0) + goto out_close_sd; + + /* + * On kernels that don't support splice reading from vsockets + * this will fail with EINVAL, or ENOTCONN otherwise. + * Technically, it should never succeed but if it does, claim splice + * reading is supported. + */ + ret = splice(sd, NULL, brass[1], NULL, 10, 0); + if (ret < 0) + ret = errno != EINVAL; + else + ret = 1; + + close(brass[0]); + close(brass[1]); +out_close_sd: + close(sd); + return ret; +} + +bool __hidden trace_vsock_can_splice_read(void) +{ + static bool initialized, res; + + if (initialized) + return res; + + res = try_splice_read_vsock() > 0; + initialized = true; + return res; +} + +#define GET_LOCAL_CID 0x7b9 + +int __hidden trace_vsock_local_cid(void) +{ + int cid; + int fd; + + fd = open("/dev/vsock", O_RDONLY); + if (fd < 0) + return -errno; + + if (ioctl(fd, GET_LOCAL_CID, &cid)) + cid = -errno; + + close(fd); + return cid; +} diff --git a/utest/Makefile b/utest/Makefile new file mode 100644 index 00000000..2cf99745 --- /dev/null +++ b/utest/Makefile @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: GPL-2.0 + +include $(src)/scripts/utils.mk + +bdir:=$(obj)/utest + +TARGETS = $(bdir)/trace-utest + +OBJS = +OBJS += trace-utest.o +OBJS += tracefs-utest.o + +LIBS += -lcunit $(LIBTRACEEVENT_LDLAGS) $(LIBTRACEFS_LDLAGS) + +OBJS := $(OBJS:%.o=$(bdir)/%.o) +DEPS := 
$(OBJS:$(bdir)/%.o=$(bdir)/.%.d) + +$(bdir): + @mkdir -p $(bdir) + +$(OBJS): | $(bdir) +$(DEPS): | $(bdir) + +$(bdir)/trace-utest: $(OBJS) + $(Q)$(do_app_build) + +$(bdir)/%.o: %.c + $(Q)$(call do_fpic_compile) + +$(DEPS): $(bdir)/.%.d: %.c + $(Q)$(CC) -M $(CPPFLAGS) $(CFLAGS) $< > $@ + $(Q)$(CC) -M -MT $(bdir)/$*.o $(CPPFLAGS) $(CFLAGS) $< > $@ + +$(OBJS): $(bdir)/%.o : $(bdir)/.%.d + +dep_includes := $(wildcard $(DEPS)) + +test: $(TARGETS) + +clean: + $(RM) $(TARGETS) $(bdir)/*.o $(bdir)/.*.d diff --git a/utest/README b/utest/README new file mode 100644 index 00000000..f93630d0 --- /dev/null +++ b/utest/README @@ -0,0 +1,15 @@ + +Unit tests for trace-cmd libraries. The tests use CUnit framework: + http://cunit.sourceforge.net/ +which must be pre installed on the system, before building the unit tests. +The framework can be downloaded, compiled and installed manually, or +using a precompiled distro package: + + Fedora: + CUnit + CUnit-devel + + Ubuntu and Debian: + libcunit1 + libcunit1-doc + libcunit1-dev diff --git a/utest/trace-utest.c b/utest/trace-utest.c new file mode 100644 index 00000000..58d4d4e4 --- /dev/null +++ b/utest/trace-utest.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com> + * + */ +#include <stdio.h> +#include <unistd.h> +#include <getopt.h> +#include <stdlib.h> + +#include <CUnit/CUnit.h> +#include <CUnit/Basic.h> + +#include "trace-utest.h" + +enum unit_tests { + RUN_NONE = 0, + RUN_TRACEFS = (1 << 0), + RUN_ALL = 0xFFFF +}; + +static void print_help(char **argv) +{ + printf("Usage: %s [OPTIONS]\n", basename(argv[0])); + printf("\t-s, --silent\tPrint test summary\n"); + printf("\t-r, --run test\tRun specific test:\n"); + printf("\t\t tracefs run libtracefs tests\n"); + printf("\t-h, --help\tPrint usage information\n"); + exit(0); +} + +int main(int argc, char **argv) +{ + CU_BasicRunMode verbose = CU_BRM_VERBOSE; + enum unit_tests tests = RUN_NONE; + + for 
(;;) { + int c; + int index = 0; + const char *opts = "+hsr:"; + static struct option long_options[] = { + {"silent", no_argument, NULL, 's'}, + {"run", required_argument, NULL, 'r'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0} + }; + + c = getopt_long (argc, argv, opts, long_options, &index); + if (c == -1) + break; + switch (c) { + case 'r': + if (strcmp(optarg, "tracefs") == 0) + tests |= RUN_TRACEFS; + else + print_help(argv); + break; + case 's': + verbose = CU_BRM_SILENT; + break; + case 'h': + default: + print_help(argv); + break; + } + } + + if (tests == RUN_NONE) + tests = RUN_ALL; + + if (CU_initialize_registry() != CUE_SUCCESS) { + printf("Test registry cannot be initialized\n"); + return -1; + } + + if (tests & RUN_TRACEFS) + test_tracefs_lib(); + + CU_basic_set_mode(verbose); + CU_basic_run_tests(); + CU_cleanup_registry(); + return 0; +} diff --git a/utest/trace-utest.h b/utest/trace-utest.h new file mode 100644 index 00000000..917c0e78 --- /dev/null +++ b/utest/trace-utest.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ +/* + * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com> + * + */ +#ifndef _TRACE_UTEST_H_ +#define _TRACE_UTEST_H_ + +void test_tracefs_lib(void); + +#endif /* _TRACE_UTEST_H_ */ diff --git a/utest/tracefs-utest.c b/utest/tracefs-utest.c new file mode 100644 index 00000000..9c9eee06 --- /dev/null +++ b/utest/tracefs-utest.c @@ -0,0 +1,630 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com> + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <time.h> +#include <dirent.h> + +#include <CUnit/CUnit.h> +#include <CUnit/Basic.h> + +#include "tracefs.h" + +#define TRACEFS_SUITE "trasefs library" +#define TEST_INSTANCE_NAME "cunit_test_iter" +#define TEST_ARRAY_SIZE 500 + +static struct tracefs_instance *test_instance; +static struct tep_handle *test_tep; 
+struct test_sample { + int cpu; + int value; +}; +static struct test_sample test_array[TEST_ARRAY_SIZE]; +static int test_found; + +static int test_callback(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + struct tep_format_field *field; + struct test_sample *sample; + int *cpu_test = (int *)context; + int i; + + if (cpu_test && *cpu_test >= 0 && *cpu_test != cpu) + return 0; + field = tep_find_field(event, "buf"); + if (field) { + sample = ((struct test_sample *)(record->data + field->offset)); + for (i = 0; i < TEST_ARRAY_SIZE; i++) { + if (test_array[i].value == sample->value && + test_array[i].cpu == cpu) { + test_array[i].value = 0; + test_found++; + break; + } + } + } + + return 0; +} + +static void test_iter_write(void) +{ + int cpus = sysconf(_SC_NPROCESSORS_CONF); + cpu_set_t *cpuset, *cpusave; + int cpu_size; + char *path; + int i, fd; + int ret; + cpuset = CPU_ALLOC(cpus); + cpusave = CPU_ALLOC(cpus); + cpu_size = CPU_ALLOC_SIZE(cpus); + CPU_ZERO_S(cpu_size, cpuset); + + sched_getaffinity(0, cpu_size, cpusave); + + path = tracefs_instance_get_file(test_instance, "trace_marker"); + CU_TEST(path != NULL); + fd = open(path, O_WRONLY); + tracefs_put_tracing_file(path); + CU_TEST(fd >= 0); + + for (i = 0; i < TEST_ARRAY_SIZE; i++) { + test_array[i].cpu = rand() % cpus; + test_array[i].value = random(); + if (!test_array[i].value) + test_array[i].value++; + CU_TEST(test_array[i].cpu < cpus); + CPU_ZERO_S(cpu_size, cpuset); + CPU_SET(test_array[i].cpu, cpuset); + sched_setaffinity(0, cpu_size, cpuset); + ret = write(fd, test_array + i, sizeof(struct test_sample)); + CU_TEST(ret == sizeof(struct test_sample)); + } + + sched_setaffinity(0, cpu_size, cpusave); + close(fd); +} + + +static void iter_raw_events_on_cpu(int cpu) +{ + int check = 0; + int ret; + int i; + + test_found = 0; + test_iter_write(); + ret = tracefs_iterate_raw_events(test_tep, test_instance, NULL, 0, + test_callback, &cpu); + CU_TEST(ret == 0); + if (cpu < 0) 
{ + CU_TEST(test_found == TEST_ARRAY_SIZE); + } else { + for (i = 0; i < TEST_ARRAY_SIZE; i++) { + if (test_array[i].cpu == cpu) { + check++; + CU_TEST(test_array[i].value == 0) + } else { + CU_TEST(test_array[i].value != 0) + } + } + CU_TEST(test_found == check); + } +} + +static void test_iter_raw_events(void) +{ + int cpus = sysconf(_SC_NPROCESSORS_CONF); + int ret; + int i; + + ret = tracefs_iterate_raw_events(NULL, test_instance, NULL, 0, test_callback, NULL); + CU_TEST(ret < 0); + ret = tracefs_iterate_raw_events(test_tep, NULL, NULL, 0, test_callback, NULL); + CU_TEST(ret == 0); + ret = tracefs_iterate_raw_events(test_tep, test_instance, NULL, 0, NULL, NULL); + CU_TEST(ret < 0); + + iter_raw_events_on_cpu(-1); + for (i = 0; i < cpus; i++) + iter_raw_events_on_cpu(i); +} + +#define RAND_STR_SIZE 20 +#define RAND_ASCII "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; +static const char *get_rand_str() +{ + static char str[RAND_STR_SIZE]; + static char sym[] = RAND_ASCII; + struct timespec clk; + int i; + + clock_gettime(CLOCK_REALTIME, &clk); + srand(clk.tv_nsec); + for (i = 0; i < RAND_STR_SIZE; i++) + str[i] = sym[rand() % (sizeof(sym) - 1)]; + + str[RAND_STR_SIZE - 1] = 0; + return str; +} + +static void test_trace_file(void) +{ + const char *tmp = get_rand_str(); + const char *tdir; + struct stat st; + char *file; + + tdir = tracefs_tracing_dir(); + CU_TEST(tdir != NULL); + CU_TEST(stat(tdir, &st) == 0); + CU_TEST(S_ISDIR(st.st_mode)); + + file = tracefs_get_tracing_file(NULL); + CU_TEST(file == NULL); + file = tracefs_get_tracing_file(tmp); + CU_TEST(file != NULL); + CU_TEST(stat(file, &st) != 0); + tracefs_put_tracing_file(file); + + file = tracefs_get_tracing_file("trace"); + CU_TEST(file != NULL); + CU_TEST(stat(file, &st) == 0); + tracefs_put_tracing_file(file); +} + +static void test_instance_file_read(struct tracefs_instance *inst, char *fname) +{ + const char *tdir = tracefs_tracing_dir(); + char buf[BUFSIZ]; + char *fpath; + char 
*file; + size_t fsize = 0; + int size = 0; + int fd; + + if (inst) { + CU_TEST(asprintf(&fpath, "%s/instances/%s/%s", + tdir, tracefs_instance_get_name(inst), fname) > 0); + } else { + CU_TEST(asprintf(&fpath, "%s/%s", tdir, fname) > 0); + } + + memset(buf, 0, BUFSIZ); + fd = open(fpath, O_RDONLY); + CU_TEST(fd >= 0); + fsize = read(fd, buf, BUFSIZ); + CU_TEST(fsize >= 0); + close(fd); + buf[BUFSIZ - 1] = 0; + + file = tracefs_instance_file_read(inst, fname, &size); + CU_TEST(file != NULL); + CU_TEST(size == fsize); + CU_TEST(strcmp(file, buf) == 0); + + free(fpath); + free(file); +} + +#define ALL_TRACERS "available_tracers" +#define CUR_TRACER "current_tracer" +#define PER_CPU "per_cpu" +static void test_instance_file(void) +{ + struct tracefs_instance *instance = NULL; + struct tracefs_instance *second = NULL; + const char *name = get_rand_str(); + const char *inst_name = NULL; + const char *tdir; + char *inst_file; + char *inst_dir; + struct stat st; + char *fname; + char *file1; + char *file2; + char *tracer; + int size; + int ret; + + tdir = tracefs_tracing_dir(); + CU_TEST(tdir != NULL); + CU_TEST(asprintf(&inst_dir, "%s/instances/%s", tdir, name) > 0); + CU_TEST(stat(inst_dir, &st) != 0); + + CU_TEST(tracefs_instance_exists(name) == false); + instance = tracefs_instance_create(name); + CU_TEST(instance != NULL); + CU_TEST(tracefs_instance_is_new(instance)); + second = tracefs_instance_create(name); + CU_TEST(second != NULL); + CU_TEST(!tracefs_instance_is_new(second)); + tracefs_instance_free(second); + CU_TEST(tracefs_instance_exists(name) == true); + CU_TEST(stat(inst_dir, &st) == 0); + CU_TEST(S_ISDIR(st.st_mode)); + inst_name = tracefs_instance_get_name(instance); + CU_TEST(inst_name != NULL); + CU_TEST(strcmp(inst_name, name) == 0); + + fname = tracefs_instance_get_dir(NULL); + CU_TEST(fname != NULL); + CU_TEST(strcmp(fname, tdir) == 0); + free(fname); + + fname = tracefs_instance_get_dir(instance); + CU_TEST(fname != NULL); + CU_TEST(strcmp(fname, 
inst_dir) == 0); + free(fname); + + CU_TEST(asprintf(&fname, "%s/"ALL_TRACERS, tdir) > 0); + CU_TEST(fname != NULL); + inst_file = tracefs_instance_get_file(NULL, ALL_TRACERS); + CU_TEST(inst_file != NULL); + CU_TEST(strcmp(fname, inst_file) == 0); + tracefs_put_tracing_file(inst_file); + free(fname); + + CU_TEST(asprintf(&fname, "%s/instances/%s/"ALL_TRACERS, tdir, name) > 0); + CU_TEST(fname != NULL); + CU_TEST(stat(fname, &st) == 0); + inst_file = tracefs_instance_get_file(instance, ALL_TRACERS); + CU_TEST(inst_file != NULL); + CU_TEST(strcmp(fname, inst_file) == 0); + + test_instance_file_read(NULL, ALL_TRACERS); + test_instance_file_read(instance, ALL_TRACERS); + + file1 = tracefs_instance_file_read(instance, ALL_TRACERS, NULL); + CU_TEST(file1 != NULL); + tracer = strtok(file1, " "); + CU_TEST(tracer != NULL); + ret = tracefs_instance_file_write(instance, CUR_TRACER, tracer); + CU_TEST(ret == strlen(tracer)); + file2 = tracefs_instance_file_read(instance, CUR_TRACER, &size); + CU_TEST(file2 != NULL); + CU_TEST(size >= strlen(tracer)); + CU_TEST(strncmp(file2, tracer, strlen(tracer)) == 0); + free(file1); + free(file2); + + tracefs_put_tracing_file(inst_file); + free(fname); + + CU_TEST(tracefs_file_exists(NULL, (char *)name) == false); + CU_TEST(tracefs_dir_exists(NULL, (char *)name) == false); + CU_TEST(tracefs_file_exists(instance, (char *)name) == false); + CU_TEST(tracefs_dir_exists(instance, (char *)name) == false); + + CU_TEST(tracefs_file_exists(NULL, CUR_TRACER) == true); + CU_TEST(tracefs_dir_exists(NULL, CUR_TRACER) == false); + CU_TEST(tracefs_file_exists(instance, CUR_TRACER) == true); + CU_TEST(tracefs_dir_exists(instance, CUR_TRACER) == false); + + CU_TEST(tracefs_file_exists(NULL, PER_CPU) == false); + CU_TEST(tracefs_dir_exists(NULL, PER_CPU) == true); + CU_TEST(tracefs_file_exists(instance, PER_CPU) == false); + CU_TEST(tracefs_dir_exists(instance, PER_CPU) == true); + + CU_TEST(tracefs_instance_destroy(NULL) != 0); + 
CU_TEST(tracefs_instance_destroy(instance) == 0); + CU_TEST(tracefs_instance_destroy(instance) != 0); + tracefs_instance_free(instance); + CU_TEST(stat(inst_dir, &st) != 0); + free(inst_dir); +} + +static void exclude_string(char **strings, char *name) +{ + int i; + + for (i = 0; strings[i]; i++) { + if (strcmp(strings[i], name) == 0) { + free(strings[i]); + strings[i] = strdup("/"); + return; + } + } +} + +static void test_check_files(const char *fdir, char **files) +{ + struct dirent *dent; + DIR *dir; + int i; + + dir = opendir(fdir); + CU_TEST(dir != NULL); + + while ((dent = readdir(dir))) + exclude_string(files, dent->d_name); + + closedir(dir); + + for (i = 0; files[i]; i++) + CU_TEST(files[i][0] == '/'); +} + +static void test_system_event(void) +{ + const char *tdir; + char **systems; + char **events; + char *sdir = NULL; + + tdir = tracefs_tracing_dir(); + CU_TEST(tdir != NULL); + + systems = tracefs_event_systems(tdir); + CU_TEST(systems != NULL); + + events = tracefs_system_events(tdir, systems[0]); + CU_TEST(events != NULL); + + asprintf(&sdir, "%s/events/%s", tdir, systems[0]); + CU_TEST(sdir != NULL); + test_check_files(sdir, events); + free(sdir); + sdir = NULL; + + asprintf(&sdir, "%s/events", tdir); + CU_TEST(sdir != NULL); + test_check_files(sdir, systems); + + tracefs_list_free(systems); + tracefs_list_free(events); + + free(sdir); +} + +static void test_tracers(void) +{ + const char *tdir; + char **tracers; + char *tfile; + char *tracer; + int i; + + tdir = tracefs_tracing_dir(); + CU_TEST(tdir != NULL); + + tracers = tracefs_tracers(tdir); + CU_TEST(tracers != NULL); + + tfile = tracefs_instance_file_read(NULL, ALL_TRACERS, NULL); + + tracer = strtok(tfile, " "); + while (tracer) { + exclude_string(tracers, tracer); + tracer = strtok(NULL, " "); + } + + for (i = 0; tracers[i]; i++) + CU_TEST(tracers[i][0] == '/'); + + tracefs_list_free(tracers); + free(tfile); +} + +static void test_check_events(struct tep_handle *tep, char *system, bool 
exist) +{ + struct dirent *dent; + char file[PATH_MAX]; + char buf[1024]; + char *edir = NULL; + const char *tdir; + DIR *dir; + int fd; + + tdir = tracefs_tracing_dir(); + CU_TEST(tdir != NULL); + + asprintf(&edir, "%s/events/%s", tdir, system); + dir = opendir(edir); + CU_TEST(dir != NULL); + + while ((dent = readdir(dir))) { + if (dent->d_name[0] == '.') + continue; + sprintf(file, "%s/%s/id", edir, dent->d_name); + fd = open(file, O_RDONLY); + if (fd < 0) + continue; + CU_TEST(read(fd, buf, 1024) > 0); + if (exist) { + CU_TEST(tep_find_event(tep, atoi(buf)) != NULL); + } else { + CU_TEST(tep_find_event(tep, atoi(buf)) == NULL); + } + + close(fd); + } + + closedir(dir); + free(edir); + +} + +static void test_local_events(void) +{ + struct tep_handle *tep; + const char *tdir; + char **systems; + char *lsystems[3]; + int i; + + tdir = tracefs_tracing_dir(); + CU_TEST(tdir != NULL); + + tep = tracefs_local_events(tdir); + CU_TEST(tep != NULL); + + systems = tracefs_event_systems(tdir); + CU_TEST(systems != NULL); + + for (i = 0; systems[i]; i++) + test_check_events(tep, systems[i], true); + tep_free(tep); + + memset(lsystems, 0, sizeof(lsystems)); + for (i = 0; systems[i]; i++) { + if (!lsystems[0]) + lsystems[0] = systems[i]; + else if (!lsystems[2]) + lsystems[2] = systems[i]; + else + break; + } + + if (lsystems[0] && lsystems[2]) { + tep = tracefs_local_events_system(tdir, + (const char * const *)lsystems); + CU_TEST(tep != NULL); + test_check_events(tep, lsystems[0], true); + test_check_events(tep, lsystems[2], false); + } + tep_free(tep); + + tep = tep_alloc(); + CU_TEST(tep != NULL); + CU_TEST(tracefs_fill_local_events(tdir, tep, NULL) == 0); + for (i = 0; systems[i]; i++) + test_check_events(tep, systems[i], true); + + tep_free(tep); + + tracefs_list_free(systems); +} + +struct test_walk_instance { + struct tracefs_instance *instance; + bool found; +}; +#define WALK_COUNT 10 +int test_instances_walk_cb(const char *name, void *data) +{ + struct 
test_walk_instance *instances = (struct test_walk_instance *)data; + int i; + + CU_TEST(instances != NULL); + CU_TEST(name != NULL); + + for (i = 0; i < WALK_COUNT; i++) { + if (!strcmp(name, + tracefs_instance_get_name(instances[i].instance))) { + instances[i].found = true; + break; + } + } + + return 0; +} + +static void test_instances_walk(void) +{ + struct test_walk_instance instances[WALK_COUNT]; + int i; + + memset(instances, 0, WALK_COUNT * sizeof(struct test_walk_instance)); + for (i = 0; i < WALK_COUNT; i++) { + instances[i].instance = tracefs_instance_create(get_rand_str()); + CU_TEST(instances[i].instance != NULL); + } + + CU_TEST(tracefs_instances_walk(test_instances_walk_cb, instances) == 0); + for (i = 0; i < WALK_COUNT; i++) { + CU_TEST(instances[i].found); + tracefs_instance_destroy(instances[i].instance); + instances[i].found = false; + } + + CU_TEST(tracefs_instances_walk(test_instances_walk_cb, instances) == 0); + for (i = 0; i < WALK_COUNT; i++) { + CU_TEST(!instances[i].found); + tracefs_instance_free(instances[i].instance); + } +} + +static void current_clock_check(const char *clock) +{ + int size = 0; + char *clocks; + char *str; + + clocks = tracefs_instance_file_read(test_instance, "trace_clock", &size); + CU_TEST(clocks != NULL); + CU_TEST(size > strlen(clock)); + str = strstr(clocks, clock); + CU_TEST(str != NULL); + CU_TEST(str != clocks); + CU_TEST(*(str - 1) == '['); + CU_TEST(*(str + strlen(clock)) == ']'); + free(clocks); +} + +static void test_get_clock(void) +{ + const char *clock; + + clock = tracefs_get_clock(test_instance); + CU_TEST(clock != NULL); + current_clock_check(clock); + free((char *)clock); +} + +static int test_suite_destroy(void) +{ + tracefs_instance_destroy(test_instance); + tracefs_instance_free(test_instance); + tep_free(test_tep); + return 0; +} + +static int test_suite_init(void) +{ + const char *systems[] = {"ftrace", NULL}; + + test_tep = tracefs_local_events_system(NULL, systems); + if (test_tep == NULL) + 
return 1; + test_instance = tracefs_instance_create(TEST_INSTANCE_NAME); + if (!test_instance) + return 1; + + return 0; +} + +void test_tracefs_lib(void) +{ + CU_pSuite suite = NULL; + + suite = CU_add_suite(TRACEFS_SUITE, test_suite_init, test_suite_destroy); + if (suite == NULL) { + fprintf(stderr, "Suite \"%s\" cannot be ceated\n", TRACEFS_SUITE); + return; + } + CU_add_test(suite, "tracing file / directory APIs", + test_trace_file); + CU_add_test(suite, "instance file / directory APIs", + test_instance_file); + CU_add_test(suite, "systems and events APIs", + test_system_event); + CU_add_test(suite, "tracefs_iterate_raw_events API", + test_iter_raw_events); + CU_add_test(suite, "tracefs_tracers API", + test_tracers); + CU_add_test(suite, "tracefs_local events API", + test_local_events); + CU_add_test(suite, "tracefs_instances_walk API", + test_instances_walk); + CU_add_test(suite, "tracefs_get_clock API", + test_get_clock); +} |