21 files changed, 438 insertions, 5 deletions
diff --git a/profcollectd/README.md b/profcollectd/README.md
index dba75a2c..e9066000 100644
--- a/profcollectd/README.md
+++ b/profcollectd/README.md
@@ -37,6 +37,62 @@ are controlled by the following configurations:
 
 Setting the frequency value to `0` disables collection for the corresponding event.
 
+#### Custom configuration
+
+In adb root:
+
+```
+# Record every 60s (By default, record every 10m). The actual interval will be longer than the
+# set value if the device goes to hibernation.
+oriole:/ # setprop persist.device_config.profcollect_native_boot.collection_interval 60
+
+# Each time recording, record ETM data for 1s (By default, it's 0.5s).
+oriole:/ # setprop persist.device_config.profcollect_native_boot.sampling_period 1000
+
+# Set ETM data storage limit to 50G (By default, it is 512M).
+oriole:/ # setprop persist.device_config.profcollect_native_boot.max_trace_limit 53687091200
+
+# Enable ETM data collection (By default, it's decided by the server).
+oriole:/ # setprop persist.device_config.profcollect_native_boot.enabled true
+
+# After adjusting configuration, need to restart profcollectd
+oriole:/ # setprop ctl.stop profcollectd
+# Wait for a few seconds.
+oriole:/ # setprop ctl.start profcollectd
+
+# Check if profcollectd is running
+oriole:/ # ps -e | grep profcollectd
+root           918     1 10945660 47040 binder_wait_for_work 0 S profcollectd
+
+# Check if the new configuration takes effect.
+oriole:/ # cat /data/misc/profcollectd/output/config.json
+{"version":1,"node_id":[189,15,145,225,97,167],"build_fingerprint":"google/oriole/oriole:Tiramisu/TP1A.220223.002/8211650:userdebug/dev-keys","collection_interval":{"secs":60,"nanos":0},"sampling_period":{"secs":1,"nanos":0},"binary_filter":"^/(system|apex/.+)/(bin|lib|lib64)/.+","max_trace_limit":53687091200}
+```
+
+To check existing collected ETM data:
+```
+oriole:/ # cd data/misc/profcollectd/trace/
+oriole:/data/misc/profcollectd/trace # ls
+```
+
+To check if ETM data can be collected successfully:
+```
+# Trigger one collection manually.
+oriole:/ # profcollectctl once
+Trace once
+
+# Check trace directory to see if there is a recent manual trace file.
+oriole:/ # ls /data/misc/profcollectd/trace/
+20220224-222946_manual.etmtrace
+```
+
+If there are too many trace files, we need to processing them to avoid reaching storage limit.
+It may take a long time.
+```
+oriole:/ # profcollectctl process
+Processing traces
+```
+
 ### Processing
 
 The raw tracing data needs to be combined with the original binary to create the AutoFDO branch
diff --git a/profcollectd/libprofcollectd/lib.rs b/profcollectd/libprofcollectd/lib.rs
index f95132f4..da178f27 100644
--- a/profcollectd/libprofcollectd/lib.rs
+++ b/profcollectd/libprofcollectd/lib.rs
@@ -127,6 +127,9 @@ pub fn reset() -> Result<()> {
 pub fn init_logging() {
     let min_log_level = if cfg!(feature = "test") { log::Level::Info } else { log::Level::Error };
     android_logger::init_once(
-        android_logger::Config::default().with_tag("profcollectd").with_min_level(min_log_level),
+        android_logger::Config::default()
+            .with_tag("profcollectd")
+            .with_min_level(min_log_level)
+            .with_log_id(android_logger::LogId::System),
     );
 }
diff --git a/simpleperf/Android.bp b/simpleperf/Android.bp
index ba4ef198..b5edda80 100644
--- a/simpleperf/Android.bp
+++ b/simpleperf/Android.bp
@@ -328,7 +328,10 @@ cc_library {
     srcs: ["profcollect.cpp"],
     host_supported: false,
     static_libs: ["libsimpleperf"],
-    shared_libs: ["libLLVM_android"],
+    shared_libs: [
+        "libLLVM_android",
+        "libpower",
+    ],
 }
 
 rust_bindgen {
@@ -530,6 +533,11 @@ cc_library_shared {
 
 cc_defaults {
     name: "simpleperf_test_srcs",
+    tidy_timeout_srcs: [
+        "record_test.cpp",
+        "cmd_report_sample_test.cpp",
+        "cmd_report_test.cpp",
+    ],
     srcs: [
         "cmd_inject_test.cpp",
         "cmd_kmem_test.cpp",
@@ -561,6 +569,10 @@ cc_defaults {
             ],
         },
         linux: {
+            tidy_timeout_srcs: [
+                "cmd_stat_test.cpp",
+                "cmd_record_test.cpp",
+            ],
             srcs: [
                 "CallChainJoiner_test.cpp",
                 "cmd_api_test.cpp",
diff --git a/simpleperf/ETMDecoder.cpp b/simpleperf/ETMDecoder.cpp
index 41a7d130..7c1cca05 100644
--- a/simpleperf/ETMDecoder.cpp
+++ b/simpleperf/ETMDecoder.cpp
@@ -735,7 +735,7 @@ class ETMDecoderImpl : public ETMDecoder {
       auto resp = decoder.TraceDataIn(OCSD_OP_DATA, data_index_, left_size, data, &processed);
       if (IsRespError(resp)) {
         // A decoding error shouldn't ruin all data. Reset decoders to recover from it.
-        LOG(INFO) << "reset etm decoders for seeing a decode failure, resp " << resp;
+        LOG(DEBUG) << "reset etm decoders for seeing a decode failure, resp " << resp;
         decoder.TraceDataIn(OCSD_OP_RESET, data_index_ + processed, 0, nullptr, nullptr);
       }
       data += processed;
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp
index b84bcfab..f206b808 100644
--- a/simpleperf/cmd_record.cpp
+++ b/simpleperf/cmd_record.cpp
@@ -114,7 +114,9 @@ constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * 1024 * 1024;
 static constexpr size_t kRecordBufferSize = 64 * 1024 * 1024;
 static constexpr size_t kSystemWideRecordBufferSize = 256 * 1024 * 1024;
 
-static constexpr size_t kDefaultAuxBufferSize = 4 * 1024 * 1024;
+// If the kernel needs to allocate continuous DMA memory for ETR (like when IOMMU for ETR isn't
+// available), requesting 4M ETR buffer may fail and cause warning. So use 1M buffer here.
+static constexpr size_t kDefaultAuxBufferSize = 1 * 1024 * 1024;
 
 // On Pixel 3, it takes about 1ms to enable ETM, and 16-40ms to disable ETM and copy 4M ETM data.
 // So make default period to 100ms.
@@ -221,7 +223,7 @@ class RecordCommand : public Command {
 "--aux-buffer-size <buffer_size>  Set aux buffer size, only used in cs-etm event type.\n"
 "                                 Need to be power of 2 and page size aligned.\n"
 "                                 Used memory size is (buffer_size * (cpu_count + 1).\n"
-"                                 Default is 4M.\n"
+"                                 Default is 1M.\n"
 "--no-inherit  Don't record created child threads/processes.\n"
 "--cpu-percent <percent>  Set the max percent of cpu time used for recording.\n"
 "                         percent is in range [1-100], default is 25.\n"
diff --git a/simpleperf/doc/README.md b/simpleperf/doc/README.md
index 473a5c11..51ffc839 100644
--- a/simpleperf/doc/README.md
+++ b/simpleperf/doc/README.md
@@ -95,6 +95,9 @@ See [executable_commands_reference.md](./executable_commands_reference.md).
 
 See [scripts_reference.md](./scripts_reference.md).
 
+## View the profile
+
+See [view_the_profile.md](./view_the_profile.md).
 
 ## Answers to common issues
 
diff --git a/simpleperf/doc/pictures/android_studio_profiler_flame_chart.png b/simpleperf/doc/pictures/android_studio_profiler_flame_chart.png
new file mode 100644
index 00000000..6fea4f77
--- /dev/null
+++ b/simpleperf/doc/pictures/android_studio_profiler_flame_chart.png
diff --git a/simpleperf/doc/pictures/android_studio_profiler_open_perf_trace.png b/simpleperf/doc/pictures/android_studio_profiler_open_perf_trace.png
new file mode 100644
index 00000000..faa9b70d
--- /dev/null
+++ b/simpleperf/doc/pictures/android_studio_profiler_open_perf_trace.png
diff --git a/simpleperf/doc/pictures/android_studio_profiler_select_process.png b/simpleperf/doc/pictures/android_studio_profiler_select_process.png
new file mode 100644
index 00000000..4004e171
--- /dev/null
+++ b/simpleperf/doc/pictures/android_studio_profiler_select_process.png
diff --git a/simpleperf/doc/pictures/android_studio_profiler_select_recording_method.png b/simpleperf/doc/pictures/android_studio_profiler_select_recording_method.png
new file mode 100644
index 00000000..3813533a
--- /dev/null
+++ b/simpleperf/doc/pictures/android_studio_profiler_select_recording_method.png
diff --git a/simpleperf/doc/pictures/continuous_pprof.png b/simpleperf/doc/pictures/continuous_pprof.png
new file mode 100644
index 00000000..f92f7b65
--- /dev/null
+++ b/simpleperf/doc/pictures/continuous_pprof.png
diff --git a/simpleperf/doc/pictures/firefox_profiler.png b/simpleperf/doc/pictures/firefox_profiler.png
new file mode 100644
index 00000000..4019289b
--- /dev/null
+++ b/simpleperf/doc/pictures/firefox_profiler.png
diff --git a/simpleperf/doc/pictures/flamescope.png b/simpleperf/doc/pictures/flamescope.png
new file mode 100644
index 00000000..2a19dad5
--- /dev/null
+++ b/simpleperf/doc/pictures/flamescope.png
diff --git a/simpleperf/doc/pictures/flamescope_click.png b/simpleperf/doc/pictures/flamescope_click.png
new file mode 100644
index 00000000..a5a8a979
--- /dev/null
+++ b/simpleperf/doc/pictures/flamescope_click.png
diff --git a/simpleperf/doc/pictures/flamescope_flamegraph.png b/simpleperf/doc/pictures/flamescope_flamegraph.png
new file mode 100644
index 00000000..8a0b4ef9
--- /dev/null
+++ b/simpleperf/doc/pictures/flamescope_flamegraph.png
diff --git a/simpleperf/doc/pictures/report_command.png b/simpleperf/doc/pictures/report_command.png
new file mode 100644
index 00000000..c04419a8
--- /dev/null
+++ b/simpleperf/doc/pictures/report_command.png
diff --git a/simpleperf/doc/pictures/report_html.png b/simpleperf/doc/pictures/report_html.png
new file mode 100644
index 00000000..358ef7b4
--- /dev/null
+++ b/simpleperf/doc/pictures/report_html.png
diff --git a/simpleperf/doc/scripts_reference.md b/simpleperf/doc/scripts_reference.md
index 1a97ebe5..31dee02d 100644
--- a/simpleperf/doc/scripts_reference.md
+++ b/simpleperf/doc/scripts_reference.md
@@ -114,6 +114,9 @@ device. It is more convenient than running adb commands manually.
 
 ## Viewing the profile
 
+Scripts in this section are for viewing the profile or converting profile data into formats used by
+external UIs. For recommended UIs, see [view_the_profile.md](view_the_profile.md).
+
 ### report.py
 
 report.py is a wrapper of the `report` command on the host. It accepts all options of the `report`
diff --git a/simpleperf/doc/view_the_profile.md b/simpleperf/doc/view_the_profile.md
new file mode 100644
index 00000000..8a0e07e7
--- /dev/null
+++ b/simpleperf/doc/view_the_profile.md
@@ -0,0 +1,342 @@
+# View the profile
+
+[TOC]
+
+## Introduction
+
+After using `simpleperf record` or `app_profiler.py`, we get a profile data file. The file contains
+a list of samples. Each sample has a timestamp, a thread id, a callstack, events (like cpu-cycles
+or cpu-clock) used in this sample, etc. We have many choices for viewing the profile. We can show
+samples in chronological order, or show aggregated flamegraphs. We can show reports in text format,
+or in some interactive UIs.
+
+Below shows some recommended UIs to view the profile. Google developers can find more examples in
+[go/gmm-profiling](go/gmm-profiling?polyglot=linux-workstation#viewing-the-profile).
+
+
+## Continuous PProf UI (great flamegraph UI, but only available internally)
+
+[PProf](https://github.com/google/pprof) is a mature profiling technology used extensively on
+Google servers, with a powerful flamegraph UI, with strong drilldown, search, pivot, profile diff,
+and graph visualisation.
+
+![Example](./pictures/continuous_pprof.png)
+
+We can use `pprof_proto_generator.py` to convert profiles into pprof.profile protobufs for use in
+pprof.
+
+```
+# Output all threads, broken down by threadpool.
+./pprof_proto_generator.py
+
+# Use proguard mapping.
+./pprof_proto_generator.py --proguard-mapping-file proguard.map
+
+# Just the main (UI) thread (query by thread name):
+./pprof_proto_generator.py --comm com.example.android.displayingbitmaps
+```
+
+This will print some debug logs about Failed to read symbols: this is usually OK, unless those
+symbols are hotspots.
+
+Upload pprof.profile to http://pprof/ UI:
+
+```
+# Upload all threads in profile, grouped by threadpool.
+# This is usually a good default, combining threads with similar names.
+pprof --flame --tagroot threadpool pprof.profile
+
+# Upload all threads in profile, grouped by individual thread name.
+pprof --flame --tagroot thread pprof.profile
+
+# Upload all threads in profile, without grouping by thread.
+pprof --flame pprof.profile
+This will output a URL, example: https://pprof.corp.google.com/?id=589a60852306144c880e36429e10b166
+```
+
+## Firefox Profiler (great chronological UI)
+
+We can view Android profiles using Firefox Profiler: https://profiler.firefox.com/. This does not
+require Firefox installation -- Firefox Profiler is just a website, you can open it in any browser.
+
+![Example](./pictures/firefox_profiler.png)
+
+Firefox Profiler has a great chronological view, as it doesn't pre-aggregate similar stack traces
+like pprof does.
+
+We can use `gecko_profile_generator.py` to convert raw perf.data files into a Firefox Profile, with
+Proguard deobfuscation.
+
+```
+# Create Gecko Profile
+./gecko_profile_generator.py | gzip > gecko_profile.json.gz
+
+# Create Gecko Profile using Proguard map
+./gecko_profile_generator.py --proguard-mapping-file proguard.map | gzip > gecko_profile.json.gz
+```
+
+Then drag-and-drop gecko_profile.json.gz into https://profiler.firefox.com/.
+
+Firefox Profiler supports:
+
+1. Aggregated Flamegraphs
+2. Chronological Stackcharts
+
+And allows filtering by:
+
+1. Individual threads
+2. Multiple threads (Ctrl+Click thread names to select many)
+3. Timeline period
+4. Stack frame text search
+
+## FlameScope (great jank-finding UI)
+
+[Netflix's FlameScope](https://github.com/Netflix/flamescope) is a rough, proof-of-concept UI that
+lets you spot repeating patterns of work by laying out the profile as a subsecond heatmap.
+
+Below, each vertical stripe is one second, and each cell is 10ms. Redder cells have more samples.
+See https://www.brendangregg.com/blog/2018-11-08/flamescope-pattern-recognition.html for how to
+spot patterns.
+
+This is an example of a 60s DisplayBitmaps app Startup Profile.
+
+![Example](./pictures/flamescope.png)
+
+You can see:
+
+  The thick red vertical line on the left is startup.
+  The long white vertical sections on the left shows the app is mostly idle, waiting for commands
+  from instrumented tests.
+  Then we see periodically red blocks, which shows the app is periodically busy handling commands
+  from instrumented tests.
+
+Click the start and end cells of a duration:
+
+![Example](./pictures/flamescope_click.png)
+
+To see a flamegraph for that duration:
+
+![Example](./pictures/flamescope_flamegraph.png)
+
+Install and run Flamescope:
+
+```
+git clone https://github.com/Netflix/flamescope ~/flamescope
+cd ~/flamescope
+pip install -r requirements.txt
+npm install
+npm run webpack
+python3 run.py
+```
+
+Then open FlameScope in-browser: http://localhost:5000/.
+
+FlameScope can read gzipped perf script format profiles. Convert simpleperf perf.data to this
+format with `report_sample.py`, and place it in Flamescope's examples directory:
+
+```
+# Create `Linux perf script` format profile.
+report_sample.py | gzip > ~/flamescope/examples/my_simpleperf_profile.gz
+
+# Create `Linux perf script` format profile using Proguard map.
+report_sample.py \
+  --proguard-mapping-file proguard.map \
+  | gzip > ~/flamescope/examples/my_simpleperf_profile.gz
+```
+
+Open the profile "as Linux Perf", and click start and end sections to get a flamegraph of that
+timespan.
+
+To investigate UI Thread Jank, filter to UI thread samples only:
+
+```
+report_sample.py \
+  --comm com.example.android.displayingbitmaps \ # UI Thread
+  | gzip > ~/flamescope/examples/uithread.gz
+```
+
+Once you've identified the timespan of interest, consider also zooming into that section with
+Firefox Profiler, which has a more powerful flamegraph viewer.
+
+## Differential FlameGraph
+
+See Brendan Gregg's [Differential Flame Graphs](https://www.brendangregg.com/blog/2014-11-09/differential-flame-graphs.html) blog.
+
+Use Simpleperf's `stackcollapse.py` to convert perf.data to Folded Stacks format for the FlameGraph
+toolkit.
+
+Consider diffing both directions: After minus Before, and Before minus After.
+
+If you've recorded before and after your optimisation as perf_before.data and perf_after.data, and
+you're only interested in the UI thread:
+
+```
+# Generate before and after folded stacks from perf.data files
+./stackcollapse.py --kernel --jit -i perf_before.data \
+  --proguard-mapping-file proguard_before.map \
+  --comm com.example.android.displayingbitmaps \
+  > perf_before.folded
+./stackcollapse.py --kernel --jit -i perf_after.data \
+  --proguard-mapping-file proguard_after.map \
+  --comm com.example.android.displayingbitmaps \
+  > perf_after.folded
+
+# Generate diff reports
+FlameGraph/difffolded.pl -n perf_before.folded perf_after.folded \
+  | FlameGraph/flamegraph.pl > diff1.svg
+FlameGraph/difffolded.pl -n --negate perf_after.folded perf_before.folded \
+  | FlameGraph/flamegraph.pl > diff2.svg
+```
+
+## Android Studio Profiler
+
+Android Studio Profiler supports recording and reporting profiles of app processes. It supports
+several recording methods, including one using simpleperf as backend. You can use Android Studio
+Profiler for both recording and reporting.
+
+In Android Studio:
+Open View -> Tool Windows -> Profiler
+Click + -> Your Device -> Profileable Processes -> Your App
+
+![Example](./pictures/android_studio_profiler_select_process.png)
+
+Click into "CPU" Chart
+
+Choose Callstack Sample Recording. Even if you're using Java, this provides better observability,
+into ART, malloc, and the kernel.
+
+![Example](./pictures/android_studio_profiler_select_recording_method.png)
+
+Click Record, run your test on the device, then Stop when you're done.
+
+Click on a thread track, and "Flame Chart" to see a chronological chart on the left, and an
+aggregated flamechart on the right:
+
+![Example](./pictures/android_studio_profiler_flame_chart.png)
+
+If you want more flexibility in recording options, or want to add proguard mapping file, you can
+record using simpleperf, and report using Android Studio Profiler.
+
+We can use `simpleperf report-sample` to convert perf.data to trace files for Android Studio
+Profiler.
+
+```
+# Convert perf.data to perf.trace for Android Studio Profiler.
+# If on Mac/Windows, use simpleperf host executable for those platforms instead.
+bin/linux/x86_64/simpleperf report-sample --show-callchain --protobuf -i perf.data -o perf.trace
+
+# Convert perf.data to perf.trace using proguard mapping file.
+bin/linux/x86_64/simpleperf report-sample --show-callchain --protobuf -i perf.data -o perf.trace \
+    --proguard-mapping-file proguard.map
+```
+
+In Android Studio: Open File -> Open -> Select perf.trace
+
+![Example](./pictures/android_studio_profiler_open_perf_trace.png)
+
+
+## Simpleperf HTML Report
+
+Simpleperf can generate its own HTML Profile, which is able to show Android-specific information
+and separate flamegraphs for all threads, with a much rougher flamegraph UI.
+
+![Example](./pictures/report_html.png)
+
+This UI is fairly rough; we recommend using the Continuous PProf UI or Firefox Profiler instead. But
+it's useful for a quick look at your data.
+
+Each of the following commands take as input ./perf.data and output ./report.html.
+
+```
+# Make an HTML report.
+./report_html.py
+
+# Make an HTML report with Proguard mapping.
+./report_html.py --proguard-mapping-file proguard.map
+```
+
+This will print some debug logs about Failed to read symbols: this is usually OK, unless those
+symbols are hotspots.
+
+See also [report_html.py's README](scripts_reference.md#report_htmlpy) and `report_html.py -h`.
+
+
+## PProf Interactive Command Line
+
+Unlike Continuous PProf UI, [PProf](https://github.com/google/pprof) command line is publicly
+available, and allows drilldown, pivoting and filtering.
+
+The below session demonstrates filtering to stack frames containing processBitmap.
+
+```
+$ pprof pprof.profile
+(pprof) show=processBitmap
+(pprof) top
+Active filters:
+   show=processBitmap
+Showing nodes accounting for 2.45s, 11.44% of 21.46s total
+      flat  flat%   sum%        cum   cum%
+     2.45s 11.44% 11.44%      2.45s 11.44%  com.example.android.displayingbitmaps.util.ImageFetcher.processBitmap
+```
+
+And then showing the tags of those frames, to tell what threads they are running on:
+
+```
+(pprof) tags
+ pid: Total 2.5s
+      2.5s (  100%): 31112
+
+ thread: Total 2.5s
+         1.4s (57.21%): AsyncTask #3
+         1.1s (42.79%): AsyncTask #4
+
+ threadpool: Total 2.5s
+             2.5s (  100%): AsyncTask #%d
+
+ tid: Total 2.5s
+      1.4s (57.21%): 31174
+      1.1s (42.79%): 31175
+```
+
+Contrast with another method:
+
+```
+(pprof) show=addBitmapToCache
+(pprof) top
+Active filters:
+   show=addBitmapToCache
+Showing nodes accounting for 1.05s, 4.88% of 21.46s total
+      flat  flat%   sum%        cum   cum%
+     1.05s  4.88%  4.88%      1.05s  4.88%  com.example.android.displayingbitmaps.util.ImageCache.addBitmapToCache
+```
+
+For more information, see the [pprof README](https://github.com/google/pprof/blob/master/doc/README.md#interactive-terminal-use).
+
+
+## Simpleperf Report Command Line
+
+The simpleperf report command reports profiles in text format.
+
+![Example](./pictures/report_command.png)
+
+You can call `simpleperf report` directly or call it via `report.py`.
+
+```
+# Report symbols in table format.
+$ ./report.py --children
+
+# Report call graph.
+$ bin/linux/x86_64/simpleperf report -g -i perf.data
+```
+
+See also [report command's README](executable_commands_reference.md#The-report-command) and
+`report.py -h`.
+
+
+## Custom Report Interface
+
+If the above View UIs can't fulfill your need, you can use `simpleperf_report_lib.py` to parse
+perf.data, extract sample information, and feed it to any views you like.
+
+See [simpleperf_report_lib.py's README](scripts_reference.md#simpleperf_report_libpy) for more
+details.
diff --git a/simpleperf/profcollect.cpp b/simpleperf/profcollect.cpp
index 39c04ab7..724111e8 100644
--- a/simpleperf/profcollect.cpp
+++ b/simpleperf/profcollect.cpp
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <wakelock/wakelock.h>
 #include <include/simpleperf_profcollect.hpp>
 
 #include "ETMRecorder.h"
@@ -42,6 +43,13 @@ bool HasDeviceSupport() {
 }
 
 bool Record(const char* event_name, const char* output, float duration) {
+  // The kernel may panic when trying to hibernate or hotplug CPUs while collecting
+  // ETM data. So get wakelock to keep the CPUs on.
+  auto wakelock = android::wakelock::WakeLock::tryGet("profcollectd");
+  if (!wakelock) {
+    LOG(ERROR) << "Failed to request wakelock.";
+    return false;
+  }
   auto recordCmd = CreateCommandInstance("record");
   std::vector<std::string> args;
   args.push_back("-a");
diff --git a/simpleperf/scripts/run_simpleperf_without_usb_connection.py b/simpleperf/scripts/run_simpleperf_without_usb_connection.py
index 8beddcd9..405deb4f 100755
--- a/simpleperf/scripts/run_simpleperf_without_usb_connection.py
+++ b/simpleperf/scripts/run_simpleperf_without_usb_connection.py
@@ -45,6 +45,8 @@ def start_recording(args):
     shell_cmd = 'cd /data/local/tmp && nohup ./simpleperf record ' + args.record_options
     if args.app:
         shell_cmd += ' --app ' + args.app
+    if args.pid:
+        shell_cmd += ' -p ' + args.pid
     if args.size_limit:
         shell_cmd += ' --size-limit ' + args.size_limit
     shell_cmd += ' >/data/local/tmp/simpleperf_output 2>&1'
@@ -85,6 +87,8 @@ def main():
                                       Default is `-e task-clock:u -g`.""")
     start_parser.add_argument('-p', '--app', help="""Profile an Android app, given the package
                               name. Like `-p com.example.android.myapp`.""")
+    start_parser.add_argument('--pid', help="""Profile an Android app, given the process id.
+                              Like `--pid 918`.""")
     start_parser.add_argument('--size_limit', type=str,
                               help="""Stop profiling when recording data reaches
                                       [size_limit][K|M|G] bytes. Like `--size_limit 1M`.""")