summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Christopher Ferris <cferris@google.com> 2013-05-02 15:12:11 -0700
committer Christopher Ferris <cferris@google.com> 2013-05-03 12:27:25 -0700
commit f0d668eb46297293adfe8d50ff0f71cbb2fadeaa (patch)
tree eee583e18c7922133f64897f38e3ce128b3db77a
parent 76469ff2a0894eb2c3aa03976d5525f91bbf52ba (diff)
download extras-f0d668eb46297293adfe8d50ff0f71cbb2fadeaa.tar.gz
Add read bandwidth and small refactor.
- Addition of read bandwidth test.
- Move the allocation of all buffers used for testing into the setSize() function.
- Add a canRun function to indicate whether a test is able to run on this platform. Currently, this only disables neon bandwidth tests on platforms that don't support neon.
- Refactor the thread testing functions to use one common routine.

Merge from internal master.

(cherry-picked from 1a3794a84074d7f22b8ddaba840aedd758a14cdd)

Change-Id: Ie0f29ec7e484a376471d121333b1982be8f41393
-rw-r--r-- tests/memtest/bandwidth.cpp 166
-rw-r--r-- tests/memtest/bandwidth.h 284
-rw-r--r-- tests/memtest/memtest.cpp 11
3 files changed, 359 insertions, 102 deletions
diff --git a/tests/memtest/bandwidth.cpp b/tests/memtest/bandwidth.cpp
index d0f09108..0e0ec2e0 100644
--- a/tests/memtest/bandwidth.cpp
+++ b/tests/memtest/bandwidth.cpp
@@ -98,34 +98,48 @@ BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) {
size = values["size"].int_value;
}
if (strcmp(name, "copy_ldrd_strd") == 0) {
- bench = new CopyLdrdStrdBenchmark(size);
+ bench = new CopyLdrdStrdBenchmark();
} else if (strcmp(name, "copy_ldmia_stmia") == 0) {
- bench = new CopyLdmiaStmiaBenchmark(size);
+ bench = new CopyLdmiaStmiaBenchmark();
} else if (strcmp(name, "copy_vld_vst") == 0) {
- bench = new CopyVldVstBenchmark(size);
+ bench = new CopyVldVstBenchmark();
} else if (strcmp(name, "copy_vldmia_vstmia") == 0) {
- bench = new CopyVldmiaVstmiaBenchmark(size);
+ bench = new CopyVldmiaVstmiaBenchmark();
} else if (strcmp(name, "memcpy") == 0) {
- bench = new MemcpyBenchmark(size);
+ bench = new MemcpyBenchmark();
} else if (strcmp(name, "write_strd") == 0) {
- bench = new WriteStrdBenchmark(size);
+ bench = new WriteStrdBenchmark();
} else if (strcmp(name, "write_stmia") == 0) {
- bench = new WriteStmiaBenchmark(size);
+ bench = new WriteStmiaBenchmark();
} else if (strcmp(name, "write_vst") == 0) {
- bench = new WriteVstBenchmark(size);
+ bench = new WriteVstBenchmark();
} else if (strcmp(name, "write_vstmia") == 0) {
- bench = new WriteVstmiaBenchmark(size);
+ bench = new WriteVstmiaBenchmark();
} else if (strcmp(name, "memset") == 0) {
- bench = new MemsetBenchmark(size);
+ bench = new MemsetBenchmark();
+ } else if (strcmp(name, "read_ldrd") == 0) {
+ bench = new ReadLdrdBenchmark();
+ } else if (strcmp(name, "read_ldmia") == 0) {
+ bench = new ReadLdmiaBenchmark();
+ } else if (strcmp(name, "read_vld") == 0) {
+ bench = new ReadVldBenchmark();
+ } else if (strcmp(name, "read_vldmia") == 0) {
+ bench = new ReadVldmiaBenchmark();
+ } else {
+ printf("Unknown type name %s\n", name);
+ return NULL;
}
- if (bench) {
- if (values.count("num_warm_loops") > 0) {
- bench->set_num_loops(values["num_warm_loops"].int_value);
- }
- if (values.count("num_loops") > 0) {
- bench->set_num_loops(values["num_loops"].int_value);
- }
+ if (!bench->setSize(values["size"].int_value)) {
+ printf("Failed to allocate buffers for benchmark.\n");
+ return NULL;
+ }
+
+ if (values.count("num_warm_loops") > 0) {
+ bench->set_num_loops(values["num_warm_loops"].int_value);
+ }
+ if (values.count("num_loops") > 0) {
+ bench->set_num_loops(values["num_loops"].int_value);
}
return bench;
@@ -208,7 +222,6 @@ bool processThreadArgs(int argc, char** argv, option_t options[],
BandwidthBenchmark *bench = createBandwidthBenchmarkObject(*values);
if (!bench) {
- printf("Unknown type %s\n", (*values)["type"].char_value);
return false;
}
@@ -292,6 +305,9 @@ int per_core_bandwidth(int argc, char** argv) {
it != cpu_list.end(); ++it, ++i) {
args[i].core = *it;
args[i].bench = createBandwidthBenchmarkObject(values);
+ if (!args[i].bench) {
+ return 0;
+ }
}
printf("Running on %d cores\n", cpu_list.size());
@@ -325,6 +341,9 @@ int multithread_bandwidth(int argc, char** argv) {
for (int i = 0; i < num_threads; i++) {
args[i].core = -1;
args[i].bench = createBandwidthBenchmarkObject(values);
+ if (!args[i].bench) {
+ return 0;
+ }
}
printf("Running %d threads\n", num_threads);
@@ -341,18 +360,20 @@ int multithread_bandwidth(int argc, char** argv) {
return 0;
}
-int copy_bandwidth(int argc, char** argv) {
+bool run_bandwidth_benchmark(int argc, char** argv, const char *name,
+ std::vector<BandwidthBenchmark*> bench_objs) {
arg_t values;
values["size"].int_value = 0;
- values["num_loops"].int_value = BandwidthBenchmark::DEFAULT_NUM_LOOPS;
- values["num_warm_loops"].int_value = BandwidthBenchmark::DEFAULT_NUM_WARM_LOOPS;
+ values["num_warm_loops"].int_value = 0;
+ values["num_loops"].int_value = 0;
if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) {
return -1;
}
+
size_t size = values["size"].int_value;
if ((size % 64) != 0) {
printf("The size value must be a multiple of 64.\n");
- return -1;
+ return 1;
}
if (setpriority(PRIO_PROCESS, 0, -20)) {
@@ -360,68 +381,77 @@ int copy_bandwidth(int argc, char** argv) {
return -1;
}
- std::vector<BandwidthBenchmark*> bench_objs;
- bench_objs.push_back(new CopyLdrdStrdBenchmark(size));
- bench_objs.push_back(new CopyLdmiaStmiaBenchmark(size));
- bench_objs.push_back(new CopyVldVstBenchmark(size));
- bench_objs.push_back(new CopyVldmiaVstmiaBenchmark(size));
- bench_objs.push_back(new MemcpyBenchmark(size));
-
- printf("Benchmarking copy bandwidth\n");
- printf(" size = %d\n", bench_objs[0]->size());
- printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value);
- printf(" num_loops = %d\n\n", values["num_loops"].int_value);
+ bool preamble_printed = false;
+ size_t num_warm_loops = values["num_warm_loops"].int_value;
+ size_t num_loops = values["num_loops"].int_value;
for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin();
it != bench_objs.end(); ++it) {
- (*it)->set_num_warm_loops(values["num_warm_loops"].int_value);
- (*it)->set_num_loops(values["num_loops"].int_value);
+ if (!(*it)->canRun()) {
+ continue;
+ }
+ if (!(*it)->setSize(values["num_warm_loops"].int_value)) {
+ printf("Failed creating buffer for bandwidth test.\n");
+ return false;
+ }
+ if (num_warm_loops) {
+ (*it)->set_num_warm_loops(num_warm_loops);
+ }
+ if (num_loops) {
+ (*it)->set_num_loops(num_loops);
+ }
+ if (!preamble_printed) {
+ preamble_printed = true;
+ printf("Benchmarking %s bandwidth\n", name);
+ printf(" size = %d\n", (*it)->size());
+ printf(" num_warm_loops = %d\n", (*it)->num_warm_loops());
+ printf(" num_loops = %d\n\n", (*it)->num_loops());
+ }
(*it)->run();
- printf(" Copy bandwidth with %s: %0.2f MB/s\n", (*it)->getName(),
+ printf(" %s bandwidth with %s: %0.2f MB/s\n", name, (*it)->getName(),
(*it)->mb_per_sec());
}
- return 0;
+ return true;
}
-int write_bandwidth(int argc, char** argv) {
- arg_t values;
- values["size"].int_value = 0;
- values["num_loops"].int_value = BandwidthBenchmark::DEFAULT_NUM_LOOPS;
- values["num_warm_loops"].int_value = BandwidthBenchmark::DEFAULT_NUM_WARM_LOOPS;
- if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) {
+int copy_bandwidth(int argc, char** argv) {
+ std::vector<BandwidthBenchmark*> bench_objs;
+ bench_objs.push_back(new CopyLdrdStrdBenchmark());
+ bench_objs.push_back(new CopyLdmiaStmiaBenchmark());
+ bench_objs.push_back(new CopyVldVstBenchmark());
+ bench_objs.push_back(new CopyVldmiaVstmiaBenchmark());
+ bench_objs.push_back(new MemcpyBenchmark());
+
+ if (!run_bandwidth_benchmark(argc, argv, "copy", bench_objs)) {
return -1;
}
+ return 0;
+}
- size_t size = values["size"].int_value;
- if ((size % 64) != 0) {
- printf("The size value must be a multiple of 64.\n");
- return 1;
- }
+int write_bandwidth(int argc, char** argv) {
+ std::vector<BandwidthBenchmark*> bench_objs;
+ bench_objs.push_back(new WriteStrdBenchmark());
+ bench_objs.push_back(new WriteStmiaBenchmark());
+ bench_objs.push_back(new WriteVstBenchmark());
+ bench_objs.push_back(new WriteVstmiaBenchmark());
+ bench_objs.push_back(new MemsetBenchmark());
- if (setpriority(PRIO_PROCESS, 0, -20)) {
- perror("Unable to raise priority of process.");
+ if (!run_bandwidth_benchmark(argc, argv, "write", bench_objs)) {
return -1;
}
+ return 0;
+}
+
+int read_bandwidth(int argc, char** argv) {
std::vector<BandwidthBenchmark*> bench_objs;
- bench_objs.push_back(new WriteStrdBenchmark(size));
- bench_objs.push_back(new WriteStmiaBenchmark(size));
- bench_objs.push_back(new WriteVstBenchmark(size));
- bench_objs.push_back(new WriteVstmiaBenchmark(size));
- bench_objs.push_back(new MemsetBenchmark(size));
-
- printf("Benchmarking write bandwidth\n");
- printf(" size = %d\n", bench_objs[0]->size());
- printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value);
- printf(" num_loops = %d\n\n", values["num_loops"].int_value);
- for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin();
- it != bench_objs.end(); ++it) {
- (*it)->set_num_warm_loops(values["num_warm_loops"].int_value);
- (*it)->set_num_loops(values["num_loops"].int_value);
- (*it)->run();
- printf(" Write bandwidth with %s: %0.2f MB/s\n", (*it)->getName(),
- (*it)->mb_per_sec());
- }
+ bench_objs.push_back(new ReadLdrdBenchmark());
+ bench_objs.push_back(new ReadLdmiaBenchmark());
+ bench_objs.push_back(new ReadVldBenchmark());
+ bench_objs.push_back(new ReadVldmiaBenchmark());
+ if (!run_bandwidth_benchmark(argc, argv, "read", bench_objs)) {
+ return -1;
+ }
return 0;
}
diff --git a/tests/memtest/bandwidth.h b/tests/memtest/bandwidth.h
index cef5cdd5..414977b9 100644
--- a/tests/memtest/bandwidth.h
+++ b/tests/memtest/bandwidth.h
@@ -22,13 +22,20 @@
// Bandwidth Class definitions.
class BandwidthBenchmark {
public:
- BandwidthBenchmark(size_t size)
- : _size(size),
+ BandwidthBenchmark()
+ : _size(0),
_num_warm_loops(DEFAULT_NUM_WARM_LOOPS),
_num_loops(DEFAULT_NUM_LOOPS) {}
virtual ~BandwidthBenchmark() {}
- void run() {
+ bool run() {
+ if (_size == 0) {
+ return false;
+ }
+ if (!canRun()) {
+ return false;
+ }
+
bench(_num_warm_loops);
nsecs_t t = system_time();
@@ -36,12 +43,28 @@ public:
t = system_time() - t;
_mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC);
+
+ return true;
}
+ bool canRun() { return !usesNeon() || isNeonSupported(); }
+
+ virtual bool setSize(size_t size) = 0;
+
virtual const char *getName() = 0;
virtual bool verify() = 0;
+ virtual bool usesNeon() { return false; }
+
+ bool isNeonSupported() {
+#if defined(__ARM_NEON__)
+ return true;
+#else
+ return false;
+#endif
+ }
+
// Accessors/mutators.
double mb_per_sec() { return _mb_per_sec; }
size_t num_warm_loops() { return _num_warm_loops; }
@@ -73,22 +96,45 @@ private:
class CopyBandwidthBenchmark : public BandwidthBenchmark {
public:
- CopyBandwidthBenchmark(size_t size) : BandwidthBenchmark(size) {
- if (_size == 0) {
+ CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { }
+
+ // Allocates the 64-byte-aligned source and destination buffers used by the
+ // copy tests. A size of 0 selects DEFAULT_COPY_SIZE. Buffers left over from
+ // a previous call are released first. Returns false if either allocation
+ // fails.
+ bool setSize(size_t size) {
+ // Clear each pointer once it is freed: an early return below must not
+ // leave a dangling pointer behind for the destructor to free again.
+ if (_src) {
+ free(_src);
+ _src = NULL;
+ }
+ if (_dst) {
+ free(_dst);
+ _dst = NULL;
+ }
+
+ if (size == 0) {
_size = DEFAULT_COPY_SIZE;
+ } else {
+ _size = size;
}
+
_src = reinterpret_cast<char*>(memalign(64, _size));
if (!_src) {
- perror("Failed to allocate memory for test.");
- exit(1);
+ perror("Failed to allocate memory for test.");
+ return false;
}
_dst = reinterpret_cast<char*>(memalign(64, _size));
if (!_dst) {
- perror("Failed to allocate memory for test.");
- exit(1);
+ perror("Failed to allocate memory for test.");
+ return false;
+ }
+
+ return true;
+ }
+ virtual ~CopyBandwidthBenchmark() {
+ if (_src) {
+ free(_src);
+ _src = NULL;
+ }
+ if (_dst) {
+ free(_dst);
+ _dst = NULL;
}
}
- virtual ~CopyBandwidthBenchmark() { free(_src); free(_dst); }
bool verify() {
memset(_src, 0x23, _size);
@@ -120,7 +166,7 @@ protected:
class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark {
public:
- CopyLdrdStrdBenchmark(size_t size) : CopyBandwidthBenchmark(size) { }
+ CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { }
virtual ~CopyLdrdStrdBenchmark() {}
const char *getName() { return "ldrd/strd"; }
@@ -174,7 +220,7 @@ protected:
class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark {
public:
- CopyLdmiaStmiaBenchmark(size_t size) : CopyBandwidthBenchmark(size) { }
+ CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { }
virtual ~CopyLdmiaStmiaBenchmark() {}
const char *getName() { return "ldmia/stmia"; }
@@ -213,7 +259,7 @@ protected:
class CopyVldVstBenchmark : public CopyBandwidthBenchmark {
public:
- CopyVldVstBenchmark(size_t size) : CopyBandwidthBenchmark(size) { }
+ CopyVldVstBenchmark() : CopyBandwidthBenchmark() { }
virtual ~CopyVldVstBenchmark() {}
const char *getName() { return "vld/vst"; }
@@ -252,7 +298,7 @@ protected:
class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark {
public:
- CopyVldmiaVstmiaBenchmark(size_t size) : CopyBandwidthBenchmark(size) { }
+ CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { }
virtual ~CopyVldmiaVstmiaBenchmark() {}
const char *getName() { return "vldmia/vstmia"; }
@@ -289,7 +335,7 @@ protected:
class MemcpyBenchmark : public CopyBandwidthBenchmark {
public:
- MemcpyBenchmark(size_t size) : CopyBandwidthBenchmark(size) { }
+ MemcpyBenchmark() : CopyBandwidthBenchmark() { }
virtual ~MemcpyBenchmark() {}
const char *getName() { return "memcpy"; }
@@ -302,21 +348,50 @@ protected:
}
};
-class WriteBandwidthBenchmark : public BandwidthBenchmark {
+// Base class for bandwidth tests that work on one 64-byte-aligned buffer.
+class SingleBufferBandwidthBenchmark : public BandwidthBenchmark {
public:
- WriteBandwidthBenchmark(size_t size) : BandwidthBenchmark(size) {
+ SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { }
+ virtual ~SingleBufferBandwidthBenchmark() {
+ if (_buffer) {
+ free(_buffer);
+ _buffer = NULL;
+ }
+ }
+
+ // (Re)allocates the test buffer and zero-fills it. A size of 0 selects
+ // DEFAULT_SINGLE_BUFFER_SIZE. Returns false if the allocation fails.
+ bool setSize(size_t size) {
+ if (_buffer) {
+ free(_buffer);
+ _buffer = NULL;
+ }
+
+ // Test the requested size, not the _size member: _size starts out as 0,
+ // so checking the member would discard the caller's size on first use.
- if (_size == 0) {
+ if (size == 0) {
- _size = DEFAULT_WRITE_SIZE;
+ _size = DEFAULT_SINGLE_BUFFER_SIZE;
+ } else {
+ _size = size;
}
_buffer = reinterpret_cast<char*>(memalign(64, _size));
if (!_buffer) {
- perror("Failed to allocate memory for test.");
- exit(1);
+ perror("Failed to allocate memory for test.");
+ return false;
}
memset(_buffer, 0, _size);
+
+ return true;
}
- virtual ~WriteBandwidthBenchmark() { free(_buffer); }
+
+ // Default verification performs no check and always reports success.
+ bool verify() { return true; }
+
+protected:
+ char *_buffer;
+
+ static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000;
+};
+
+class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark {
+public:
+ WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { }
+ virtual ~WriteBandwidthBenchmark() { }
bool verify() {
memset(_buffer, 0, _size);
@@ -339,16 +414,11 @@ public:
return true;
}
-
-protected:
- char *_buffer;
-
- static const unsigned int DEFAULT_WRITE_SIZE = 16000;
};
class WriteStrdBenchmark : public WriteBandwidthBenchmark {
public:
- WriteStrdBenchmark(size_t size) : WriteBandwidthBenchmark(size) { }
+ WriteStrdBenchmark() : WriteBandwidthBenchmark() { }
virtual ~WriteStrdBenchmark() {}
const char *getName() { return "strd"; }
@@ -392,7 +462,7 @@ protected:
class WriteStmiaBenchmark : public WriteBandwidthBenchmark {
public:
- WriteStmiaBenchmark(size_t size) : WriteBandwidthBenchmark(size) { }
+ WriteStmiaBenchmark() : WriteBandwidthBenchmark() { }
virtual ~WriteStmiaBenchmark() {}
const char *getName() { return "stmia"; }
@@ -437,11 +507,13 @@ protected:
class WriteVstBenchmark : public WriteBandwidthBenchmark {
public:
- WriteVstBenchmark(size_t size) : WriteBandwidthBenchmark(size) { }
+ WriteVstBenchmark() : WriteBandwidthBenchmark() { }
virtual ~WriteVstBenchmark() {}
const char *getName() { return "vst"; }
+ bool usesNeon() { return true; }
+
protected:
// Write a given value using vst.
void bench(size_t num_loops) {
@@ -480,11 +552,13 @@ protected:
class WriteVstmiaBenchmark : public WriteBandwidthBenchmark {
public:
- WriteVstmiaBenchmark(size_t size) : WriteBandwidthBenchmark(size) { }
+ WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { }
virtual ~WriteVstmiaBenchmark() {}
const char *getName() { return "vstmia"; }
+ bool usesNeon() { return true; }
+
protected:
// Write a given value using vstmia.
void bench(size_t num_loops) {
@@ -523,7 +597,7 @@ protected:
class MemsetBenchmark : public WriteBandwidthBenchmark {
public:
- MemsetBenchmark(size_t size) : WriteBandwidthBenchmark(size) { }
+ MemsetBenchmark() : WriteBandwidthBenchmark() { }
virtual ~MemsetBenchmark() {}
const char *getName() { return "memset"; }
@@ -536,4 +610,152 @@ protected:
}
};
+// Read bandwidth test that loads the buffer with ldrd instructions.
+class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark {
+public:
+ ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { }
+ virtual ~ReadLdrdBenchmark() {}
+
+ const char *getName() { return "ldrd"; }
+
+protected:
+ // Read the buffer num_loops times using ldrd. Each inner iteration issues
+ // four ldrd loads (32 bytes), so the inner loop count is _size >> 5. The
+ // loaded values are discarded.
+ void bench(size_t num_loops) {
+ asm volatile(
+ "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
+
+ "mov r0, %0\n"
+ "mov r1, %1\n"
+ "mov r2, %2\n"
+
+ "0:\n"
+ "mov r3, r1, lsr #5\n"
+
+ "1:\n"
+ "subs r3, r3, #1\n"
+ "ldrd r4, r5, [r0]\n"
+ "ldrd r4, r5, [r0, #8]\n"
+ "ldrd r4, r5, [r0, #16]\n"
+ "ldrd r4, r5, [r0, #24]\n"
+ "add r0, r0, #32\n"
+ "bgt 1b\n"
+
+ "sub r0, r0, r1\n"
+ "subs r2, r2, #1\n"
+ "bgt 0b\n"
+
+ "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
+ // subs updates the condition flags and the loads touch memory that no
+ // operand describes, hence the "cc" and "memory" clobbers.
+ :: "r" (_buffer), "r" (_size), "r" (num_loops)
+ : "r0", "r1", "r2", "cc", "memory");
+ }
+};
+
+// Read bandwidth test that loads the buffer with ldmia instructions.
+class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark {
+public:
+ ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
+ virtual ~ReadLdmiaBenchmark() {}
+
+ const char *getName() { return "ldmia"; }
+
+protected:
+ // Read the buffer num_loops times using ldmia. Each inner iteration loads
+ // eight registers (32 bytes) with address write-back, so the inner loop
+ // count is _size >> 5. The loaded values are discarded.
+ void bench(size_t num_loops) {
+ asm volatile(
+ "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
+
+ "mov r0, %0\n"
+ "mov r1, %1\n"
+ "mov r2, %2\n"
+
+ "0:\n"
+ "mov r3, r1, lsr #5\n"
+
+ "1:\n"
+ "subs r3, r3, #1\n"
+ "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
+ "bgt 1b\n"
+
+ "sub r0, r0, r1\n"
+ "subs r2, r2, #1\n"
+ "bgt 0b\n"
+
+ "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
+ // subs updates the condition flags and the loads touch memory that no
+ // operand describes, hence the "cc" and "memory" clobbers.
+ :: "r" (_buffer), "r" (_size), "r" (num_loops)
+ : "r0", "r1", "r2", "cc", "memory");
+ }
+};
+
+// Read bandwidth test that loads the buffer with NEON vld1 instructions.
+class ReadVldBenchmark : public SingleBufferBandwidthBenchmark {
+public:
+ ReadVldBenchmark() : SingleBufferBandwidthBenchmark() { }
+ virtual ~ReadVldBenchmark() {}
+
+ const char *getName() { return "vld"; }
+
+ bool usesNeon() { return true; }
+
+protected:
+ // Read the buffer num_loops times using vld1. Each inner iteration loads
+ // 32 bytes into d0-d3 with post-increment, so the inner loop count is
+ // _size >> 5. The body is empty when __ARM_NEON__ is not defined.
+ void bench(size_t num_loops) {
+#if defined(__ARM_NEON__)
+ asm volatile(
+ "stmfd sp!, {r0,r1,r2,r3}\n"
+
+ "mov r0, %0\n"
+ "mov r1, %1\n"
+ "mov r2, %2\n"
+
+ "0:\n"
+ "mov r3, r1, lsr #5\n"
+
+ "1:\n"
+ "subs r3, r3, #1\n"
+ "vld1.8 {d0-d3}, [r0:128]!\n"
+ "bgt 1b\n"
+
+ "sub r0, r0, r1\n"
+ "subs r2, r2, #1\n"
+ "bgt 0b\n"
+
+ "ldmfd sp!, {r0,r1,r2,r3}\n"
+ // d0-d3 are overwritten and not saved on the stack, subs updates the
+ // condition flags, and the loads touch memory that no operand describes.
+ :: "r" (_buffer), "r" (_size), "r" (num_loops)
+ : "r0", "r1", "r2", "d0", "d1", "d2", "d3", "cc", "memory");
+#endif
+ }
+};
+
+// Read bandwidth test that loads the buffer with NEON vldmia instructions.
+class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark {
+public:
+ ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
+ virtual ~ReadVldmiaBenchmark() {}
+
+ const char *getName() { return "vldmia"; }
+
+ bool usesNeon() { return true; }
+
+protected:
+ // Read the buffer num_loops times using vldmia. Each inner iteration loads
+ // 32 bytes into d0-d3 with address write-back, so the inner loop count is
+ // _size >> 5. The body is empty when __ARM_NEON__ is not defined.
+ void bench(size_t num_loops) {
+#if defined(__ARM_NEON__)
+ asm volatile(
+ "stmfd sp!, {r0,r1,r2,r3}\n"
+
+ "mov r0, %0\n"
+ "mov r1, %1\n"
+ "mov r2, %2\n"
+
+ "0:\n"
+ "mov r3, r1, lsr #5\n"
+
+ "1:\n"
+ "subs r3, r3, #1\n"
+ "vldmia r0!, {d0-d3}\n"
+ "bgt 1b\n"
+
+ "sub r0, r0, r1\n"
+ "subs r2, r2, #1\n"
+ "bgt 0b\n"
+
+ "ldmfd sp!, {r0,r1,r2,r3}\n"
+ // d0-d3 are overwritten and not saved on the stack, subs updates the
+ // condition flags, and the loads touch memory that no operand describes.
+ :: "r" (_buffer), "r" (_size), "r" (num_loops)
+ : "r0", "r1", "r2", "d0", "d1", "d2", "d3", "cc", "memory");
+#endif
+ }
+};
+
#endif // __BANDWIDTH_H__
diff --git a/tests/memtest/memtest.cpp b/tests/memtest/memtest.cpp
index 0d76b45d..86ec9a41 100644
--- a/tests/memtest/memtest.cpp
+++ b/tests/memtest/memtest.cpp
@@ -41,14 +41,17 @@ static void usage(char* p) {
"<test> is one of the following:\n"
" copy_bandwidth [--size BYTES_TO_COPY]\n"
" write_bandwidth [--size BYTES_TO_WRITE]\n"
+ " read_bandwidth [--size BYTES_TO_COPY]\n"
" per_core_bandwidth [--size BYTES]\n"
" --type copy_ldrd_strd | copy_ldmia_stmia | copy_vld_vst |\n"
" copy_vldmia_vstmia | memcpy | write_strd | write_stmia |\n"
- " write_vst | write_vstmia | memset\n"
+ " write_vst | write_vstmia | memset | read_ldrd |\n"
+ " read_ldmia | read_vld | read_vldmia\n"
" multithread_bandwidth [--size BYTES]\n"
" --type copy_ldrd_strd | copy_ldmia_stmia | copy_vld_vst |\n"
" copy_vldmia_vstmia | memcpy | write_strd | write_stmia |\n"
- " write_vst | write_vstmia | memset\n"
+ " write_vst | write_vstmia | memset | read_ldrd |\n"
+ " read_ldmia | read_vld | read_vldmia\n"
" --num_threads NUM_THREADS_TO_RUN\n"
" malloc [fill]\n"
" madvise\n"
@@ -61,6 +64,7 @@ static void usage(char* p) {
int copy_bandwidth(int argc, char** argv);
int write_bandwidth(int argc, char** argv);
+int read_bandwidth(int argc, char** argv);
int per_core_bandwidth(int argc, char** argv);
int multithread_bandwidth(int argc, char** argv);
int malloc_test(int argc, char** argv);
@@ -83,7 +87,8 @@ function_t function_table[] = {
{ "crawl", crawl_test },
{ "fp", fp_test },
{ "copy_bandwidth", copy_bandwidth },
- { "write_bandwidth", write_bandwidth},
+ { "write_bandwidth", write_bandwidth },
+ { "read_bandwidth", read_bandwidth },
{ "per_core_bandwidth", per_core_bandwidth },
{ "multithread_bandwidth", multithread_bandwidth },
};