/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
161348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
171348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris#ifndef __BANDWIDTH_H__
181348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris#define __BANDWIDTH_H__
191348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
201348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris#include "memtest.h"
211348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
// Bandwidth benchmark class definitions.
231348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisclass BandwidthBenchmark {
241348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrispublic:
251a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    BandwidthBenchmark()
261a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        : _size(0),
271348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris          _num_warm_loops(DEFAULT_NUM_WARM_LOOPS),
281348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris          _num_loops(DEFAULT_NUM_LOOPS) {}
291348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    virtual ~BandwidthBenchmark() {}
301348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
311a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    bool run() {
321a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        if (_size == 0) {
331a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            return false;
341a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        }
351a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        if (!canRun()) {
361a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            return false;
371a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        }
381a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
391348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        bench(_num_warm_loops);
401348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
411348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        nsecs_t t = system_time();
421348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        bench(_num_loops);
431348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        t = system_time() - t;
441348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
451348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC);
461a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
471a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        return true;
481348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    }
491348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
501a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    bool canRun() { return !usesNeon() || isNeonSupported(); }
511a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
521a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    virtual bool setSize(size_t size) = 0;
531a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
541348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    virtual const char *getName() = 0;
551348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
561348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    virtual bool verify() = 0;
571348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
581a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    virtual bool usesNeon() { return false; }
591a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
601a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    bool isNeonSupported() {
611a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris#if defined(__ARM_NEON__)
621a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        return true;
631a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris#else
641a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        return false;
651a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris#endif
661a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    }
671a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
681348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    // Accessors/mutators.
691348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    double mb_per_sec() { return _mb_per_sec; }
701348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    size_t num_warm_loops() { return _num_warm_loops; }
711348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    size_t num_loops() { return _num_loops; }
721348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    size_t size() { return _size; }
731348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
741348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    void set_num_warm_loops(size_t num_warm_loops) {
751348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        _num_warm_loops = num_warm_loops;
761348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    }
771348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    void set_num_loops(size_t num_loops) { _num_loops = num_loops; }
781348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
791348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    // Static constants
801348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000;
811348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    static const unsigned int DEFAULT_NUM_LOOPS = 20000000;
821348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
831348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisprotected:
841348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    virtual void bench(size_t num_loops) = 0;
851348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
861348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    double _mb_per_sec;
871348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    size_t _size;
881348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    size_t _num_warm_loops;
891348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    size_t _num_loops;
901348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
911348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisprivate:
921348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    // Static constants
931348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    static const double _NUM_NS_PER_SEC = 1000000000.0;
941348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    static const double _BYTES_PER_MB = 1024.0* 1024.0;
951348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris};
961348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
971348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisclass CopyBandwidthBenchmark : public BandwidthBenchmark {
981348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrispublic:
991a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { }
1001a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
1011a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    bool setSize(size_t size) {
1021a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        if (_src) {
1031a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris           free(_src);
1041a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        }
1051a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        if (_dst) {
1061a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            free(_dst);
1071a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        }
1081a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
1091a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        if (size == 0) {
1101348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            _size = DEFAULT_COPY_SIZE;
1111a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        } else {
1121a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            _size = size;
1131348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        }
1141a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
1151348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        _src = reinterpret_cast<char*>(memalign(64, _size));
1161348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        if (!_src) {
1171a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            perror("Failed to allocate memory for test.");
1181a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            return false;
1191348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        }
1201348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        _dst = reinterpret_cast<char*>(memalign(64, _size));
1211348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        if (!_dst) {
1221a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            perror("Failed to allocate memory for test.");
1231a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            return false;
1241a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        }
1251a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
1261a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        return true;
1271a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    }
1281a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    virtual ~CopyBandwidthBenchmark() {
1291a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        if (_src) {
1301a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            free(_src);
1311a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            _src = NULL;
1321a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        }
1331a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        if (_dst) {
1341a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            free(_dst);
1351a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            _dst = NULL;
1361348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        }
1371348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    }
1381348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
1391348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    bool verify() {
1401348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        memset(_src, 0x23, _size);
1411348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        memset(_dst, 0, _size);
1421348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        bench(1);
1431348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        if (memcmp(_src, _dst, _size) != 0) {
14465d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            printf("Buffers failed to compare after one loop.\n");
1451348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            return false;
1461348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        }
1471348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
1481348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        memset(_src, 0x23, _size);
1491348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        memset(_dst, 0, _size);
1501348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        _num_loops = 2;
1511348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        bench(2);
1521348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        if (memcmp(_src, _dst, _size) != 0) {
15365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            printf("Buffers failed to compare after two loops.\n");
1541348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            return false;
1551348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        }
1561348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
1571348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        return true;
1581348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    }
1591348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
1601348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisprotected:
1611348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    char *_src;
1621348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    char *_dst;
1631348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
1641348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    static const unsigned int DEFAULT_COPY_SIZE = 8000;
1651348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris};
1661348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
1671348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisclass CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark {
1681348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrispublic:
1691a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { }
1701348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    virtual ~CopyLdrdStrdBenchmark() {}
1711348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
1721348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    const char *getName() { return "ldrd/strd"; }
1731348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
1741348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisprotected:
1751348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    // Copy using ldrd/strd instructions.
1761348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    void bench(size_t num_loops) {
1771348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        asm volatile(
1781348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
1791348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
1801348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r0, %0\n"
1811348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r1, %1\n"
1821348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r2, %2\n"
1831348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r3, %3\n"
1841348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
1851348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "0:\n"
1861348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r4, r2, lsr #6\n"
1871348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
1881348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "1:\n"
1891348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "ldrd r6, r7, [r0]\n"
1901348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "strd r6, r7, [r1]\n"
1911348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "ldrd r6, r7, [r0, #8]\n"
1921348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "strd r6, r7, [r1, #8]\n"
1931348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "ldrd r6, r7, [r0, #16]\n"
1941348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "strd r6, r7, [r1, #16]\n"
1951348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "ldrd r6, r7, [r0, #24]\n"
1961348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "strd r6, r7, [r1, #24]\n"
1971348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "ldrd r6, r7, [r0, #32]\n"
1981348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "strd r6, r7, [r1, #32]\n"
1991348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "ldrd r6, r7, [r0, #40]\n"
2001348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "strd r6, r7, [r1, #40]\n"
2011348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "ldrd r6, r7, [r0, #48]\n"
2021348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "strd r6, r7, [r1, #48]\n"
2031348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "ldrd r6, r7, [r0, #56]\n"
2041348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "strd r6, r7, [r1, #56]\n"
2051348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
2061348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "add  r0, r0, #64\n"
2071348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "add  r1, r1, #64\n"
2081348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "subs r4, r4, #1\n"
2091348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "bgt 1b\n"
2101348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
2111348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "sub r0, r0, r2\n"
2121348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "sub r1, r1, r2\n"
2131348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "subs r3, r3, #1\n"
2141348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "bgt 0b\n"
2151348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
2161348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
2171348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
2181348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    }
2191348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris};
2201348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
2211348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisclass CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark {
2221348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrispublic:
2231a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { }
2241348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    virtual ~CopyLdmiaStmiaBenchmark() {}
2251348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
2261348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    const char *getName() { return "ldmia/stmia"; }
2271348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
2281348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisprotected:
2291348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    // Copy using ldmia/stmia instructions.
2301348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    void bench(size_t num_loops) {
2311348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        asm volatile(
2321348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
2331348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
2341348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r0, %0\n"
2351348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r1, %1\n"
2361348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r2, %2\n"
2371348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r3, %3\n"
2381348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
2391348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "0:\n"
2401348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r4, r2, lsr #6\n"
2411348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
2421348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "1:\n"
2431348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
2441348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
2451348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "subs r4, r4, #1\n"
2461348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
2471348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
2481348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "bgt 1b\n"
2491348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
2501348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "sub r0, r0, r2\n"
2511348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "sub r1, r1, r2\n"
2521348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "subs r3, r3, #1\n"
2531348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "bgt 0b\n"
2541348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
2551348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
2561348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
2571348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    }
2581348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris};
2591348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
26065d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferrisclass CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark {
2611348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrispublic:
26265d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { }
26365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    virtual ~CopyVld1Vst1Benchmark() {}
2641348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
26565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    const char *getName() { return "vld1/vst1"; }
2661348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
267a67e53c2345eac337d24e4decaf4733c5314aff8Christopher Ferris    bool usesNeon() { return true; }
268a67e53c2345eac337d24e4decaf4733c5314aff8Christopher Ferris
2691348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisprotected:
27065d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    // Copy using vld1/vst1 instructions.
2711348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    void bench(size_t num_loops) {
272a67e53c2345eac337d24e4decaf4733c5314aff8Christopher Ferris#if defined(__ARM_NEON__)
2731348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        asm volatile(
2741348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "stmfd sp!, {r0,r1,r2,r3,r4}\n"
2751348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
2761348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r0, %0\n"
2771348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r1, %1\n"
2781348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r2, %2\n"
2791348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r3, %3\n"
2801348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
2811348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "0:\n"
2821348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r4, r2, lsr #6\n"
2831348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
2841348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "1:\n"
2851348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "vld1.8 {d0-d3}, [r0]!\n"
2861348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "vld1.8 {d4-d7}, [r0]!\n"
2871348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "subs r4, r4, #1\n"
2881348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "vst1.8 {d0-d3}, [r1:128]!\n"
2891348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "vst1.8 {d4-d7}, [r1:128]!\n"
2901348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "bgt 1b\n"
2911348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
2921348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "sub r0, r0, r2\n"
2931348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "sub r1, r1, r2\n"
2941348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "subs r3, r3, #1\n"
2951348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "bgt 0b\n"
2961348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
2971348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
2981348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
299a67e53c2345eac337d24e4decaf4733c5314aff8Christopher Ferris#endif
3001348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    }
3011348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris};
3021348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
30365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferrisclass CopyVldrVstrBenchmark : public CopyBandwidthBenchmark {
30465d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferrispublic:
30565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { }
30665d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    virtual ~CopyVldrVstrBenchmark() {}
30765d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
30865d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    const char *getName() { return "vldr/vstr"; }
30965d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
31065d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    bool usesNeon() { return true; }
31165d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
31265d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferrisprotected:
31365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    // Copy using vldr/vstr instructions.
31465d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    void bench(size_t num_loops) {
31565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris#if defined(__ARM_NEON__)
31665d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris        asm volatile(
31765d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "stmfd sp!, {r0,r1,r2,r3,r4}\n"
31865d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
31965d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "mov r0, %0\n"
32065d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "mov r1, %1\n"
32165d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "mov r2, %2\n"
32265d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "mov r3, %3\n"
32365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
32465d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "0:\n"
32565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "mov r4, r2, lsr #6\n"
32665d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
32765d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "1:\n"
32865d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vldr d0, [r0, #0]\n"
32965d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "subs r4, r4, #1\n"
33065d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vldr d1, [r0, #8]\n"
33165d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vstr d0, [r1, #0]\n"
33265d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vldr d0, [r0, #16]\n"
33365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vstr d1, [r1, #8]\n"
33465d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vldr d1, [r0, #24]\n"
33565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vstr d0, [r1, #16]\n"
33665d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vldr d0, [r0, #32]\n"
33765d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vstr d1, [r1, #24]\n"
33865d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vldr d1, [r0, #40]\n"
33965d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vstr d0, [r1, #32]\n"
34065d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vldr d0, [r0, #48]\n"
34165d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vstr d1, [r1, #40]\n"
34265d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vldr d1, [r0, #56]\n"
34365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vstr d0, [r1, #48]\n"
34465d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "add r0, r0, #64\n"
34565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vstr d1, [r1, #56]\n"
34665d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "add r1, r1, #64\n"
34765d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "bgt 1b\n"
34865d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
34965d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "sub r0, r0, r2\n"
35065d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "sub r1, r1, r2\n"
35165d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "subs r3, r3, #1\n"
35265d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "bgt 0b\n"
35365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
35465d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
35565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris        :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
35665d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris#endif
35765d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    }
35865d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris};
35965d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
3601348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisclass CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark {
3611348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrispublic:
3621a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { }
3631348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    virtual ~CopyVldmiaVstmiaBenchmark() {}
3641348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
3651348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    const char *getName() { return "vldmia/vstmia"; }
3661348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
367a67e53c2345eac337d24e4decaf4733c5314aff8Christopher Ferris    bool usesNeon() { return true; }
368a67e53c2345eac337d24e4decaf4733c5314aff8Christopher Ferris
3691348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisprotected:
37065d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    // Copy using vldmia/vstmia instructions.
3711348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    void bench(size_t num_loops) {
372a67e53c2345eac337d24e4decaf4733c5314aff8Christopher Ferris#if defined(__ARM_NEON__)
3731348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        asm volatile(
3741348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "stmfd sp!, {r0,r1,r2,r3,r4}\n"
3751348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
3761348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r0, %0\n"
3771348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r1, %1\n"
3781348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r2, %2\n"
3791348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r3, %3\n"
3801348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
3811348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "0:\n"
3821348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r4, r2, lsr #6\n"
3831348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
3841348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "1:\n"
3851348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "vldmia r0!, {d0-d7}\n"
3861348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "subs r4, r4, #1\n"
3871348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "vstmia r1!, {d0-d7}\n"
3881348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "bgt 1b\n"
3891348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
3901348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "sub r0, r0, r2\n"
3911348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "sub r1, r1, r2\n"
3921348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "subs r3, r3, #1\n"
3931348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "bgt 0b\n"
3941348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
3951348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
3961348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
397a67e53c2345eac337d24e4decaf4733c5314aff8Christopher Ferris#endif
3981348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    }
3991348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris};
4001348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
4011348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisclass MemcpyBenchmark : public CopyBandwidthBenchmark {
4021348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrispublic:
4031a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    MemcpyBenchmark() : CopyBandwidthBenchmark() { }
4041348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    virtual ~MemcpyBenchmark() {}
4051348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
4061348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    const char *getName() { return "memcpy"; }
4071348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
4081348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisprotected:
4091348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    void bench(size_t num_loops) {
4101348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        for (size_t i = 0; i < num_loops; i++) {
4111348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            memcpy(_dst, _src, _size);
4121348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        }
4131348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    }
4141348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris};
4151348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
4161a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferrisclass SingleBufferBandwidthBenchmark : public BandwidthBenchmark {
4171348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrispublic:
4181a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { }
4191a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    virtual ~SingleBufferBandwidthBenchmark() {
4201a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        if (_buffer) {
4211a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            free(_buffer);
4221a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            _buffer = NULL;
4231a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        }
4241a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    }
4251a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
4261a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    bool setSize(size_t size) {
4271a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        if (_buffer) {
4281a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            free(_buffer);
4291a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            _buffer = NULL;
4301a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        }
4311a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
4321348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        if (_size == 0) {
4331a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            _size = DEFAULT_SINGLE_BUFFER_SIZE;
4341a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        } else {
4351a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            _size = size;
4361348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        }
4371348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
4381348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        _buffer = reinterpret_cast<char*>(memalign(64, _size));
4391348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        if (!_buffer) {
4401a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            perror("Failed to allocate memory for test.");
4411a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            return false;
4421348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        }
4431348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        memset(_buffer, 0, _size);
4441a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
4451a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        return true;
4461348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    }
4471a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
4481a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    bool verify() { return true; }
4491a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
4501a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferrisprotected:
4511a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    char *_buffer;
4521a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
4531a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000;
4541a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris};
4551a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
4561a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferrisclass WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark {
4571a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferrispublic:
4581a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { }
4591a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    virtual ~WriteBandwidthBenchmark() { }
4601348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
4611348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    bool verify() {
4621348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        memset(_buffer, 0, _size);
4631348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        bench(1);
4641348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        for (size_t i = 0; i < _size; i++) {
4651348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            if (_buffer[i] != 1) {
46665d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris                printf("Buffer failed to compare after one loop.\n");
4671348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris                return false;
4681348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            }
4691348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        }
4701348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
4711348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        memset(_buffer, 0, _size);
4721348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        bench(2);
4731348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        for (size_t i = 0; i < _size; i++) {
4741348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            if (_buffer[i] != 2) {
47565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris                printf("Buffer failed to compare after two loops.\n");
4761348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris                return false;
4771348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            }
4781348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        }
4791348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
4801348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        return true;
4811348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    }
4821348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris};
4831348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
4841348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisclass WriteStrdBenchmark : public WriteBandwidthBenchmark {
4851348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrispublic:
4861a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    WriteStrdBenchmark() : WriteBandwidthBenchmark() { }
4871348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    virtual ~WriteStrdBenchmark() {}
4881348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
4891348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    const char *getName() { return "strd"; }
4901348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
4911348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisprotected:
4921348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    // Write a given value using strd.
4931348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    void bench(size_t num_loops) {
4941348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        asm volatile(
4951348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
4961348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
4971348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r0, %0\n"
4981348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r1, %1\n"
4991348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r2, %2\n"
5001348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5011348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r4, #0\n"
5021348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r5, #0\n"
5031348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5041348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "0:\n"
5051348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r3, r1, lsr #5\n"
5061348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5071348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "add r4, r4, #0x01010101\n"
5081348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r5, r4\n"
5091348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5101348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "1:\n"
5111348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "subs r3, r3, #1\n"
5121348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "strd r4, r5, [r0]\n"
5131348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "strd r4, r5, [r0, #8]\n"
5141348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "strd r4, r5, [r0, #16]\n"
5151348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "strd r4, r5, [r0, #24]\n"
5161348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "add  r0, r0, #32\n"
5171348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "bgt 1b\n"
5181348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5191348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "sub r0, r0, r1\n"
5201348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "subs r2, r2, #1\n"
5211348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "bgt 0b\n"
5221348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5231348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
5241348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris          :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
5251348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    }
5261348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris};
5271348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5281348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisclass WriteStmiaBenchmark : public WriteBandwidthBenchmark {
5291348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrispublic:
5301a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    WriteStmiaBenchmark() : WriteBandwidthBenchmark() { }
5311348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    virtual ~WriteStmiaBenchmark() {}
5321348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5331348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    const char *getName() { return "stmia"; }
5341348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5351348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisprotected:
5361348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris      // Write a given value using stmia.
5371348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris      void bench(size_t num_loops) {
5381348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris          asm volatile(
5391348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
5401348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5411348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "mov r0, %0\n"
5421348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "mov r1, %1\n"
5431348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "mov r2, %2\n"
5441348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5451348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "mov r4, #0\n"
5461348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5471348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "0:\n"
5481348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "mov r3, r1, lsr #5\n"
5491348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5501348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "add r4, r4, #0x01010101\n"
5511348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "mov r5, r4\n"
5521348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "mov r6, r4\n"
5531348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "mov r7, r4\n"
5541348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "mov r8, r4\n"
5551348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "mov r9, r4\n"
5561348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "mov r10, r4\n"
5571348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "mov r11, r4\n"
5581348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5591348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "1:\n"
5601348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "subs r3, r3, #1\n"
5611348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
5621348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "bgt 1b\n"
5631348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5641348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "sub r0, r0, r1\n"
5651348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "subs r2, r2, #1\n"
5661348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "bgt 0b\n"
5671348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5681348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris              "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
5691348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
5701348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    }
5711348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris};
5721348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
57365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferrisclass WriteVst1Benchmark : public WriteBandwidthBenchmark {
5741348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrispublic:
57565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    WriteVst1Benchmark() : WriteBandwidthBenchmark() { }
57665d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    virtual ~WriteVst1Benchmark() {}
5771348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
57865d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    const char *getName() { return "vst1"; }
5791348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5801a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    bool usesNeon() { return true; }
5811a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
5821348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisprotected:
5831348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    // Write a given value using vst.
5841348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    void bench(size_t num_loops) {
585e5282ae49814c9ed0b9bdec920a20e6978349327Christopher Ferris#if defined(__ARM_NEON__)
5861348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        asm volatile(
5871348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "stmfd sp!, {r0,r1,r2,r3,r4}\n"
5881348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5891348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r0, %0\n"
5901348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r1, %1\n"
5911348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r2, %2\n"
5921348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r4, #0\n"
5931348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5941348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "0:\n"
5951348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r3, r1, lsr #5\n"
5961348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
5971348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "add r4, r4, #1\n"
5981348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "vdup.8 d0, r4\n"
5991348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "vmov d1, d0\n"
6001348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "vmov d2, d0\n"
6011348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "vmov d3, d0\n"
6021348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
6031348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "1:\n"
6041348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "subs r3, r3, #1\n"
6051348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "vst1.8 {d0-d3}, [r0:128]!\n"
6061348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "bgt 1b\n"
6071348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
6081348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "sub r0, r0, r1\n"
6091348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "subs r2, r2, #1\n"
6101348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "bgt 0b\n"
6111348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
6121348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
6131348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
614e5282ae49814c9ed0b9bdec920a20e6978349327Christopher Ferris#endif
6151348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    }
6161348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris};
6171348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
61865d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferrisclass WriteVstrBenchmark : public WriteBandwidthBenchmark {
61965d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferrispublic:
62065d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    WriteVstrBenchmark() : WriteBandwidthBenchmark() { }
62165d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    virtual ~WriteVstrBenchmark() {}
62265d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
62365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    const char *getName() { return "vstr"; }
62465d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
62565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    bool usesNeon() { return true; }
62665d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
62765d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferrisprotected:
62865d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    // Write a given value using vst.
62965d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    void bench(size_t num_loops) {
63065d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris#if defined(__ARM_NEON__)
63165d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris        asm volatile(
63265d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "stmfd sp!, {r0,r1,r2,r3,r4}\n"
63365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
63465d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "mov r0, %0\n"
63565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "mov r1, %1\n"
63665d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "mov r2, %2\n"
63765d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "mov r4, #0\n"
63865d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
63965d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "0:\n"
64065d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "mov r3, r1, lsr #5\n"
64165d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
64265d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "add r4, r4, #1\n"
64365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vdup.8 d0, r4\n"
64465d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vmov d1, d0\n"
64565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vmov d2, d0\n"
64665d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vmov d3, d0\n"
64765d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
64865d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "1:\n"
64965d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vstr d0, [r0, #0]\n"
65065d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "subs r3, r3, #1\n"
65165d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vstr d1, [r0, #8]\n"
65265d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vstr d0, [r0, #16]\n"
65365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vstr d1, [r0, #24]\n"
65465d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "add r0, r0, #32\n"
65565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "bgt 1b\n"
65665d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
65765d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "sub r0, r0, r1\n"
65865d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "subs r2, r2, #1\n"
65965d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "bgt 0b\n"
66065d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
66165d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
66265d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris        :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
66365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris#endif
66465d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    }
66565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris};
66665d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
6671348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisclass WriteVstmiaBenchmark : public WriteBandwidthBenchmark {
6681348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrispublic:
6691a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { }
6701348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    virtual ~WriteVstmiaBenchmark() {}
6711348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
6721348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    const char *getName() { return "vstmia"; }
6731348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
6741a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    bool usesNeon() { return true; }
6751a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
6761348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisprotected:
6771348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    // Write a given value using vstmia.
6781348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    void bench(size_t num_loops) {
679e5282ae49814c9ed0b9bdec920a20e6978349327Christopher Ferris#if defined(__ARM_NEON__)
6801348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        asm volatile(
6811348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "stmfd sp!, {r0,r1,r2,r3,r4}\n"
6821348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
6831348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r0, %0\n"
6841348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r1, %1\n"
6851348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r2, %2\n"
6861348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r4, #0\n"
6871348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
6881348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "0:\n"
6891348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "mov r3, r1, lsr #5\n"
6901348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
6911348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "add r4, r4, #1\n"
6921348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "vdup.8 d0, r4\n"
6931348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "vmov d1, d0\n"
6941348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "vmov d2, d0\n"
6951348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "vmov d3, d0\n"
6961348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
6971348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "1:\n"
6981348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "subs r3, r3, #1\n"
6991348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "vstmia r0!, {d0-d3}\n"
7001348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "bgt 1b\n"
7011348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
7021348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "sub r0, r0, r1\n"
7031348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "subs r2, r2, #1\n"
7041348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "bgt 0b\n"
7051348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
7061348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
7071348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
708e5282ae49814c9ed0b9bdec920a20e6978349327Christopher Ferris#endif
7091348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    }
7101348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris};
7111348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
7121348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisclass MemsetBenchmark : public WriteBandwidthBenchmark {
7131348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrispublic:
7141a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    MemsetBenchmark() : WriteBandwidthBenchmark() { }
7151348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    virtual ~MemsetBenchmark() {}
7161348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
7171348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    const char *getName() { return "memset"; }
7181348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
7191348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferrisprotected:
7201348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    void bench(size_t num_loops) {
7211348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        for (size_t i = 0; i < num_loops; i++) {
7221348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris            memset(_buffer, (i % 255) + 1, _size);
7231348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris        }
7241348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris    }
7251348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris};
7261348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris
7271a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferrisclass ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark {
7281a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferrispublic:
7291a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { }
7301a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    virtual ~ReadLdrdBenchmark() {}
7311a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
7321a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    const char *getName() { return "ldrd"; }
7331a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
7341a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferrisprotected:
7351a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    // Write a given value using strd.
7361a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    void bench(size_t num_loops) {
7371a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        asm volatile(
7381a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
7391a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
7401a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "mov r0, %0\n"
7411a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "mov r1, %1\n"
7421a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "mov r2, %2\n"
7431a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
7441a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "0:\n"
7451a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "mov r3, r1, lsr #5\n"
7461a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
7471a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "1:\n"
7481a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "subs r3, r3, #1\n"
7491a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "ldrd r4, r5, [r0]\n"
7501a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "ldrd r4, r5, [r0, #8]\n"
7511a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "ldrd r4, r5, [r0, #16]\n"
7521a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "ldrd r4, r5, [r0, #24]\n"
7531a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "add  r0, r0, #32\n"
7541a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "bgt 1b\n"
7551a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
7561a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "sub r0, r0, r1\n"
7571a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "subs r2, r2, #1\n"
7581a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "bgt 0b\n"
7591a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
7601a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
7611a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris          :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
7621a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    }
7631a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris};
7641a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
7651a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferrisclass ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark {
7661a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferrispublic:
7671a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
7681a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    virtual ~ReadLdmiaBenchmark() {}
7691a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
7701a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    const char *getName() { return "ldmia"; }
7711a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
7721a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferrisprotected:
7731a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris      // Write a given value using stmia.
7741a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris      void bench(size_t num_loops) {
7751a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris          asm volatile(
7761a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris              "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
7771a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
7781a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris              "mov r0, %0\n"
7791a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris              "mov r1, %1\n"
7801a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris              "mov r2, %2\n"
7811a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
7821a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris              "0:\n"
7831a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris              "mov r3, r1, lsr #5\n"
7841a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
7851a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris              "1:\n"
7861a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris              "subs r3, r3, #1\n"
7871a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris              "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
7881a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris              "bgt 1b\n"
7891a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
7901a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris              "sub r0, r0, r1\n"
7911a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris              "subs r2, r2, #1\n"
7921a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris              "bgt 0b\n"
7931a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
7941a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris              "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
7951a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
7961a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    }
7971a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris};
7981a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
79965d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferrisclass ReadVld1Benchmark : public SingleBufferBandwidthBenchmark {
8001a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferrispublic:
80165d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { }
80265d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    virtual ~ReadVld1Benchmark() {}
8031a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
80465d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    const char *getName() { return "vld1"; }
8051a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
8061a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    bool usesNeon() { return true; }
8071a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
8081a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferrisprotected:
8091a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    // Write a given value using vst.
8101a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    void bench(size_t num_loops) {
8111a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris#if defined(__ARM_NEON__)
8121a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        asm volatile(
8131a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "stmfd sp!, {r0,r1,r2,r3}\n"
8141a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
8151a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "mov r0, %0\n"
8161a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "mov r1, %1\n"
8171a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "mov r2, %2\n"
8181a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
8191a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "0:\n"
8201a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "mov r3, r1, lsr #5\n"
8211a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
8221a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "1:\n"
8231a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "subs r3, r3, #1\n"
8241a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "vld1.8 {d0-d3}, [r0:128]!\n"
8251a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "bgt 1b\n"
8261a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
8271a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "sub r0, r0, r1\n"
8281a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "subs r2, r2, #1\n"
8291a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "bgt 0b\n"
8301a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
8311a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "ldmfd sp!, {r0,r1,r2,r3}\n"
8321a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
8331a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris#endif
8341a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    }
8351a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris};
8361a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
83765d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferrisclass ReadVldrBenchmark : public SingleBufferBandwidthBenchmark {
83865d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferrispublic:
83965d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { }
84065d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    virtual ~ReadVldrBenchmark() {}
84165d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
84265d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    const char *getName() { return "vldr"; }
84365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
84465d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    bool usesNeon() { return true; }
84565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
84665d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferrisprotected:
84765d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    // Write a given value using vst.
84865d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    void bench(size_t num_loops) {
84965d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris#if defined(__ARM_NEON__)
85065d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris        asm volatile(
85165d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "stmfd sp!, {r0,r1,r2,r3}\n"
85265d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
85365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "mov r0, %0\n"
85465d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "mov r1, %1\n"
85565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "mov r2, %2\n"
85665d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
85765d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "0:\n"
85865d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "mov r3, r1, lsr #5\n"
85965d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
86065d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "1:\n"
86165d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vldr d0, [r0, #0]\n"
86265d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "subs r3, r3, #1\n"
86365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vldr d1, [r0, #8]\n"
86465d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vldr d0, [r0, #16]\n"
86565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "vldr d1, [r0, #24]\n"
86665d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "add r0, r0, #32\n"
86765d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "bgt 1b\n"
86865d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
86965d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "sub r0, r0, r1\n"
87065d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "subs r2, r2, #1\n"
87165d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "bgt 0b\n"
87265d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
87365d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris            "ldmfd sp!, {r0,r1,r2,r3}\n"
87465d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris        :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
87565d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris#endif
87665d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris    }
87765d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris};
87865d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
87965d2c7894a93f66ea41d55f843285e7e7bdbf488Christopher Ferris
8801a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferrisclass ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark {
8811a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferrispublic:
8821a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
8831a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    virtual ~ReadVldmiaBenchmark() {}
8841a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
8851a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    const char *getName() { return "vldmia"; }
8861a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
8871a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    bool usesNeon() { return true; }
8881a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
8891a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferrisprotected:
8901a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    // Write a given value using vstmia.
8911a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    void bench(size_t num_loops) {
8921a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris#if defined(__ARM_NEON__)
8931a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        asm volatile(
8941a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "stmfd sp!, {r0,r1,r2,r3}\n"
8951a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
8961a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "mov r0, %0\n"
8971a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "mov r1, %1\n"
8981a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "mov r2, %2\n"
8991a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
9001a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "0:\n"
9011a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "mov r3, r1, lsr #5\n"
9021a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
9031a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "1:\n"
9041a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "subs r3, r3, #1\n"
9051a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "vldmia r0!, {d0-d3}\n"
9061a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "bgt 1b\n"
9071a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
9081a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "sub r0, r0, r1\n"
9091a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "subs r2, r2, #1\n"
9101a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "bgt 0b\n"
9111a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
9121a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris            "ldmfd sp!, {r0,r1,r2,r3}\n"
9131a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris        :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
9141a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris#endif
9151a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris    }
9161a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris};
9171a3794a84074d7f22b8ddaba840aedd758a14cddChristopher Ferris
9181348ce27ee9bb8e50a3294879c1523fa4b4d8f8bChristopher Ferris#endif  // __BANDWIDTH_H__
919