Skip to content

Commit

Permalink
Add initial AArch64 assembly code
Browse files Browse the repository at this point in the history
  • Loading branch information
ssvb committed Mar 30, 2016
1 parent f45e61b commit 229a747
Show file tree
Hide file tree
Showing 4 changed files with 228 additions and 2 deletions.
7 changes: 5 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ ifdef WINDIR
CC = gcc
endif

tinymembench: main.c util.o util.h asm-opt.h version.h asm-opt.o x86-sse2.o arm-neon.o mips-32.o
${CC} -O2 ${CFLAGS} -o tinymembench main.c util.o asm-opt.o x86-sse2.o arm-neon.o mips-32.o -lm
tinymembench: main.c util.o util.h asm-opt.h version.h asm-opt.o x86-sse2.o arm-neon.o mips-32.o aarch64-asm.o
${CC} -O2 ${CFLAGS} -o tinymembench main.c util.o asm-opt.o x86-sse2.o arm-neon.o mips-32.o aarch64-asm.o -lm

util.o: util.c util.h
${CC} -O2 ${CFLAGS} -c util.c
Expand All @@ -19,6 +19,9 @@ x86-sse2.o: x86-sse2.S
arm-neon.o: arm-neon.S
${CC} -O2 ${CFLAGS} -c arm-neon.S

# Assemble the AArch64 benchmark kernels.  aarch64-asm.S is wrapped in
# "#ifdef __aarch64__", so on other architectures this yields an empty object.
aarch64-asm.o: aarch64-asm.S
	${CC} -O2 ${CFLAGS} -c aarch64-asm.S

mips-32.o: mips-32.S
${CC} -O2 ${CFLAGS} -c mips-32.S

Expand Down
125 changes: 125 additions & 0 deletions aarch64-asm.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
* Copyright © 2016 Siarhei Siamashka <siarhei.siamashka@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

#ifdef __aarch64__

.cpu cortex-a53+fp+simd
.text
.align 2

/*
 * asm_function -- common prologue for every benchmark entry point below.
 * Exports \function_name as a global ELF function symbol and defines
 * readable aliases for the argument registers (AAPCS64 passes the first
 * three integer arguments in x0, x1, x2):
 *   DST  = x0  (destination pointer)
 *   SRC  = x1  (source pointer)
 *   SIZE = x2  (remaining byte count)
 * NOTE(review): the .req aliases are re-issued by every macro invocation
 * and never .unreq'd; gas appears to accept rebinding a name to the same
 * register -- confirm if a different assembler is ever used.
 */
.macro asm_function function_name
.global \function_name
.type \function_name,%function
.func \function_name
\function_name:
DST .req x0
SRC .req x1
SIZE .req x2
.endm

/*
 * aligned_block_copy_ldpstp_x_aarch64(dst, src, size)
 *
 * Copy loop built from 64-bit integer register pairs: each iteration loads
 * 64 bytes with four LDP instructions into x3..x10 (16 bytes per pair),
 * stores them back with four STP, advances SRC and DST by 64 and
 * decrements SIZE by 64.  The loop runs while SIZE > 0 (bgt), so SIZE is
 * presumably a positive multiple of 64 -- TODO confirm with the callers.
 * Clobbers x3..x10 (caller-saved under AAPCS64).
 */
asm_function aligned_block_copy_ldpstp_x_aarch64
0:
ldp x3, x4, [SRC, #(0 * 16)]
ldp x5, x6, [SRC, #(1 * 16)]
ldp x7, x8, [SRC, #(2 * 16)]
ldp x9, x10, [SRC, #(3 * 16)]
add SRC, SRC, #64
stp x3, x4, [DST, #(0 * 16)]
stp x5, x6, [DST, #(1 * 16)]
stp x7, x8, [DST, #(2 * 16)]
stp x9, x10, [DST, #(3 * 16)]
add DST, DST, #64
subs SIZE, SIZE, #64
bgt 0b
ret
.endfunc

/*
 * aligned_block_copy_ldpstp_q_aarch64(dst, src, size)
 *
 * Copy loop using 128-bit SIMD registers: each iteration moves 64 bytes
 * with two LDP/STP q-register pairs (32 bytes per pair), then advances
 * both pointers by 64 and decrements SIZE by 64.  The loop runs while
 * SIZE > 0, so SIZE is presumably a positive multiple of 64.
 * Clobbers v0..v3.
 */
asm_function aligned_block_copy_ldpstp_q_aarch64
0:
ldp q0, q1, [SRC, #(0 * 32)]
ldp q2, q3, [SRC, #(1 * 32)]
add SRC, SRC, #64
stp q0, q1, [DST, #(0 * 32)]
stp q2, q3, [DST, #(1 * 32)]
add DST, DST, #64
subs SIZE, SIZE, #64
bgt 0b
ret
.endfunc

/*
 * aligned_block_fill_stp_x_aarch64(dst, size)  -- SRC is not used.
 *
 * Fill loop: stores 64 bytes per iteration with four STP of x-register
 * pairs.  Note that x3..x10 are never initialized in this function, so
 * memory is filled with whatever those caller-saved registers happen to
 * hold -- presumably intentional, since only store bandwidth is measured.
 * Loops while SIZE > 0 after subtracting 64 each pass.
 */
asm_function aligned_block_fill_stp_x_aarch64
0:
stp x3, x4, [DST, #(0 * 16)]
stp x5, x6, [DST, #(1 * 16)]
stp x7, x8, [DST, #(2 * 16)]
stp x9, x10, [DST, #(3 * 16)]
add DST, DST, #64
subs SIZE, SIZE, #64
bgt 0b
ret
.endfunc

/*
 * aligned_block_fill_stp_q_aarch64(dst, size)  -- SRC is not used.
 *
 * SIMD fill loop: stores 64 bytes per iteration with two STP q-register
 * pairs (32 bytes each).  q0..q3 are not initialized here, so the fill
 * pattern is whatever those registers contain (store bandwidth only).
 * Loops while SIZE > 0 after subtracting 64 each pass.
 */
asm_function aligned_block_fill_stp_q_aarch64
0:
stp q0, q1, [DST, #(0 * 32)]
stp q2, q3, [DST, #(1 * 32)]
add DST, DST, #64
subs SIZE, SIZE, #64
bgt 0b
ret
.endfunc

/*
 * aligned_block_fill_stnp_x_aarch64(dst, size)  -- SRC is not used.
 *
 * Same store pattern as the STP fill above, but using STNP, which carries
 * a non-temporal hint telling the CPU the written data need not be kept
 * in the caches.  x3..x10 are uninitialized here (the fill value is
 * arbitrary; only store bandwidth matters).
 */
asm_function aligned_block_fill_stnp_x_aarch64
0:
stnp x3, x4, [DST, #(0 * 16)]
stnp x5, x6, [DST, #(1 * 16)]
stnp x7, x8, [DST, #(2 * 16)]
stnp x9, x10, [DST, #(3 * 16)]
add DST, DST, #64
subs SIZE, SIZE, #64
bgt 0b
ret
.endfunc

/*
 * aligned_block_fill_stnp_q_aarch64(dst, size)  -- SRC is not used.
 *
 * SIMD variant of the non-temporal fill: two STNP q-register pairs store
 * 64 bytes per iteration with a non-temporal (cache-bypass) hint.
 * q0..q3 are uninitialized here; the fill value is arbitrary.
 */
asm_function aligned_block_fill_stnp_q_aarch64
0:
stnp q0, q1, [DST, #(0 * 32)]
stnp q2, q3, [DST, #(1 * 32)]
add DST, DST, #64
subs SIZE, SIZE, #64
bgt 0b
ret
.endfunc

/*
 * aligned_block_copy_ld1st1_aarch64(dst, src, size)
 *
 * Copy loop using LD1/ST1 multi-register structure transfers: one LD1
 * reads four 16-byte vectors (v0..v3, 64 bytes total) and one ST1 writes
 * them back.  Pointers are advanced with separate ADDs rather than
 * post-index addressing.  Loops while SIZE > 0 after subtracting 64,
 * so SIZE is presumably a positive multiple of 64.  Clobbers v0..v3.
 */
asm_function aligned_block_copy_ld1st1_aarch64
0:
ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [SRC]
st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [DST]
add SRC, SRC, #64
add DST, DST, #64
subs SIZE, SIZE, #64
bgt 0b
ret
.endfunc

#endif
61 changes: 61 additions & 0 deletions aarch64-asm.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright © 2016 Siarhei Siamashka <siarhei.siamashka@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

/*
 * Fix: the include guard __AARCH64_ASM_H__ began with a double underscore,
 * which is an identifier reserved for the implementation (C11 7.1.3);
 * renamed to AARCH64_ASM_H.
 */
#ifndef AARCH64_ASM_H
#define AARCH64_ASM_H

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Benchmark kernels implemented in aarch64-asm.S (compiled only when
 * __aarch64__ is defined).
 *
 * All routines process 'size' bytes in 64-byte chunks and loop while the
 * remaining count is positive, so 'size' is presumably expected to be a
 * positive multiple of 64 -- confirm against the callers.
 *
 * Copy routines: read 'size' bytes from 'src' and write them to 'dst'.
 */
void aligned_block_copy_ldpstp_x_aarch64(int64_t * __restrict dst,
                                         int64_t * __restrict src,
                                         int size);
void aligned_block_copy_ldpstp_q_aarch64(int64_t * __restrict dst,
                                         int64_t * __restrict src,
                                         int size);
void aligned_block_copy_ld1st1_aarch64(int64_t * __restrict dst,
                                       int64_t * __restrict src,
                                       int size);

/*
 * Fill routines: write 'size' bytes to 'dst'.  The assembly never reads
 * 'src'; the parameter presumably exists so that all benchmarks share a
 * single function pointer signature.  The fill pattern is whatever the
 * scratch registers happen to contain (only store bandwidth is measured).
 */
void aligned_block_fill_stp_x_aarch64(int64_t * __restrict dst,
                                      int64_t * __restrict src,
                                      int size);
void aligned_block_fill_stp_q_aarch64(int64_t * __restrict dst,
                                      int64_t * __restrict src,
                                      int size);

/* STNP variants: same as the STP fills, but with a non-temporal hint. */
void aligned_block_fill_stnp_x_aarch64(int64_t * __restrict dst,
                                       int64_t * __restrict src,
                                       int size);
void aligned_block_fill_stnp_q_aarch64(int64_t * __restrict dst,
                                       int64_t * __restrict src,
                                       int size);

#ifdef __cplusplus
}
#endif

#endif /* AARCH64_ASM_H */
37 changes: 37 additions & 0 deletions asm-opt.c
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,43 @@ bench_info *get_asm_framebuffer_benchmarks(void)
return empty;
}

#elif defined(__aarch64__)

#include "aarch64-asm.h"

/*
 * Benchmark table for the standard memory test run on AArch64.
 * Entry layout per the initializers: { name, flag, kernel }.  The flag is
 * nonzero only on the "2-pass" entries of the framebuffer table in this
 * file, so it presumably selects a two-pass copy through an intermediate
 * buffer -- NOTE(review): confirm against the bench_info definition.
 * The list is terminated by a { NULL, 0, NULL } sentinel.
 */
static bench_info aarch64_neon[] =
{
{ "NEON LDP/STP copy", 0, aligned_block_copy_ldpstp_q_aarch64 },
{ "NEON LD1/ST1 copy", 0, aligned_block_copy_ld1st1_aarch64 },
{ "NEON STP fill", 0, aligned_block_fill_stp_q_aarch64 },
{ "NEON STNP fill", 0, aligned_block_fill_stnp_q_aarch64 },
{ "ARM LDP/STP copy", 0, aligned_block_copy_ldpstp_x_aarch64 },
{ "ARM STP fill", 0, aligned_block_fill_stp_x_aarch64 },
{ "ARM STNP fill", 0, aligned_block_fill_stnp_x_aarch64 },
{ NULL, 0, NULL }
};

/*
 * Benchmark table for the "from framebuffer" measurements on AArch64.
 * Entry layout: { name, flag, kernel }; the nonzero flag marks the
 * "2-pass" variants -- presumably a copy through an intermediate buffer,
 * NOTE(review): confirm against the bench_info definition.
 * Terminated by a { NULL, 0, NULL } sentinel.
 */
static bench_info aarch64_neon_fb[] =
{
{ "NEON LDP/STP copy (from framebuffer)", 0, aligned_block_copy_ldpstp_q_aarch64 },
{ "NEON LDP/STP 2-pass copy (from framebuffer)", 1, aligned_block_copy_ldpstp_q_aarch64 },
{ "NEON LD1/ST1 copy (from framebuffer)", 0, aligned_block_copy_ld1st1_aarch64 },
{ "NEON LD1/ST1 2-pass copy (from framebuffer)", 1, aligned_block_copy_ld1st1_aarch64 },
{ "ARM LDP/STP copy (from framebuffer)", 0, aligned_block_copy_ldpstp_x_aarch64 },
{ "ARM LDP/STP 2-pass copy (from framebuffer)", 1, aligned_block_copy_ldpstp_x_aarch64 },
{ NULL, 0, NULL }
};

/* Hand out the NULL-terminated AArch64 table for the standard test run. */
bench_info *get_asm_benchmarks(void)
{
    return &aarch64_neon[0];
}

/* Hand out the NULL-terminated AArch64 table for the framebuffer test run. */
bench_info *get_asm_framebuffer_benchmarks(void)
{
    return &aarch64_neon_fb[0];
}

#elif defined(__mips__) && defined(_ABIO32)

#include "mips-32.h"
Expand Down

0 comments on commit 229a747

Please sign in to comment.