Plan 9 from Bell Labs’s /usr/web/sources/contrib/stallion/root/arm/go/src/internal/bytealg/index_arm64.s

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

TEXT ·Index(SB),NOSPLIT,$0-56
	// Entry stub: move the frame arguments into the registers that
	// indexbody<> expects, then branch to it. Nothing is returned from
	// here directly; indexbody<> stores the result and executes RET.
	// NOTE(review): the 24-byte spacing of a and b presumably reflects
	// slice arguments (pointer, length, capacity) - confirm against the
	// Go-side declaration.

	// R9 = address of the result slot (not its value).
	MOVD	$ret+48(FP), R9
	// R2/R3 = needle pointer and length.
	MOVD	b_base+24(FP), R2
	MOVD	b_len+32(FP), R3
	// R0/R1 = haystack pointer and length.
	MOVD	a_base+0(FP), R0
	MOVD	a_len+8(FP), R1
	B	indexbody<>(SB)

TEXT ·IndexString(SB),NOSPLIT,$0-40
	// Entry stub with the same register set-up as ·Index, but with the
	// arguments packed 16 bytes apart in the frame.
	// NOTE(review): the 16-byte spacing presumably reflects string
	// arguments (pointer, length) - confirm against the Go-side
	// declaration.

	// R9 = address of the result slot (not its value).
	MOVD	$ret+32(FP), R9
	// R2/R3 = needle pointer and length.
	MOVD	b_base+16(FP), R2
	MOVD	b_len+24(FP), R3
	// R0/R1 = haystack pointer and length.
	MOVD	a_base+0(FP), R0
	MOVD	a_len+8(FP), R1
	// indexbody<> stores the result through R9 and executes RET.
	B	indexbody<>(SB)

// indexbody<> scans the haystack from its start for the first position at
// which the needle matches, and stores the byte offset of that position,
// or -1 if there is none, through R9.
//
// input:
//   R0: haystack
//   R1: length of haystack
//   R2: needle
//   R3: length of needle (2 <= len <= 32)
//   R9: address to put result
//
// Registers written along the way: R0, R4-R8, R10-R12, plus R2 (needle
// lengths above 8) and R3 (needle lengths 3, 5, 6 and 7).
TEXT indexbody<>(SB),NOSPLIT,$0-56
	// The main idea is to load 'sep' (the needle) into one or more
	// registers once, so that it is not reloaded for every subsequent
	// substring comparison.
	//
	// Every search loop has the same shape: check the bound, load the
	// head of the candidate with a post-increment of R0 by one byte,
	// and compare it with the needle head. On a mismatch R0 already
	// points at the next candidate. Needle lengths that do not fit in a
	// single load are checked as a head chunk plus one or more further
	// chunks; the final chunk may overlap the previous one.
	SUB	R3, R1, R4
	// R4 contains the start of last substring for comparison
	// (haystack + len(haystack) - len(needle)).
	ADD	R0, R4, R4
	// R8 = haystack + 1. At 'found' R0 is one byte past the matching
	// start, so R0 - R8 is the match index.
	ADD	$1, R0, R8

	// Dispatch on the needle length: above 8 goes to greater_8, exactly 8
	// (bit 3 set) falls through to len_8, and 2..7 is decoded bit by bit.
	CMP	$8, R3
	BHI	greater_8
	TBZ	$3, R3, len_2_7
len_8:
	// R5 contains 8-byte of sep
	MOVD	(R2), R5
loop_8:
	// R6 contains substring for comparison
	// Give up once R0 has moved past the last valid start (R4).
	CMP	R4, R0
	BHI	not_found
	MOVD.P	1(R0), R6
	CMP	R5, R6
	BNE	loop_8
	B	found
len_2_7:
	// Bit 2 clear: length 2 or 3. Bit 1 clear: length 4 or 5.
	// Bit 0 clear: length 6. Otherwise length 7.
	TBZ	$2, R3, len_2_3
	TBZ	$1, R3, len_4_5
	TBZ	$0, R3, len_6
len_7:
	// R5 and R6 contain 7-byte of sep
	MOVWU	(R2), R5
	// 1-byte overlap with R5
	MOVWU	3(R2), R6
loop_7:
	CMP	R4, R0
	BHI	not_found
	// R3 (the needle length) is no longer needed; reuse it as scratch.
	MOVWU.P	1(R0), R3
	CMP	R5, R3
	BNE	loop_7
	// R0 has advanced by one, so offset 2 is byte 3 of the candidate.
	MOVWU	2(R0), R3
	CMP	R6, R3
	BNE	loop_7
	B	found
len_6:
	// R5 and R6 contain 6-byte of sep
	MOVWU	(R2), R5
	MOVHU	4(R2), R6
loop_6:
	CMP	R4, R0
	BHI	not_found
	MOVWU.P	1(R0), R3
	CMP	R5, R3
	BNE	loop_6
	// Offset 3 after the post-increment is byte 4 of the candidate.
	MOVHU	3(R0), R3
	CMP	R6, R3
	BNE	loop_6
	B	found
len_4_5:
	TBZ	$0, R3, len_4
len_5:
	// R5 and R7 contain 5-byte of sep
	MOVWU	(R2), R5
	MOVBU	4(R2), R7
loop_5:
	CMP	R4, R0
	BHI	not_found
	MOVWU.P	1(R0), R3
	CMP	R5, R3
	BNE	loop_5
	// Offset 3 after the post-increment is byte 4 of the candidate.
	MOVBU	3(R0), R3
	CMP	R7, R3
	BNE	loop_5
	B	found
len_4:
	// R5 contains 4-byte of sep
	MOVWU	(R2), R5
loop_4:
	CMP	R4, R0
	BHI	not_found
	MOVWU.P	1(R0), R6
	CMP	R5, R6
	BNE	loop_4
	B	found
len_2_3:
	TBZ	$0, R3, len_2
len_3:
	// R6 and R7 contain 3-byte of sep
	MOVHU	(R2), R6
	MOVBU	2(R2), R7
loop_3:
	CMP	R4, R0
	BHI	not_found
	MOVHU.P	1(R0), R3
	CMP	R6, R3
	BNE	loop_3
	// Offset 1 after the post-increment is byte 2 of the candidate.
	MOVBU	1(R0), R3
	CMP	R7, R3
	BNE	loop_3
	B	found
len_2:
	// R5 contains 2-byte of sep
	MOVHU	(R2), R5
loop_2:
	CMP	R4, R0
	BHI	not_found
	MOVHU.P	1(R0), R6
	CMP	R5, R6
	BNE	loop_2
	// A match falls through into 'found'.
found:
	// R0 = (match start + 1) - (haystack + 1) = index of the match.
	SUB	R8, R0, R0
	MOVD	R0, (R9)
	RET
not_found:
	// No candidate matched: store -1 as the result.
	MOVD	$-1, R0
	MOVD	R0, (R9)
	RET
greater_8:
	SUB	$9, R3, R11	// len(sep) - 9, offset of R0 for last 8 bytes
	// The extra -1 accounts for R0 having been post-incremented by one
	// when R11 is used as an index in the loops below.
	CMP	$16, R3
	BHI	greater_16
len_9_16:
	MOVD.P	8(R2), R5	// R5 contains the first 8-byte of sep
	// R2 now points 8 bytes into the needle, hence the -16 rather than -8.
	SUB	$16, R3, R7	// len(sep) - 16, offset of R2 for last 8 bytes
	MOVD	(R2)(R7), R6	// R6 contains the last 8-byte of sep
loop_9_16:
	// search the first 8 bytes first
	CMP	R4, R0
	BHI	not_found
	MOVD.P	1(R0), R7
	CMP	R5, R7
	BNE	loop_9_16
	MOVD	(R0)(R11), R7
	CMP	R6, R7		// compare the last 8 bytes
	BNE	loop_9_16
	B	found
greater_16:
	CMP	$24, R3
	BHI	len_25_32
len_17_24:
	LDP.P	16(R2), (R5, R6)	// R5 and R6 contain the first 16-byte of sep
	// R2 now points 16 bytes into the needle.
	SUB	$24, R3, R10		// len(sep) - 24
	MOVD	(R2)(R10), R7		// R7 contains the last 8-byte of sep
loop_17_24:
	// search the first 16 bytes first
	CMP	R4, R0
	BHI	not_found
	MOVD.P	1(R0), R10
	CMP	R5, R10
	BNE	loop_17_24
	// Offset 7 after the post-increment is byte 8 of the candidate.
	MOVD	7(R0), R10
	CMP	R6, R10
	BNE	loop_17_24
	MOVD	(R0)(R11), R10
	CMP	R7, R10		// compare the last 8 bytes
	BNE	loop_17_24
	B	found
len_25_32:
	LDP.P	16(R2), (R5, R6)
	MOVD.P	8(R2), R7	// R5, R6 and R7 contain the first 24-byte of sep
	// R2 now points 24 bytes into the needle.
	SUB	$32, R3, R12	// len(sep) - 32
	MOVD	(R2)(R12), R10	// R10 contains the last 8-byte of sep
loop_25_32:
	// search the first 24 bytes first
	CMP	R4, R0
	BHI	not_found
	MOVD.P	1(R0), R12
	CMP	R5, R12
	BNE	loop_25_32
	// Offsets 7 and 15 after the post-increment are bytes 8 and 16 of
	// the candidate.
	MOVD	7(R0), R12
	CMP	R6, R12
	BNE	loop_25_32
	MOVD	15(R0), R12
	CMP	R7, R12
	BNE	loop_25_32
	MOVD	(R0)(R11), R12
	CMP	R10, R12	// compare the last 8 bytes
	BNE	loop_25_32
	B	found

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to webmaster@9p.io.