Text file src/internal/bytealg/index_arm64.s

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  TEXT ·Index(SB),NOSPLIT,$0-56	// a (haystack) at 0(FP), b (needle) at 24(FP), result at 48(FP)
     9  	MOVD	$ret+48(FP), R9		// R9 = address of the result slot (indexbody writes through it)
    10  	MOVD	b_len+32(FP), R3	// R3 = needle length
    11  	MOVD	b_base+24(FP), R2	// R2 = needle pointer
    12  	MOVD	a_len+8(FP), R1		// R1 = haystack length
    13  	MOVD	a_base+0(FP), R0	// R0 = haystack pointer
    14  	B	indexbody<>(SB)		// tail-jump: indexbody stores the result and executes RET
    15  
    16  TEXT ·IndexString(SB),NOSPLIT,$0-40	// a (haystack) at 0(FP), b (needle) at 16(FP), result at 32(FP)
    17  	MOVD	$ret+32(FP), R9		// R9 = address of the result slot (indexbody writes through it)
    18  	MOVD	b_len+24(FP), R3	// R3 = needle length
    19  	MOVD	b_base+16(FP), R2	// R2 = needle pointer
    20  	MOVD	a_len+8(FP), R1		// R1 = haystack length
    21  	MOVD	a_base+0(FP), R0	// R0 = haystack pointer
    22  	B	indexbody<>(SB)		// tail-jump: indexbody stores the result and executes RET
    23  
    24  // indexbody stores at (R9) the offset of the first match of the needle in the haystack, or -1 if there is none. Input:
    25  //   R0: haystack (advanced by 1 for every candidate position tried)
    26  //   R1: length of haystack
    27  //   R2: needle
    28  //   R3: length of needle (2 <= len <= 32); reused as scratch in some loops after the length dispatch
    29  //   R9: address to put result
    30  TEXT indexbody<>(SB),NOSPLIT,$0-56
    31  	// main idea is to load 'sep' (the needle) into separate register(s)
    32  	// once, to avoid reloading it from memory
    33  	// for subsequent substring comparisons
    34  	SUB	R3, R1, R4	// R4 = len(haystack) - len(needle)
    35  	// after the ADD below, R4 contains the start of last substring for comparison
    36  	ADD	R0, R4, R4
    37  	ADD	$1, R0, R8	// R8 = haystack + 1; R0 is post-incremented before each compare, so R0 - R8 is the match offset
    38  
    39  	CMP	$8, R3
    40  	BHI	greater_8	// needle longer than 8 bytes
    41  	TBZ	$3, R3, len_2_7	// len <= 8 here, so bit 3 clear means len < 8
    42  len_8:
    43  	// R5 contains 8-byte of sep
    44  	MOVD	(R2), R5
    45  loop_8:
    46  	// R6 contains substring for comparison
    47  	CMP	R4, R0
    48  	BHI	not_found	// R0 is past the last candidate start
    49  	MOVD.P	1(R0), R6	// load 8 bytes at R0, then R0++
    50  	CMP	R5, R6
    51  	BNE	loop_8
    52  	B	found
    53  len_2_7:
    54  	TBZ	$2, R3, len_2_3	// bit 2 clear: len is 2 or 3
    55  	TBZ	$1, R3, len_4_5	// bit 2 set, bit 1 clear: len is 4 or 5
    56  	TBZ	$0, R3, len_6	// bits 2 and 1 set, bit 0 clear: len is 6
    57  len_7:
    58  	// R5 and R6 contain 7-byte of sep
    59  	MOVWU	(R2), R5	// needle bytes 0..3
    60  	// 1-byte overlap with R5
    61  	MOVWU	3(R2), R6	// needle bytes 3..6
    62  loop_7:
    63  	CMP	R4, R0
    64  	BHI	not_found
    65  	MOVWU.P	1(R0), R3	// R3 is scratch from here on; load candidate bytes 0..3, then R0++
    66  	CMP	R5, R3
    67  	BNE	loop_7
    68  	MOVWU	2(R0), R3	// R0 already advanced by 1: candidate bytes 3..6
    69  	CMP	R6, R3
    70  	BNE	loop_7
    71  	B	found
    72  len_6:
    73  	// R5 and R6 contain 6-byte of sep
    74  	MOVWU	(R2), R5	// needle bytes 0..3
    75  	MOVHU	4(R2), R6	// needle bytes 4..5
    76  loop_6:
    77  	CMP	R4, R0
    78  	BHI	not_found
    79  	MOVWU.P	1(R0), R3	// candidate bytes 0..3, then R0++
    80  	CMP	R5, R3
    81  	BNE	loop_6
    82  	MOVHU	3(R0), R3	// R0 already advanced by 1: candidate bytes 4..5
    83  	CMP	R6, R3
    84  	BNE	loop_6
    85  	B	found
    86  len_4_5:
    87  	TBZ	$0, R3, len_4	// bit 0 clear: len is 4
    88  len_5:
    89  	// R5 and R7 contain 5-byte of sep
    90  	MOVWU	(R2), R5	// needle bytes 0..3
    91  	MOVBU	4(R2), R7	// needle byte 4
    92  loop_5:
    93  	CMP	R4, R0
    94  	BHI	not_found
    95  	MOVWU.P	1(R0), R3	// candidate bytes 0..3, then R0++
    96  	CMP	R5, R3
    97  	BNE	loop_5
    98  	MOVBU	3(R0), R3	// R0 already advanced by 1: candidate byte 4
    99  	CMP	R7, R3
   100  	BNE	loop_5
   101  	B	found
   102  len_4:
   103  	// R5 contains 4-byte of sep
   104  	MOVWU	(R2), R5
   105  loop_4:
   106  	CMP	R4, R0
   107  	BHI	not_found
   108  	MOVWU.P	1(R0), R6	// candidate 4 bytes, then R0++
   109  	CMP	R5, R6
   110  	BNE	loop_4
   111  	B	found
   112  len_2_3:
   113  	TBZ	$0, R3, len_2	// bit 0 clear: len is 2
   114  len_3:
   115  	// R6 and R7 contain 3-byte of sep
   116  	MOVHU	(R2), R6	// needle bytes 0..1
   117  	MOVBU	2(R2), R7	// needle byte 2
   118  loop_3:
   119  	CMP	R4, R0
   120  	BHI	not_found
   121  	MOVHU.P	1(R0), R3	// candidate bytes 0..1, then R0++
   122  	CMP	R6, R3
   123  	BNE	loop_3
   124  	MOVBU	1(R0), R3	// R0 already advanced by 1: candidate byte 2
   125  	CMP	R7, R3
   126  	BNE	loop_3
   127  	B	found
   128  len_2:
   129  	// R5 contains 2-byte of sep
   130  	MOVHU	(R2), R5
   131  loop_2:
   132  	CMP	R4, R0
   133  	BHI	not_found
   134  	MOVHU.P	1(R0), R6	// candidate 2 bytes, then R0++
   135  	CMP	R5, R6
   136  	BNE	loop_2	// on a match, fall through into found
   137  found:
   138  	SUB	R8, R0, R0	// R0 = (match start + 1) - (haystack + 1) = match offset
   139  	MOVD	R0, (R9)	// store the offset through the result address
   140  	RET
   141  not_found:
   142  	MOVD	$-1, R0
   143  	MOVD	R0, (R9)	// store -1 through the result address
   144  	RET
   145  greater_8:
   146  	SUB	$9, R3, R11	// len(sep) - 9, offset from the post-incremented R0 to the candidate's last 8 bytes
   147  	CMP	$16, R3
   148  	BHI	greater_16
   149  len_9_16:
   150  	MOVD.P	8(R2), R5	// R5 contains the first 8-byte of sep; R2 advances by 8
   151  	SUB	$16, R3, R7	// len(sep) - 16, offset from the advanced R2 to the last 8 bytes
   152  	MOVD	(R2)(R7), R6	// R6 contains the last 8-byte of sep (overlaps R5 when len < 16)
   153  loop_9_16:
   154  	// search the first 8 bytes first
   155  	CMP	R4, R0
   156  	BHI	not_found
   157  	MOVD.P	1(R0), R7	// R7 is scratch now; candidate bytes 0..7, then R0++
   158  	CMP	R5, R7
   159  	BNE	loop_9_16
   160  	MOVD	(R0)(R11), R7	// candidate's last 8 bytes
   161  	CMP	R6, R7		// compare the last 8 bytes
   162  	BNE	loop_9_16
   163  	B	found
   164  greater_16:
   165  	CMP	$24, R3
   166  	BHI	len_25_32
   167  len_17_24:
   168  	LDP.P	16(R2), (R5, R6)	// R5 and R6 contain the first 16-byte of sep; R2 advances by 16
   169  	SUB	$24, R3, R10		// len(sep) - 24, offset from the advanced R2 to the last 8 bytes
   170  	MOVD	(R2)(R10), R7		// R7 contains the last 8-byte of sep
   171  loop_17_24:
   172  	// search the first 16 bytes first
   173  	CMP	R4, R0
   174  	BHI	not_found
   175  	MOVD.P	1(R0), R10	// R10 is scratch now; candidate bytes 0..7, then R0++
   176  	CMP	R5, R10
   177  	BNE	loop_17_24
   178  	MOVD	7(R0), R10	// R0 already advanced by 1: candidate bytes 8..15
   179  	CMP	R6, R10
   180  	BNE	loop_17_24
   181  	MOVD	(R0)(R11), R10	// candidate's last 8 bytes
   182  	CMP	R7, R10		// compare the last 8 bytes
   183  	BNE	loop_17_24
   184  	B	found
   185  len_25_32:
   186  	LDP.P	16(R2), (R5, R6)	// first 16 bytes of sep; R2 advances by 16
   187  	MOVD.P	8(R2), R7	// R5, R6 and R7 contain the first 24-byte of sep; R2 advances by 8 more
   188  	SUB	$32, R3, R12	// len(sep) - 32, offset from the advanced R2 to the last 8 bytes
   189  	MOVD	(R2)(R12), R10	// R10 contains the last 8-byte of sep
   190  loop_25_32:
   191  	// search the first 24 bytes first
   192  	CMP	R4, R0
   193  	BHI	not_found
   194  	MOVD.P	1(R0), R12	// R12 is scratch now; candidate bytes 0..7, then R0++
   195  	CMP	R5, R12
   196  	BNE	loop_25_32
   197  	MOVD	7(R0), R12	// R0 already advanced by 1: candidate bytes 8..15
   198  	CMP	R6, R12
   199  	BNE	loop_25_32
   200  	MOVD	15(R0), R12	// candidate bytes 16..23
   201  	CMP	R7, R12
   202  	BNE	loop_25_32
   203  	MOVD	(R0)(R11), R12	// candidate's last 8 bytes
   204  	CMP	R10, R12	// compare the last 8 bytes
   205  	BNE	loop_25_32
   206  	B	found
   207  

View as plain text