memmove_arm.s

     1  // Inferno's libkern/memmove-arm.s
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/libkern/memmove-arm.s
     3  //
     4  //         Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
     5  //         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
     6  //         Portions Copyright 2009 The Go Authors. All rights reserved.
     7  //
     8  // Permission is hereby granted, free of charge, to any person obtaining a copy
     9  // of this software and associated documentation files (the "Software"), to deal
    10  // in the Software without restriction, including without limitation the rights
    11  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    12  // copies of the Software, and to permit persons to whom the Software is
    13  // furnished to do so, subject to the following conditions:
    14  //
    15  // The above copyright notice and this permission notice shall be included in
    16  // all copies or substantial portions of the Software.
    17  //
    18  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    19  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    20  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    21  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    22  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    23  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    24  // THE SOFTWARE.
    25  
    26  #include "textflag.h"
    27  
    28  // TE or TS are spilled to the stack during bulk register moves.
        // TS is the running destination pointer (loaded from "to") and TE is the
        // destination end pointer (TS + N). The bulk MOVM loops overwrite R0
        // (backward copy) or R8 (forward copy), so the affected pointer is saved
        // in the function's 4-byte frame slot first and reloaded afterwards.
    29  #define TS	R0
    30  #define TE	R8
    31  
    32  // Warning: the linker will use R11 to synthesize certain instructions. Please
    33  // take care and double check with objdump.
        // FROM is the running source pointer. N holds the byte count; it shares
        // R12 with TMP and is no longer read once TMP has been written.
    34  #define FROM	R11
    35  #define N	R12
    36  #define TMP	R12				/* N and TMP don't overlap */
        // TMP1 carries one word in the aligned word-copy loops. It shares R5 with
        // RSHIFT, which is only assigned on the unaligned-source paths.
    37  #define TMP1	R5
    38  
        // Unaligned-source paths only: RSHIFT/LSHIFT are the bit shifts used to
        // splice two aligned source words into one destination word, and OFFSET
        // is the byte correction applied to FROM before the final byte loop.
    39  #define RSHIFT	R5
    40  #define LSHIFT	R6
    41  #define OFFSET	R7
    42  
        // Backward unaligned block copy: BRn are the four source words read per
        // iteration, BWn the four destination words written. BW0/BW1/BW2 occupy
        // the same registers as BR1/BR2/BR3, so each output word overwrites an
        // input word; the instruction order in the loop depends on this.
    43  #define BR0	R0					/* shared with TS */
    44  #define BW0	R1
    45  #define BR1	R1
    46  #define BW1	R2
    47  #define BR2	R2
    48  #define BW2	R3
    49  #define BR3	R3
    50  #define BW3	R4
    51  
        // Forward unaligned block copy: FRn are the source words read, FWn the
        // destination words written. FW1/FW2/FW3 occupy the same registers as
        // FR0/FR1/FR2; FR3 (R8) carries the last source word into the next
        // iteration.
    52  #define FW0	R1
    53  #define FR0	R2
    54  #define FW1	R2
    55  #define FR1	R3
    56  #define FW2	R3
    57  #define FR2	R4
    58  #define FW3	R4
    59  #define FR3	R8					/* shared with TE */
    60  
    61  // See memmove Go doc for important implementation constraints.
    62  
    63  // func memmove(to, from unsafe.Pointer, n uintptr)
        //
        // Copies n bytes from "from" to "to". The copy direction is picked by
        // comparing the two pointers: ascending when to <= from (unsigned),
        // otherwise descending from the end of both buffers, so that source
        // bytes are read before the copy can overwrite them.
        //
        // Each direction follows the same plan:
        //   1. fewer than 4 bytes: plain byte loop;
        //   2. byte-copy until the destination pointer is 4-byte aligned;
        //   3. source then also aligned: 32-byte MOVM blocks, then single
        //      words, then trailing bytes;
        //   4. source not aligned: 16-byte blocks built from aligned source
        //      words combined with shifts, then trailing bytes.
        //
        // Frame: 4 bytes of locals (one slot, named savedts or savedte depending
        // on the direction) and 12 bytes of arguments.
        // Registers written: R0-R8, R11, R12.
    64  TEXT runtime·memmove(SB), NOSPLIT, $4-12
    65  _memmove:
    66  	MOVW	to+0(FP), TS
    67  	MOVW	from+4(FP), FROM
    68  	MOVW	n+8(FP), N
    69  
    70  	ADD	N, TS, TE	/* to end pointer */
    71  
        // Destination at or below the source: copy forward. Otherwise fall
        // through to the backward copy.
    72  	CMP	FROM, TS
    73  	BLS	_forward
    74  
        // ---- Backward copy: FROM and TE walk down from the buffer ends; TS
        // stays at the start of the destination and marks where to stop. ----
    75  _back:
    76  	ADD	N, FROM		/* from end pointer */
    77  	CMP	$4, N		/* need at least 4 bytes to copy */
    78  	BLT	_b1tail
    79  
    80  _b4align:				/* align destination on 4 */
    81  	AND.S	$3, TE, TMP
    82  	BEQ	_b4aligned
    83  
    84  	MOVBU.W	-1(FROM), TMP	/* pre-indexed */
    85  	MOVBU.W	TMP, -1(TE)	/* pre-indexed */
    86  	B	_b4align
    87  
        // TMP = FROM & 3. A non-zero value is passed on to _bunaligned, which
        // uses it to select the shift amounts.
    88  _b4aligned:				/* is source now aligned? */
    89  	AND.S	$3, FROM, TMP
    90  	BNE	_bunaligned
    91  
        // The block loop runs while TE > TS+31, i.e. at least 32 bytes remain.
        // TS (R0) is part of the MOVM register list, so it is spilled first.
    92  	ADD	$31, TS, TMP	/* do 32-byte chunks if possible */
    93  	MOVW	TS, savedts-4(SP)
    94  _b32loop:
    95  	CMP	TMP, TE
    96  	BLS	_b4tail
    97  
    98  	MOVM.DB.W (FROM), [R0-R7]
    99  	MOVM.DB.W [R0-R7], (TE)
   100  	B	_b32loop
   101  
        // Reload TS, then copy one word at a time while TE > TS+3.
   102  _b4tail:				/* do remaining words if possible */
   103  	MOVW	savedts-4(SP), TS
   104  	ADD	$3, TS, TMP
   105  _b4loop:
   106  	CMP	TMP, TE
   107  	BLS	_b1tail
   108  
   109  	MOVW.W	-4(FROM), TMP1	/* pre-indexed */
   110  	MOVW.W	TMP1, -4(TE)	/* pre-indexed */
   111  	B	_b4loop
   112  
        // Byte loop shared by every backward path; finishes when TE has come
        // down to TS.
   113  _b1tail:				/* remaining bytes */
   114  	CMP	TE, TS
   115  	BEQ	_return
   116  
   117  	MOVBU.W	-1(FROM), TMP	/* pre-indexed */
   118  	MOVBU.W	TMP, -1(TE)	/* pre-indexed */
   119  	B	_b1tail
   120  
        // ---- Forward copy: FROM and TS walk up; TE stays at the end of the
        // destination and marks where to stop. ----
   121  _forward:
   122  	CMP	$4, N		/* need at least 4 bytes to copy */
   123  	BLT	_f1tail
   124  
   125  _f4align:				/* align destination on 4 */
   126  	AND.S	$3, TS, TMP
   127  	BEQ	_f4aligned
   128  
   129  	MOVBU.P	1(FROM), TMP	/* implicit write back */
   130  	MOVBU.P	TMP, 1(TS)	/* implicit write back */
   131  	B	_f4align
   132  
        // TMP = FROM & 3. A non-zero value is passed on to _funaligned, which
        // uses it to select the shift amounts.
   133  _f4aligned:				/* is source now aligned? */
   134  	AND.S	$3, FROM, TMP
   135  	BNE	_funaligned
   136  
        // The block loop runs while TS < TE-31, i.e. at least 32 bytes remain.
        // TE (R8) is part of the MOVM register list, so it is spilled first.
   137  	SUB	$31, TE, TMP	/* do 32-byte chunks if possible */
   138  	MOVW	TE, savedte-4(SP)
   139  _f32loop:
   140  	CMP	TMP, TS
   141  	BHS	_f4tail
   142  
   143  	MOVM.IA.W (FROM), [R1-R8]
   144  	MOVM.IA.W [R1-R8], (TS)
   145  	B	_f32loop
   146  
        // Reload TE, then copy one word at a time while TS < TE-3.
   147  _f4tail:
   148  	MOVW	savedte-4(SP), TE
   149  	SUB	$3, TE, TMP	/* do remaining words if possible */
   150  _f4loop:
   151  	CMP	TMP, TS
   152  	BHS	_f1tail
   153  
   154  	MOVW.P	4(FROM), TMP1	/* implicit write back */
   155  	MOVW.P	TMP1, 4(TS)	/* implicit write back */
   156  	B	_f4loop
   157  
        // Byte loop shared by every forward path; finishes when TS has reached
        // TE.
   158  _f1tail:
   159  	CMP	TS, TE
   160  	BEQ	_return
   161  
   162  	MOVBU.P	1(FROM), TMP	/* implicit write back */
   163  	MOVBU.P	TMP, 1(TS)	/* implicit write back */
   164  	B	_f1tail
   165  
        // Common exit. R0 is reloaded with the original destination pointer
        // (the forward path has advanced TS, which lives in R0).
        // NOTE(review): the Go signature above declares no result; presumably
        // this is kept from the C-style "return dst" contract - confirm.
   166  _return:
   167  	MOVW	to+0(FP), R0
   168  	RET
   169  
        // ---- Backward copy, destination aligned but source not. ----
        // On entry TMP = FROM & 3 (1, 2 or 3). The single CMP below drives all
        // nine conditional moves; LT/EQ/GT are mutually exclusive, so exactly
        // one group takes effect:
        //   RSHIFT = 8 * (FROM & 3), LSHIFT = 32 - RSHIFT, OFFSET = FROM & 3.
   170  _bunaligned:
   171  	CMP	$2, TMP		/* is TMP < 2 ? */
   172  
   173  	MOVW.LT	$8, RSHIFT		/* (R(n)<<24)|(R(n-1)>>8) */
   174  	MOVW.LT	$24, LSHIFT
   175  	MOVW.LT	$1, OFFSET
   176  
   177  	MOVW.EQ	$16, RSHIFT		/* (R(n)<<16)|(R(n-1)>>16) */
   178  	MOVW.EQ	$16, LSHIFT
   179  	MOVW.EQ	$2, OFFSET
   180  
   181  	MOVW.GT	$24, RSHIFT		/* (R(n)<<8)|(R(n-1)>>24) */
   182  	MOVW.GT	$8, LSHIFT
   183  	MOVW.GT	$3, OFFSET
   184  
        // The block loop is used only while more than 16 bytes remain
        // (TE > TS+16); otherwise go straight to the byte loop. FROM has not
        // been modified yet at this point, so no correction is needed.
   185  	ADD	$16, TS, TMP	/* do 16-byte chunks if possible */
   186  	CMP	TMP, TE
   187  	BLS	_b1tail
   188  
        // Round FROM down to a word boundary, spill TS (BR0 is R0), and load
        // the aligned word FROM now points at as the first carry word.
   189  	BIC	$3, FROM		/* align source */
   190  	MOVW	TS, savedts-4(SP)
   191  	MOVW	(FROM), BR0	/* prime first block register */
   192  
        // Each iteration: start BW3 from the carried word (BR0) before it is
        // overwritten, load the next four lower source words into BR0-BR3,
        // then form each destination word from two neighbouring source words.
        // The outputs overwrite inputs (BW2 is BR3, BW1 is BR2, BW0 is BR1),
        // so the statement order must not change. BR0 is left untouched and
        // becomes the carry word for the next iteration.
   193  _bu16loop:
   194  	CMP	TMP, TE
   195  	BLS	_bu1tail
   196  
   197  	MOVW	BR0<<LSHIFT, BW3
   198  	MOVM.DB.W (FROM), [BR0-BR3]
   199  	ORR	BR3>>RSHIFT, BW3
   200  
   201  	MOVW	BR3<<LSHIFT, BW2
   202  	ORR	BR2>>RSHIFT, BW2
   203  
   204  	MOVW	BR2<<LSHIFT, BW1
   205  	ORR	BR1>>RSHIFT, BW1
   206  
   207  	MOVW	BR1<<LSHIFT, BW0
   208  	ORR	BR0>>RSHIFT, BW0
   209  
   210  	MOVM.DB.W [BW0-BW3], (TE)
   211  	B	_bu16loop
   212  
        // Reload TS and add back the bytes removed by the BIC above, so FROM is
        // again the true (unaligned) source position for the byte loop.
   213  _bu1tail:
   214  	MOVW	savedts-4(SP), TS
   215  	ADD	OFFSET, FROM
   216  	B	_b1tail
   217  
        // ---- Forward copy, destination aligned but source not. ----
        // On entry TMP = FROM & 3 (1, 2 or 3). Same selection scheme as the
        // backward case, except for OFFSET:
        //   RSHIFT = 8 * (FROM & 3), LSHIFT = 32 - RSHIFT,
        //   OFFSET = 4 - (FROM & 3).
   218  _funaligned:
   219  	CMP	$2, TMP
   220  
   221  	MOVW.LT	$8, RSHIFT		/* (R(n+1)<<24)|(R(n)>>8) */
   222  	MOVW.LT	$24, LSHIFT
   223  	MOVW.LT	$3, OFFSET
   224  
   225  	MOVW.EQ	$16, RSHIFT		/* (R(n+1)<<16)|(R(n)>>16) */
   226  	MOVW.EQ	$16, LSHIFT
   227  	MOVW.EQ	$2, OFFSET
   228  
   229  	MOVW.GT	$24, RSHIFT		/* (R(n+1)<<8)|(R(n)>>24) */
   230  	MOVW.GT	$8, LSHIFT
   231  	MOVW.GT	$1, OFFSET
   232  
        // The block loop is used only while more than 16 bytes remain
        // (TS < TE-16); otherwise go straight to the byte loop. FROM has not
        // been modified yet at this point, so no correction is needed.
   233  	SUB	$16, TE, TMP	/* do 16-byte chunks if possible */
   234  	CMP	TMP, TS
   235  	BHS	_f1tail
   236  
        // Round FROM down to a word boundary, spill TE (FR3 is R8), and load
        // the first aligned word as the carry word. The post-increment leaves
        // FROM 4 bytes past that word, which is why OFFSET is 4 - (FROM & 3).
   237  	BIC	$3, FROM		/* align source */
   238  	MOVW	TE, savedte-4(SP)
   239  	MOVW.P	4(FROM), FR3	/* prime last block register, implicit write back */
   240  
        // Each iteration: start FW0 from the carried word (FR3) before it is
        // overwritten, load the next four source words into FR0-FR3, then
        // form each destination word from two neighbouring source words. The
        // outputs overwrite inputs (FW1 is FR0, FW2 is FR1, FW3 is FR2), so
        // the statement order must not change. FR3 is left untouched and
        // becomes the carry word for the next iteration.
   241  _fu16loop:
   242  	CMP	TMP, TS
   243  	BHS	_fu1tail
   244  
   245  	MOVW	FR3>>RSHIFT, FW0
   246  	MOVM.IA.W (FROM), [FR0,FR1,FR2,FR3]
   247  	ORR	FR0<<LSHIFT, FW0
   248  
   249  	MOVW	FR0>>RSHIFT, FW1
   250  	ORR	FR1<<LSHIFT, FW1
   251  
   252  	MOVW	FR1>>RSHIFT, FW2
   253  	ORR	FR2<<LSHIFT, FW2
   254  
   255  	MOVW	FR2>>RSHIFT, FW3
   256  	ORR	FR3<<LSHIFT, FW3
   257  
   258  	MOVM.IA.W [FW0,FW1,FW2,FW3], (TS)
   259  	B	_fu16loop
   260  
        // Reload TE and step FROM back by OFFSET, so it is again the true
        // (unaligned) source position for the byte loop.
   261  _fu1tail:
   262  	MOVW	savedte-4(SP), TE
   263  	SUB	OFFSET, FROM
   264  	B	_f1tail
   265
View as plain text