 c53c70a90f
			
		
	
	
	c53c70a90f
	
	
	
		
			
			This change cleans up the string code in a number of ways:
			- For memcpy(), fix bug in prefetch and increase distance to 3 lines; optimize for unaligned data; do all loads before wh64 to make memcpy safe for forward-overlapping calls; etc. Performance is improved.
			- Use new copy_byte() function on tilegx to spread a single byte value out into a full word using the shufflebytes instruction.
			- Clean up header include ordering to be more canonical, and remove spurious #undefs of function names.

			Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
		
			
				
	
	
		
			44 lines
		
	
	
	
		
			1.5 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			44 lines
		
	
	
	
		
			1.5 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 * Copyright 2013 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 *
 * Provide a mask based on the pointer alignment that
 * sets up non-zero bytes before the beginning of the string.
 * The MASK expression works because shift counts are taken mod 64.
 * Also, specify how to count "first" and "last" bits
 * when the bits have been read as a word.
 */
 | |
| 
 | |
| #include <asm/byteorder.h>
 | |
| 
 | |
/*
 * Endian-dependent helpers for scanning strings one 64-bit word at a time.
 *
 * MASK(x):    ones in the (x mod 8) bytes of the word that come first in
 *             memory, zero elsewhere.  x is not reduced explicitly; the
 *             expression relies on __insn_shl taking its shift count
 *             mod 64.
 * NULMASK(x): x is a bit index in the range 0..63.  Little-endian: bits
 *             0 through x are set.  Big-endian: the x most-significant
 *             bits are set.
 * CFZ(x):     count of zero bits starting from the end of the word that
 *             holds the first byte in memory.
 * REVCZ(x):   count of zero bits starting from the opposite end.
 *
 * Each arithmetic use of the macro argument is parenthesized so that an
 * argument such as "i & 7" is evaluated as a unit instead of being
 * re-associated with the surrounding shift or subtraction.
 */
#ifdef __LITTLE_ENDIAN
#define MASK(x) (__insn_shl(1ULL, ((x) << 3)) - 1)
#define NULMASK(x) ((2ULL << (x)) - 1)
#define CFZ(x) __insn_ctz(x)
#define REVCZ(x) __insn_clz(x)
#else
#define MASK(x) (__insn_shl(-2LL, ((-(x) << 3) - 1)))
#define NULMASK(x) (-2LL << (63 - (x)))
#define CFZ(x) __insn_clz(x)
#define REVCZ(x) __insn_ctz(x)
#endif
 | |
| 
 | |
/*
 * Replicate a single byte into all eight byte positions of a uint64_t.
 *
 * Byte Shuffle treats each byte of its srcB operand as an index that
 * picks one byte out of the dest vector.  Passing zero for srcB makes
 * every index zero, so the first byte is copied into all the other
 * bytes of the result.
 */
static inline uint64_t copy_byte(uint8_t byte)
{
	const uint64_t select_first = 0;	/* srcB: every index is zero */
	uint64_t lanes = byte;			/* dest: byte to replicate */

	lanes = __insn_shufflebytes(lanes, 0, select_first);
	return lanes;
}
 |