This adds the overhead of a copy when serializing and deserializing keys. I have not benchmarked much, but runtimes seem barely changed at all by that. When a lot of keys are in memory, it improves memory use. And, it prevents keys sometimes getting PINNED in memory and failing to GC, which is a problem ByteString has sometimes. In particular, git-annex sync from a borg special remote had that problem and this improved its memory use by a large amount. Sponsored-by: Shae Erisson on Patreon
		
			
				
	
	
		
			77 lines
		
	
	
	
		
			2.4 KiB
			
		
	
	
	
		
			Haskell
		
	
	
	
	
	
			
		
		
	
	
			77 lines
		
	
	
	
		
			2.4 KiB
			
		
	
	
	
		
			Haskell
		
	
	
	
	
	
{- git-annex backend utilities
 | 
						|
 -
 | 
						|
 - Copyright 2012-2020 Joey Hess <id@joeyh.name>
 | 
						|
 -
 | 
						|
 - Licensed under the GNU AGPL version 3 or higher.
 | 
						|
 -}
 | 
						|
 | 
						|
{-# LANGUAGE OverloadedStrings #-}
 | 
						|
 | 
						|
module Backend.Utilities where
 | 
						|
 | 
						|
import Annex.Common
 | 
						|
import qualified Annex
 | 
						|
import Utility.Hash
 | 
						|
import Types.Key
 | 
						|
import Types.KeySource
 | 
						|
 | 
						|
import qualified Data.ByteString as S
 | 
						|
import qualified Data.ByteString.Short as S (ShortByteString, toShort)
 | 
						|
import qualified Data.ByteString.Lazy as L
 | 
						|
import qualified System.FilePath.ByteString as P
 | 
						|
import Data.Char
 | 
						|
import Data.Word
 | 
						|
 | 
						|
{- Generates a keyName from an input string. Takes care of sanitizing it.
 | 
						|
 - If it's not too long, the full string is used as the keyName.
 | 
						|
 - Otherwise, it's truncated, and its md5 is prepended to ensure a unique
 | 
						|
 - key. -}
 | 
						|
genKeyName :: String -> S.ShortByteString
 | 
						|
genKeyName s
 | 
						|
	-- Avoid making keys longer than the length of a SHA256 checksum.
 | 
						|
	| bytelen > sha256len = S.toShort $ encodeBS $
 | 
						|
		truncateFilePath (sha256len - md5len - 1) s' ++ "-" ++ 
 | 
						|
			show (md5 bl)
 | 
						|
	| otherwise = S.toShort $ encodeBS s'
 | 
						|
  where
 | 
						|
	s' = preSanitizeKeyName s
 | 
						|
	bl = encodeBL s
 | 
						|
	bytelen = fromIntegral $ L.length bl
 | 
						|
 | 
						|
	sha256len = 64
 | 
						|
	md5len = 32
 | 
						|
 | 
						|
{- Converts a key to a version that includes an extension from the
 | 
						|
 - file that the key was generated from.  -}
 | 
						|
addE :: KeySource -> (KeyVariety -> KeyVariety) -> Key -> Annex Key
 | 
						|
addE source sethasext k = do
 | 
						|
	maxlen <- annexMaxExtensionLength <$> Annex.getGitConfig
 | 
						|
	let ext = selectExtension maxlen (keyFilename source)
 | 
						|
	return $ alterKey k $ \d -> d
 | 
						|
		{ keyName = keyName d <> S.toShort ext
 | 
						|
		, keyVariety = sethasext (keyVariety d)
 | 
						|
		}
 | 
						|
 | 
						|
selectExtension :: Maybe Int -> RawFilePath -> S.ByteString
 | 
						|
selectExtension maxlen f
 | 
						|
	| null es = ""
 | 
						|
	| otherwise = S.intercalate "." ("":es)
 | 
						|
  where
 | 
						|
	es = filter (not . S.null) $ reverse $
 | 
						|
		take 2 $ filter (S.all validInExtension) $
 | 
						|
		takeWhile shortenough $
 | 
						|
		reverse $ S.split (fromIntegral (ord '.')) (P.takeExtensions f')
 | 
						|
	shortenough e = S.length e <= fromMaybe maxExtensionLen maxlen
 | 
						|
	-- Avoid treating a file ".foo" as having its whole name as an
 | 
						|
	-- extension.
 | 
						|
	f' = S.dropWhile (== fromIntegral (ord '.')) (P.takeFileName f)
 | 
						|
 | 
						|
validInExtension :: Word8 -> Bool
 | 
						|
validInExtension c
 | 
						|
	| isAlphaNum (chr (fromIntegral c)) = True
 | 
						|
	| fromIntegral c == ord '.' = True
 | 
						|
	| c <= 127 = False -- other ascii: spaces, punctuation, control chars
 | 
						|
	| otherwise = True -- utf8 is allowed, also other encodings
 | 
						|
 | 
						|
maxExtensionLen :: Int
 | 
						|
maxExtensionLen = 4 -- long enough for "jpeg"
 |