convert Key to ShortByteString
This adds the overhead of a copy when serializing and deserializing keys. I have not benchmarked much, but runtimes seem barely changed at all by that. When a lot of keys are in memory, it improves memory use. And, it prevents keys sometimes getting PINNED in memory and failing to GC, which is a problem ByteString has sometimes. In particular, git-annex sync from a borg special remote had that problem and this improved its memory use by a large amount. Sponsored-by: Shae Erisson on Patreon
This commit is contained in:
parent
012b71e471
commit
19e78816f0
15 changed files with 65 additions and 36 deletions
|
@ -18,6 +18,7 @@ import qualified Types.Remote as Remote
|
|||
import Messages
|
||||
|
||||
import Data.Maybe
|
||||
import qualified Data.ByteString.Short as S (fromShort, toShort)
|
||||
|
||||
-- From a sha pointing to the content of a file to the key
|
||||
-- to use to export it. When the file is annexed, it's the annexed key.
|
||||
|
@ -39,7 +40,7 @@ exportKey sha = mk <$> catKey sha
|
|||
-- only checksum the content.
|
||||
gitShaKey :: Git.Sha -> Key
|
||||
gitShaKey (Git.Ref s) = mkKey $ \kd -> kd
|
||||
{ keyName = s
|
||||
{ keyName = S.toShort s
|
||||
, keyVariety = OtherKey "GIT"
|
||||
}
|
||||
|
||||
|
@ -47,7 +48,7 @@ gitShaKey (Git.Ref s) = mkKey $ \kd -> kd
|
|||
keyGitSha :: Key -> Maybe Git.Sha
|
||||
keyGitSha k
|
||||
| fromKey keyVariety k == OtherKey "GIT" =
|
||||
Just (Git.Ref (fromKey keyName k))
|
||||
Just (Git.Ref (S.fromShort (fromKey keyName k)))
|
||||
| otherwise = Nothing
|
||||
|
||||
-- Is a key storing a git sha, and not used for an annexed file?
|
||||
|
|
|
@ -33,6 +33,7 @@ import qualified Backend.URL
|
|||
import qualified Backend.External
|
||||
|
||||
import qualified Data.Map as M
|
||||
import qualified Data.ByteString.Short as S (toShort, fromShort)
|
||||
import qualified Data.ByteString.Char8 as S8
|
||||
|
||||
{- Build-in backends. Does not include externals. -}
|
||||
|
@ -67,7 +68,7 @@ genKey source meterupdate preferredbackend = do
|
|||
where
|
||||
-- keyNames should not contain newline characters.
|
||||
makesane k = alterKey k $ \d -> d
|
||||
{ keyName = S8.map fixbadchar (fromKey keyName k)
|
||||
{ keyName = S.toShort (S8.map fixbadchar (S.fromShort (fromKey keyName k)))
|
||||
}
|
||||
fixbadchar c
|
||||
| c == '\n' = '_'
|
||||
|
|
|
@ -20,6 +20,7 @@ import Utility.Metered
|
|||
import qualified Utility.SimpleProtocol as Proto
|
||||
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Short as S (toShort, fromShort)
|
||||
import qualified Data.Map.Strict as M
|
||||
import Data.Char
|
||||
import Control.Concurrent
|
||||
|
@ -285,7 +286,7 @@ toProtoKey k = ProtoKey $ alterKey k $ \d -> d
|
|||
-- The extension can be easily removed, because the protocol
|
||||
-- documentation does not allow '.' to be used in the keyName,
|
||||
-- so the first one is the extension.
|
||||
{ keyName = S.takeWhile (/= dot) (keyName d)
|
||||
{ keyName = S.toShort (S.takeWhile (/= dot) (S.fromShort (keyName d)))
|
||||
, keyVariety = setHasExt (HasExt False) (keyVariety d)
|
||||
}
|
||||
where
|
||||
|
|
|
@ -24,6 +24,7 @@ import Utility.Metered
|
|||
import qualified Utility.RawFilePath as R
|
||||
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Short as S (toShort, fromShort)
|
||||
import qualified Data.ByteString.Char8 as S8
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import Control.DeepSeq
|
||||
|
@ -106,7 +107,7 @@ keyValue hash source meterupdate = do
|
|||
filesize <- liftIO $ getFileSize file
|
||||
s <- hashFile hash file meterupdate
|
||||
return $ mkKey $ \k -> k
|
||||
{ keyName = encodeBS s
|
||||
{ keyName = S.toShort (encodeBS s)
|
||||
, keyVariety = hashKeyVariety hash (HasExt False)
|
||||
, keySize = Just filesize
|
||||
}
|
||||
|
@ -160,7 +161,7 @@ needsUpgrade :: Key -> Bool
|
|||
needsUpgrade key = or
|
||||
[ "\\" `S8.isPrefixOf` keyHash key
|
||||
, S.any (not . validInExtension) (snd $ splitKeyNameExtension key)
|
||||
, not (hasExt (fromKey keyVariety key)) && keyHash key /= fromKey keyName key
|
||||
, not (hasExt (fromKey keyVariety key)) && keyHash key /= S.fromShort (fromKey keyName key)
|
||||
]
|
||||
|
||||
trivialMigrate :: Key -> Backend -> AssociatedFile -> Annex (Maybe Key)
|
||||
|
@ -171,14 +172,14 @@ trivialMigrate' :: Key -> Backend -> AssociatedFile -> Maybe Int -> Maybe Key
|
|||
trivialMigrate' oldkey newbackend afile maxextlen
|
||||
{- Fast migration from hashE to hash backend. -}
|
||||
| migratable && hasExt oldvariety = Just $ alterKey oldkey $ \d -> d
|
||||
{ keyName = keyHash oldkey
|
||||
{ keyName = S.toShort (keyHash oldkey)
|
||||
, keyVariety = newvariety
|
||||
}
|
||||
{- Fast migration from hash to hashE backend. -}
|
||||
| migratable && hasExt newvariety = case afile of
|
||||
AssociatedFile Nothing -> Nothing
|
||||
AssociatedFile (Just file) -> Just $ alterKey oldkey $ \d -> d
|
||||
{ keyName = keyHash oldkey
|
||||
{ keyName = S.toShort $ keyHash oldkey
|
||||
<> selectExtension maxextlen file
|
||||
, keyVariety = newvariety
|
||||
}
|
||||
|
@ -186,9 +187,9 @@ trivialMigrate' oldkey newbackend afile maxextlen
|
|||
- non-extension preserving key, with an extension
|
||||
- in its keyName. -}
|
||||
| newvariety == oldvariety && not (hasExt oldvariety) &&
|
||||
keyHash oldkey /= fromKey keyName oldkey =
|
||||
keyHash oldkey /= S.fromShort (fromKey keyName oldkey) =
|
||||
Just $ alterKey oldkey $ \d -> d
|
||||
{ keyName = keyHash oldkey
|
||||
{ keyName = S.toShort (keyHash oldkey)
|
||||
}
|
||||
| otherwise = Nothing
|
||||
where
|
||||
|
|
|
@ -16,6 +16,7 @@ import Types.Key
|
|||
import Types.KeySource
|
||||
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Short as S (ShortByteString, toShort)
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified System.FilePath.ByteString as P
|
||||
import Data.Char
|
||||
|
@ -25,13 +26,13 @@ import Data.Word
|
|||
- If it's not too long, the full string is used as the keyName.
|
||||
- Otherwise, it's truncated, and its md5 is prepended to ensure a unique
|
||||
- key. -}
|
||||
genKeyName :: String -> S.ByteString
|
||||
genKeyName :: String -> S.ShortByteString
|
||||
genKeyName s
|
||||
-- Avoid making keys longer than the length of a SHA256 checksum.
|
||||
| bytelen > sha256len = encodeBS $
|
||||
| bytelen > sha256len = S.toShort $ encodeBS $
|
||||
truncateFilePath (sha256len - md5len - 1) s' ++ "-" ++
|
||||
show (md5 bl)
|
||||
| otherwise = encodeBS s'
|
||||
| otherwise = S.toShort $ encodeBS s'
|
||||
where
|
||||
s' = preSanitizeKeyName s
|
||||
bl = encodeBL s
|
||||
|
@ -47,7 +48,7 @@ addE source sethasext k = do
|
|||
maxlen <- annexMaxExtensionLength <$> Annex.getGitConfig
|
||||
let ext = selectExtension maxlen (keyFilename source)
|
||||
return $ alterKey k $ \d -> d
|
||||
{ keyName = keyName d <> ext
|
||||
{ keyName = keyName d <> S.toShort ext
|
||||
, keyVariety = sethasext (keyVariety d)
|
||||
}
|
||||
|
||||
|
|
|
@ -17,6 +17,7 @@ import Utility.Metered
|
|||
|
||||
import qualified Data.ByteString.Char8 as S8
|
||||
import qualified Utility.RawFilePath as R
|
||||
import qualified Data.ByteString.Short as S (toShort, fromShort)
|
||||
|
||||
backends :: [Backend]
|
||||
backends = [backend]
|
||||
|
@ -53,12 +54,13 @@ keyValue source _ = do
|
|||
{- Old WORM keys could contain spaces and carriage returns,
|
||||
- and can be upgraded to remove them. -}
|
||||
needsUpgrade :: Key -> Bool
|
||||
needsUpgrade key = any (`S8.elem` fromKey keyName key) [' ', '\r']
|
||||
needsUpgrade key =
|
||||
any (`S8.elem` S.fromShort (fromKey keyName key)) [' ', '\r']
|
||||
|
||||
removeProblemChars :: Key -> Backend -> AssociatedFile -> Annex (Maybe Key)
|
||||
removeProblemChars oldkey newbackend _
|
||||
| migratable = return $ Just $ alterKey oldkey $ \d -> d
|
||||
{ keyName = encodeBS $ reSanitizeKeyName $ decodeBS $ keyName d }
|
||||
{ keyName = S.toShort $ encodeBS $ reSanitizeKeyName $ decodeBS $ S.fromShort $ keyName d }
|
||||
| otherwise = return Nothing
|
||||
where
|
||||
migratable = oldvariety == newvariety
|
||||
|
|
|
@ -10,6 +10,7 @@ module Command.Find where
|
|||
import Data.Default
|
||||
import qualified Data.Map as M
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Short as S (fromShort)
|
||||
import qualified Data.ByteString.Char8 as S8
|
||||
|
||||
import Command
|
||||
|
@ -100,7 +101,7 @@ formatVars key (AssociatedFile af) =
|
|||
, ("backend", decodeBS $ formatKeyVariety $ fromKey keyVariety key)
|
||||
, ("bytesize", size show)
|
||||
, ("humansize", size $ roughSize storageUnits True)
|
||||
, ("keyname", decodeBS $ fromKey keyName key)
|
||||
, ("keyname", decodeBS $ S.fromShort $ fromKey keyName key)
|
||||
, ("hashdirlower", fromRawFilePath $ hashDirLower def key)
|
||||
, ("hashdirmixed", fromRawFilePath $ hashDirMixed def key)
|
||||
, ("mtime", whenavail show $ fromKey keyMtime key)
|
||||
|
|
|
@ -47,6 +47,7 @@ import Types.Crypto
|
|||
import Types.Remote
|
||||
import Types.Key
|
||||
import Annex.SpecialRemote.Config
|
||||
import qualified Data.ByteString.Short as S (toShort)
|
||||
|
||||
{- The beginning of a Cipher is used for MAC'ing; the remainder is used
|
||||
- as the GPG symmetric encryption passphrase when using the hybrid
|
||||
|
@ -163,7 +164,7 @@ type EncKey = Key -> Key
|
|||
- on content. It does need to be repeatable. -}
|
||||
encryptKey :: Mac -> Cipher -> EncKey
|
||||
encryptKey mac c k = mkKey $ \d -> d
|
||||
{ keyName = encodeBS (macWithCipher mac c (serializeKey k))
|
||||
{ keyName = S.toShort $ encodeBS $ macWithCipher mac c (serializeKey k)
|
||||
, keyVariety = OtherKey $
|
||||
encryptedBackendNamePrefix <> encodeBS (showMac mac)
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ import Types.Key
|
|||
import Utility.DataUnits
|
||||
|
||||
import Criterion.Main
|
||||
import qualified Data.ByteString.Short as S (toShort)
|
||||
import qualified Data.ByteString.Char8 as B8
|
||||
import System.Random
|
||||
import Control.Concurrent
|
||||
|
@ -87,7 +88,7 @@ populateAssociatedFiles h num = do
|
|||
|
||||
keyN :: Integer -> Key
|
||||
keyN n = mkKey $ \k -> k
|
||||
{ keyName = B8.pack $ "key" ++ show n
|
||||
{ keyName = S.toShort (B8.pack $ "key" ++ show n)
|
||||
, keyVariety = OtherKey "BENCH"
|
||||
}
|
||||
|
||||
|
|
5
Key.hs
5
Key.hs
|
@ -31,6 +31,7 @@ module Key (
|
|||
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Short as S (toShort, fromShort)
|
||||
import qualified Data.Attoparsec.ByteString as A
|
||||
|
||||
import Common
|
||||
|
@ -62,7 +63,7 @@ serializeKey :: Key -> String
|
|||
serializeKey = decodeBS . serializeKey'
|
||||
|
||||
serializeKey' :: Key -> S.ByteString
|
||||
serializeKey' = keySerialization
|
||||
serializeKey' = S.fromShort . keySerialization
|
||||
|
||||
deserializeKey :: String -> Maybe Key
|
||||
deserializeKey = deserializeKey' . encodeBS
|
||||
|
@ -72,7 +73,7 @@ deserializeKey' = eitherToMaybe . A.parseOnly keyParser
|
|||
|
||||
instance Arbitrary KeyData where
|
||||
arbitrary = Key
|
||||
<$> (encodeBS <$> (listOf1 $ elements $ ['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "-_\r\n \t"))
|
||||
<$> (S.toShort . encodeBS <$> (listOf1 $ elements $ ['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "-_\r\n \t"))
|
||||
<*> (parseKeyVariety . encodeBS <$> (listOf1 $ elements ['A'..'Z'])) -- BACKEND
|
||||
<*> ((abs <$>) <$> arbitrary) -- size cannot be negative
|
||||
<*> ((abs . fromInteger <$>) <$> arbitrary) -- mtime cannot be negative
|
||||
|
|
3
Remote/External/Types.hs
vendored
3
Remote/External/Types.hs
vendored
|
@ -60,6 +60,7 @@ import Control.Concurrent.STM
|
|||
import Network.URI
|
||||
import Data.Char
|
||||
import Text.Read
|
||||
import qualified Data.ByteString.Short as S (fromShort)
|
||||
|
||||
data External = External
|
||||
{ externalType :: ExternalType
|
||||
|
@ -138,7 +139,7 @@ newtype SafeKey = SafeKey Key
|
|||
|
||||
mkSafeKey :: Key -> Either String SafeKey
|
||||
mkSafeKey k
|
||||
| any isSpace (decodeBS $ fromKey keyName k) = Left $ concat
|
||||
| any isSpace (decodeBS $ S.fromShort $ fromKey keyName k) = Left $ concat
|
||||
[ "Sorry, this file cannot be stored on an external special remote because its key's name contains a space. "
|
||||
, "To avoid this problem, you can run: git-annex migrate --backend="
|
||||
, decodeBS (formatKeyVariety (fromKey keyVariety k))
|
||||
|
|
|
@ -56,6 +56,7 @@ import Network.HTTP.Types
|
|||
import Network.HTTP.Client hiding (port)
|
||||
import qualified Data.Map as M
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified Data.ByteString.Short as S (fromShort)
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as E
|
||||
import qualified Control.Concurrent.MSemN as MSemN
|
||||
|
@ -374,7 +375,7 @@ extractKeySha256 :: Key -> Maybe LFS.SHA256
|
|||
extractKeySha256 k = case fromKey keyVariety k of
|
||||
SHA2Key (HashSize 256) (HasExt hasext)
|
||||
| hasext -> eitherToMaybe $ E.decodeUtf8' (keyHash k)
|
||||
| otherwise -> eitherToMaybe $ E.decodeUtf8' (fromKey keyName k)
|
||||
| otherwise -> eitherToMaybe $ E.decodeUtf8' $ S.fromShort (fromKey keyName k)
|
||||
_ -> Nothing
|
||||
|
||||
-- The size of an encrypted key is the size of the input data, but we need
|
||||
|
|
15
Types/Key.hs
15
Types/Key.hs
|
@ -29,6 +29,7 @@ module Types.Key (
|
|||
) where
|
||||
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Short as S (ShortByteString, toShort, fromShort)
|
||||
import qualified Data.ByteString.Char8 as S8
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import Data.ByteString.Builder
|
||||
|
@ -49,7 +50,7 @@ import Prelude
|
|||
{- A Key has a unique name, which is derived from a particular backend,
|
||||
- and may contain other optional metadata. -}
|
||||
data KeyData = Key
|
||||
{ keyName :: S.ByteString
|
||||
{ keyName :: S.ShortByteString
|
||||
, keyVariety :: KeyVariety
|
||||
, keySize :: Maybe Integer
|
||||
, keyMtime :: Maybe EpochTime
|
||||
|
@ -66,7 +67,7 @@ instance NFData KeyData
|
|||
-}
|
||||
data Key = MkKey
|
||||
{ keyData :: KeyData
|
||||
, keySerialization :: S.ByteString
|
||||
, keySerialization :: S.ShortByteString
|
||||
} deriving (Show, Generic)
|
||||
|
||||
instance Eq Key where
|
||||
|
@ -111,8 +112,8 @@ isKeyPrefix s = [fieldSep, fieldSep] `isInfixOf` s
|
|||
fieldSep :: Char
|
||||
fieldSep = '-'
|
||||
|
||||
mkKeySerialization :: KeyData -> S.ByteString
|
||||
mkKeySerialization = L.toStrict
|
||||
mkKeySerialization :: KeyData -> S.ShortByteString
|
||||
mkKeySerialization = S.toShort . L.toStrict
|
||||
. toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty
|
||||
. buildKeyData
|
||||
|
||||
|
@ -127,7 +128,7 @@ buildKeyData k = byteString (formatKeyVariety (keyVariety k))
|
|||
<> 'm' ?: (integerDec . (\(CTime t) -> fromIntegral t) <$> keyMtime k)
|
||||
<> 'S' ?: (integerDec <$> keyChunkSize k)
|
||||
<> 'C' ?: (integerDec <$> keyChunkNum k)
|
||||
<> sepbefore (sepbefore (byteString (keyName k)))
|
||||
<> sepbefore (sepbefore (byteString (S.fromShort (keyName k))))
|
||||
where
|
||||
sepbefore s = char7 fieldSep <> s
|
||||
c ?: (Just b) = sepbefore (char7 c <> b)
|
||||
|
@ -156,7 +157,7 @@ keyParser = do
|
|||
if validKeyName v n
|
||||
then
|
||||
let d = Key
|
||||
{ keyName = n
|
||||
{ keyName = S.toShort n
|
||||
, keyVariety = v
|
||||
, keySize = s
|
||||
, keyMtime = m
|
||||
|
@ -195,7 +196,7 @@ validKeyName kv name
|
|||
- keyName minus extension, and the extension (including leading dot).
|
||||
-}
|
||||
splitKeyNameExtension :: Key -> (S.ByteString, S.ByteString)
|
||||
splitKeyNameExtension = splitKeyNameExtension' . keyName . keyData
|
||||
splitKeyNameExtension = splitKeyNameExtension' . S.fromShort . keyName . keyData
|
||||
|
||||
splitKeyNameExtension' :: S.ByteString -> (S.ByteString, S.ByteString)
|
||||
splitKeyNameExtension' keyname = S8.span (/= '.') keyname
|
||||
|
|
|
@ -12,6 +12,7 @@ import Data.Char
|
|||
import Data.Default
|
||||
import Data.ByteString.Builder
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Short as S (toShort, fromShort)
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified System.FilePath.ByteString as P
|
||||
|
||||
|
@ -138,7 +139,7 @@ oldlog2key l
|
|||
where
|
||||
len = length l - 4
|
||||
k = readKey1 (take len l)
|
||||
sane = (not . S.null $ fromKey keyName k) && (not . S.null $ formatKeyVariety $ fromKey keyVariety k)
|
||||
sane = (not . S.null $ S.fromShort $ fromKey keyName k) && (not . S.null $ formatKeyVariety $ fromKey keyVariety k)
|
||||
|
||||
-- WORM backend keys: "WORM:mtime:size:filename"
|
||||
-- all the rest: "backend:key"
|
||||
|
@ -150,7 +151,7 @@ readKey1 :: String -> Key
|
|||
readKey1 v
|
||||
| mixup = fromJust $ deserializeKey $ intercalate ":" $ Prelude.tail bits
|
||||
| otherwise = mkKey $ \d -> d
|
||||
{ keyName = encodeBS n
|
||||
{ keyName = S.toShort (encodeBS n)
|
||||
, keyVariety = parseKeyVariety (encodeBS b)
|
||||
, keySize = s
|
||||
, keyMtime = t
|
||||
|
@ -175,7 +176,7 @@ showKey1 k = intercalate ":" $ filter (not . null)
|
|||
showifhere Nothing = ""
|
||||
showifhere (Just x) = show x
|
||||
b = decodeBS $ formatKeyVariety v
|
||||
n = fromKey keyName k
|
||||
n = S.fromShort $ fromKey keyName k
|
||||
v = fromKey keyVariety k
|
||||
s = fromKey keySize k
|
||||
t = fromKey keyMtime k
|
||||
|
@ -212,7 +213,7 @@ lookupKey1 file = do
|
|||
where
|
||||
k = fileKey1 l
|
||||
bname = decodeBS (formatKeyVariety (fromKey keyVariety k))
|
||||
kname = decodeBS (fromKey keyName k)
|
||||
kname = decodeBS (S.fromShort (fromKey keyName k))
|
||||
skip = "skipping " ++ file ++
|
||||
" (unknown backend " ++ bname ++ ")"
|
||||
|
||||
|
|
|
@ -4,6 +4,20 @@
|
|||
date="2021-10-05T23:00:18Z"
|
||||
content="""
|
||||
I tried converting Ref to use ShortByteString. Memory use did not improve
|
||||
and the -hc profile is unchanged. So the pinned memory is not in refs. My
|
||||
guess is it must be filenames in the tree then.
|
||||
and the -hc profile is unchanged. So the pinned memory is not in refs.
|
||||
|
||||
Also tried converting Key to use ShortByteString. That was a win!
|
||||
My 20 borg archive test case is down from 320 mb to 242 mb.
|
||||
|
||||
Looking at Command.SyncpullThirdPartyPopulated,
|
||||
it calls listContents, which calls borg's listImportableContents,
|
||||
and produces an `ImportableContents (ContentIdentifier, ByteSize)`
|
||||
then that gets passed through importKeys to produce
|
||||
an `ImportableContents (Either Sha Key)`. Probably
|
||||
double memory is used while doing that conversion, unless
|
||||
the GC manages to free the first one while it's traversed.
|
||||
|
||||
If borg's listImportableContents included a Key (which it does
|
||||
produce already only to throw away!) that might
|
||||
eliminate the big spike just before treeItemsToTree.
|
||||
"""]]
|
||||
|
|
Loading…
Reference in a new issue