cache the serialization of a Key

This will speed up the common case where a Key is deserialized from
disk, but is then serialized to build eg, the path to the annex object.

Previously attempted in 4536c93bb2
and reverted in 96aba8eff7.
The problems mentioned in the latter commit are addressed now:

Read/Show of KeyData is backwards-compatible with Read/Show of Key from before
this change, so Types.Distribution will keep working.

The Eq instance is fixed.

Also, Key has smart constructors, avoiding needing to remember to update
the cached serialization.

Used git-annex benchmark:
  find is 7% faster
  whereis is 3% faster
  get when all files are already present is 5% faster
Generally, the benchmarks are running 0.1 seconds faster per 2000 files,
on a ram disk in my laptop.
This commit is contained in:
Joey Hess 2019-11-22 16:24:04 -04:00
parent e296637737
commit 81d402216d
53 changed files with 388 additions and 289 deletions

View file

@ -329,7 +329,7 @@ getViaTmpFromDisk rsp v key action = checkallowed $ do
checkallowed a = case rsp of
RetrievalAllKeysSecure -> a
RetrievalVerifiableKeysSecure
| isVerifiable (keyVariety key) -> a
| isVerifiable (fromKey keyVariety key) -> a
| otherwise -> ifM (annexAllowUnverifiedDownloads <$> Annex.getGitConfig)
( a
, warnUnverifiableInsecure key >> return False
@ -353,7 +353,7 @@ verifyKeyContent :: RetrievalSecurityPolicy -> VerifyConfig -> Verification -> K
verifyKeyContent rsp v verification k f = case (rsp, verification) of
(_, Verified) -> return True
(RetrievalVerifiableKeysSecure, _)
| isVerifiable (keyVariety k) -> verify
| isVerifiable (fromKey keyVariety k) -> verify
| otherwise -> ifM (annexAllowUnverifiedDownloads <$> Annex.getGitConfig)
( verify
, warnUnverifiableInsecure k >> return False
@ -365,12 +365,12 @@ verifyKeyContent rsp v verification k f = case (rsp, verification) of
(_, MustVerify) -> verify
where
verify = enteringStage VerifyStage $ verifysize <&&> verifycontent
verifysize = case keySize k of
verifysize = case fromKey keySize k of
Nothing -> return True
Just size -> do
size' <- liftIO $ catchDefaultIO 0 $ getFileSize f
return (size' == size)
verifycontent = case Types.Backend.verifyKeyContent =<< Backend.maybeLookupBackendVariety (keyVariety k) of
verifycontent = case Types.Backend.verifyKeyContent =<< Backend.maybeLookupBackendVariety (fromKey keyVariety k) of
Nothing -> return True
Just verifier -> verifier k f
@ -382,7 +382,7 @@ warnUnverifiableInsecure k = warning $ unwords
, "this safety check.)"
]
where
kv = decodeBS (formatKeyVariety (keyVariety k))
kv = decodeBS (formatKeyVariety (fromKey keyVariety k))
data VerifyConfig = AlwaysVerify | NoVerify | RemoteVerify Remote | DefaultVerify
@ -490,10 +490,10 @@ moveAnnex key src = ifM (checkSecureHashes key)
checkSecureHashes :: Key -> Annex Bool
checkSecureHashes key
| cryptographicallySecure (keyVariety key) = return True
| cryptographicallySecure (fromKey keyVariety key) = return True
| otherwise = ifM (annexSecureHashesOnly <$> Annex.getGitConfig)
( do
warning $ "annex.securehashesonly blocked adding " ++ decodeBS (formatKeyVariety (keyVariety key)) ++ " key to annex objects"
warning $ "annex.securehashesonly blocked adding " ++ decodeBS (formatKeyVariety (fromKey keyVariety key)) ++ " key to annex objects"
return False
, return True
)

View file

@ -100,7 +100,7 @@ preserveGitMode _ _ = return True
- when doing concurrent downloads.
-}
checkDiskSpace :: Maybe FilePath -> Key -> Integer -> Bool -> Annex Bool
checkDiskSpace destdir key = checkDiskSpace' (fromMaybe 1 (keySize key)) destdir key
checkDiskSpace destdir key = checkDiskSpace' (fromMaybe 1 (fromKey keySize key)) destdir key
{- Allows specifying the size of the key, if it's known, which is useful
- as not all keys know their size. -}

View file

@ -65,14 +65,14 @@ hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s
hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s
hashDirLower :: HashLevels -> Hasher
hashDirLower n k = hashDirs n 3 $ take 6 $ show $ md5 $ serializeKey' $ nonChunkKey k
hashDirLower n k = hashDirs n 3 $ take 6 $ show $ md5s $ serializeKey' $ nonChunkKey k
{- This was originally using Data.Hash.MD5 from MissingH. This new version
- is faster, but ugly as it has to replicate the 4 Word32's that produced. -}
hashDirMixed :: HashLevels -> Hasher
hashDirMixed n k = hashDirs n 2 $ take 4 $ concatMap display_32bits_as_dir $
encodeWord32 $ map fromIntegral $ Data.ByteArray.unpack $
Utility.Hash.md5 $ serializeKey' $ nonChunkKey k
Utility.Hash.md5s $ serializeKey' $ nonChunkKey k
where
encodeWord32 (b1:b2:b3:b4:rest) =
(shiftL b4 24 .|. shiftL b3 16 .|. shiftL b2 8 .|. b1)

View file

@ -33,7 +33,7 @@ exportKey :: Git.Sha -> Annex ExportKey
exportKey sha = mk <$> catKey sha
where
mk (Just k) = AnnexKey k
mk Nothing = GitKey $ Key
mk Nothing = GitKey $ mkKey $ \k -> k
{ keyName = encodeBS $ Git.fromRef sha
, keyVariety = SHA1Key (HasExt False)
, keySize = Nothing

View file

@ -398,7 +398,7 @@ downloadImport remote importtreeconfig importablecontents = do
{- Temporary key used for import of a ContentIdentifier while downloading
- content, before generating its real key. -}
importKey :: ContentIdentifier -> Integer -> Key
importKey (ContentIdentifier cid) size = stubKey
importKey (ContentIdentifier cid) size = mkKey $ \k -> k
{ keyName = cid
, keyVariety = OtherKey "CID"
, keySize = Just size

View file

@ -93,7 +93,6 @@ module Annex.Locations (
import Data.Char
import Data.Default
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L
import Common
import Key
@ -563,7 +562,7 @@ keyFile = fromRawFilePath . keyFile'
keyFile' :: Key -> RawFilePath
keyFile' k =
let b = L.toStrict (serializeKey' k)
let b = serializeKey' k
in if any (`S8.elem` b) ['&', '%', ':', '/']
then S8.concatMap esc b
else b

View file

@ -40,15 +40,15 @@ import Data.Ord
upload :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
upload u key f d a _witness = guardHaveUUID u $
runTransfer (Transfer Upload u key) f d a
runTransfer (Transfer Upload u (fromKey id key)) f d a
alwaysUpload :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
alwaysUpload u key f d a _witness = guardHaveUUID u $
alwaysRunTransfer (Transfer Upload u key) f d a
alwaysRunTransfer (Transfer Upload u (fromKey id key)) f d a
download :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
download u key f d a _witness = guardHaveUUID u $
runTransfer (Transfer Download u key) f d a
runTransfer (Transfer Download u (fromKey id key)) f d a
guardHaveUUID :: Observable v => UUID -> Annex v -> Annex v
guardHaveUUID u a
@ -185,7 +185,7 @@ checkSecureHashes t a
, a
)
where
variety = keyVariety (transferKey t)
variety = fromKey keyVariety (transferKey t)
type RetryDecider = Annex (TransferInfo -> TransferInfo -> Annex Bool)

View file

@ -10,7 +10,7 @@ module Annex.VariantFile where
import Annex.Common
import Utility.Hash
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as S
variantMarker :: String
variantMarker = ".variant-"
@ -41,5 +41,5 @@ variantFile file key
where
doubleconflict = variantMarker `isInfixOf` file
shortHash :: L.ByteString -> String
shortHash = take 4 . show . md5
shortHash :: S.ByteString -> String
shortHash = take 4 . show . md5s

View file

@ -64,7 +64,7 @@ removableRemote urlrenderer uuid = do
where
queueremaining r k =
queueTransferWhenSmall "remaining object in unwanted remote"
(AssociatedFile Nothing) (Transfer Download uuid k) r
(AssociatedFile Nothing) (Transfer Download uuid (fromKey id k)) r
{- Scanning for keys can take a long time; do not tie up
- the Annex monad while doing it, so other threads continue to
- run. -}

View file

@ -186,7 +186,7 @@ genTransfer :: Direction -> Bool -> Key -> S.Set UUID -> Remote -> Maybe (Remote
genTransfer direction want key slocs r
| direction == Upload && Remote.readonly r = Nothing
| S.member (Remote.uuid r) slocs == want = Just
(r, Transfer direction (Remote.uuid r) key)
(r, Transfer direction (Remote.uuid r) (fromKey id key))
| otherwise = Nothing
remoteHas :: Remote -> Key -> Annex Bool

View file

@ -96,7 +96,7 @@ queueTransfersMatching matching reason schedule k f direction
inset s r = S.member (Remote.uuid r) s
gentransfer r = Transfer
{ transferDirection = direction
, transferKey = k
, transferKeyData = fromKey id k
, transferUUID = Remote.uuid r
}
defer
@ -129,7 +129,7 @@ queueDeferredDownloads reason schedule = do
where
gentransfer r = Transfer
{ transferDirection = Download
, transferKey = k
, transferKeyData = fromKey id k
, transferUUID = Remote.uuid r
}

View file

@ -62,7 +62,7 @@ describeUnused' whenbig = liftAnnex $ go =<< readUnusedLog ""
tenthused Nothing _ = False
tenthused (Just disksize) used = used >= disksize `div` 10
sumkeysize s k = s + fromMaybe 0 (keySize k)
sumkeysize s k = s + fromMaybe 0 (fromKey keySize k)
forpath a = inRepo $ liftIO . a . Git.repoPath

View file

@ -25,7 +25,6 @@ import Annex.Content
import Annex.UUID
import qualified Backend
import qualified Types.Backend
import qualified Types.Key
import Assistant.TransferQueue
import Assistant.TransferSlots
import Remote (remoteFromUUID)
@ -91,13 +90,13 @@ startDistributionDownload d = go =<< liftIO . newVersionLocation d =<< liftIO ol
maybe noop (queueTransfer "upgrade" Next (AssociatedFile (Just f)) t)
=<< liftAnnex (remoteFromUUID webUUID)
startTransfer t
k = distributionKey d
k = mkKey $ const $ distributionKey d
u = distributionUrl d
f = takeFileName u ++ " (for upgrade)"
t = Transfer
{ transferDirection = Download
, transferUUID = webUUID
, transferKey = k
, transferKeyData = fromKey id k
}
cleanup = liftAnnex $ do
lockContentForRemoval k removeAnnex
@ -117,8 +116,8 @@ distributionDownloadComplete d dest cleanup t
=<< liftAnnex (withObjectLoc k fsckit)
| otherwise = cleanup
where
k = distributionKey d
fsckit f = case Backend.maybeLookupBackendVariety (Types.Key.keyVariety k) of
k = mkKey $ const $ distributionKey d
fsckit f = case Backend.maybeLookupBackendVariety (fromKey keyVariety k) of
Nothing -> return $ Just f
Just b -> case Types.Backend.verifyKeyContent b of
Nothing -> return $ Just f

View file

@ -59,16 +59,18 @@ genKey source meterupdate preferredbackend = do
Just k -> Just (makesane k, b)
where
-- keyNames should not contain newline characters.
makesane k = k { keyName = S8.map fixbadchar (keyName k) }
makesane k = alterKey k $ \d -> d
{ keyName = S8.map fixbadchar (fromKey keyName k)
}
fixbadchar c
| c == '\n' = '_'
| otherwise = c
getBackend :: FilePath -> Key -> Annex (Maybe Backend)
getBackend file k = case maybeLookupBackendVariety (keyVariety k) of
getBackend file k = case maybeLookupBackendVariety (fromKey keyVariety k) of
Just backend -> return $ Just backend
Nothing -> do
warning $ "skipping " ++ file ++ " (unknown backend " ++ decodeBS (formatKeyVariety (keyVariety k)) ++ ")"
warning $ "skipping " ++ file ++ " (unknown backend " ++ decodeBS (formatKeyVariety (fromKey keyVariety k)) ++ ")"
return Nothing
{- Looks up the backend that should be used for a file.
@ -95,4 +97,4 @@ varietyMap = M.fromList $ zip (map B.backendVariety list) list
isStableKey :: Key -> Bool
isStableKey k = maybe False (`B.isStableKey` k)
(maybeLookupBackendVariety (keyVariety k))
(maybeLookupBackendVariety (fromKey keyVariety k))

View file

@ -91,7 +91,7 @@ keyValue hash source meterupdate = do
let file = contentLocation source
filesize <- liftIO $ getFileSize file
s <- hashFile hash file meterupdate
return $ Just $ stubKey
return $ Just $ mkKey $ \k -> k
{ keyName = encodeBS s
, keyVariety = hashKeyVariety hash (HasExt False)
, keySize = Just filesize
@ -105,8 +105,8 @@ keyValueE hash source meterupdate =
addE k = do
maxlen <- annexMaxExtensionLength <$> Annex.getGitConfig
let ext = selectExtension maxlen (keyFilename source)
return $ Just $ k
{ keyName = keyName k <> encodeBS ext
return $ Just $ alterKey k $ \d -> d
{ keyName = keyName d <> encodeBS ext
, keyVariety = hashKeyVariety hash (HasExt True)
}
@ -169,7 +169,7 @@ needsUpgrade :: Key -> Bool
needsUpgrade key = or
[ "\\" `S8.isPrefixOf` keyHash key
, any (not . validInExtension) (decodeBS $ snd $ splitKeyNameExtension key)
, not (hasExt (keyVariety key)) && keyHash key /= keyName key
, not (hasExt (fromKey keyVariety key)) && keyHash key /= fromKey keyName key
]
trivialMigrate :: Key -> Backend -> AssociatedFile -> Annex (Maybe Key)
@ -179,14 +179,14 @@ trivialMigrate oldkey newbackend afile = trivialMigrate' oldkey newbackend afile
trivialMigrate' :: Key -> Backend -> AssociatedFile -> Maybe Int -> Maybe Key
trivialMigrate' oldkey newbackend afile maxextlen
{- Fast migration from hashE to hash backend. -}
| migratable && hasExt oldvariety = Just $ oldkey
| migratable && hasExt oldvariety = Just $ alterKey oldkey $ \d -> d
{ keyName = keyHash oldkey
, keyVariety = newvariety
}
{- Fast migration from hash to hashE backend. -}
| migratable && hasExt newvariety = case afile of
AssociatedFile Nothing -> Nothing
AssociatedFile (Just file) -> Just $ oldkey
AssociatedFile (Just file) -> Just $ alterKey oldkey $ \d -> d
{ keyName = keyHash oldkey
<> encodeBS (selectExtension maxextlen file)
, keyVariety = newvariety
@ -195,14 +195,15 @@ trivialMigrate' oldkey newbackend afile maxextlen
- non-extension preserving key, with an extension
- in its keyName. -}
| newvariety == oldvariety && not (hasExt oldvariety) &&
keyHash oldkey /= keyName oldkey = Just $ oldkey
keyHash oldkey /= fromKey keyName oldkey =
Just $ alterKey oldkey $ \d -> d
{ keyName = keyHash oldkey
}
| otherwise = Nothing
where
migratable = oldvariety /= newvariety
&& sameExceptExt oldvariety newvariety
oldvariety = keyVariety oldkey
oldvariety = fromKey keyVariety oldkey
newvariety = backendVariety newbackend
hashFile :: Hash -> FilePath -> MeterUpdate -> Annex String
@ -294,5 +295,7 @@ testKeyBackend =
let b = genBackendE (SHA2Hash (HashSize 256))
in b { getKey = \ks p -> (fmap addE) <$> getKey b ks p }
where
addE k = k { keyName = keyName k <> longext }
addE k = alterKey k $ \d -> d
{ keyName = keyName d <> longext
}
longext = ".this-is-a-test-key"

View file

@ -32,7 +32,7 @@ backend = Backend
{- Every unique url has a corresponding key. -}
fromUrl :: String -> Maybe Integer -> Key
fromUrl url size = stubKey
fromUrl url size = mkKey $ \k -> k
{ keyName = genKeyName url
, keyVariety = URLKey
, keySize = size

View file

@ -39,7 +39,7 @@ keyValue source _ = do
stat <- liftIO $ getFileStatus f
sz <- liftIO $ getFileSize' f stat
relf <- getTopFilePath <$> inRepo (toTopFilePath $ keyFilename source)
return $ Just $ stubKey
return $ Just $ mkKey $ \k -> k
{ keyName = genKeyName relf
, keyVariety = WORMKey
, keySize = Just sz
@ -48,14 +48,14 @@ keyValue source _ = do
{- Old WORM keys could contain spaces, and can be upgraded to remove them. -}
needsUpgrade :: Key -> Bool
needsUpgrade key = ' ' `S8.elem` keyName key
needsUpgrade key = ' ' `S8.elem` fromKey keyName key
removeSpaces :: Key -> Backend -> AssociatedFile -> Annex (Maybe Key)
removeSpaces oldkey newbackend _
| migratable = return $ Just $ oldkey
{ keyName = encodeBS $ reSanitizeKeyName $ decodeBS $ keyName oldkey }
| migratable = return $ Just $ alterKey oldkey $ \d -> d
{ keyName = encodeBS $ reSanitizeKeyName $ decodeBS $ keyName d }
| otherwise = return Nothing
where
migratable = oldvariety == newvariety
oldvariety = keyVariety oldkey
oldvariety = fromKey keyVariety oldkey
newvariety = backendVariety newbackend

View file

@ -1,5 +1,9 @@
git-annex (7.20191115) UNRELEASED; urgency=medium
* Sped up many git-annex commands that operare on many files, by
avoiding reserialization of keys.
find is 7% faster; whereis is 3% faster; and git-annex get when
all files are already present is 5% faster
* Stop displaying rsync progress, and use git-annex's own progress display
for local-to-local repo transfers.
* git-lfs: The url provided to initremote/enableremote will now be

View file

@ -162,7 +162,7 @@ performRemote r o uri file sz = ifAnnexed file adduri geturi
adduri = addUrlChecked o loguri file (Remote.uuid r) checkexistssize
checkexistssize key = return $ case sz of
Nothing -> (True, True, loguri)
Just n -> (True, n == fromMaybe n (keySize key), loguri)
Just n -> (True, n == fromMaybe n (fromKey keySize key), loguri)
geturi = next $ isJust <$> downloadRemoteFile r (downloadOptions o) uri file sz
downloadRemoteFile :: Remote -> DownloadOptions -> URLString -> FilePath -> Maybe Integer -> Annex (Maybe Key)
@ -218,7 +218,7 @@ performWeb o url file urlinfo = ifAnnexed file addurl geturl
addurl = addUrlChecked o url file webUUID $ \k ->
ifM (pure (not (rawOption (downloadOptions o))) <&&> youtubeDlSupported url)
( return (True, True, setDownloader url YoutubeDownloader)
, return (Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k, url)
, return (Url.urlExists urlinfo, Url.urlSize urlinfo == fromKey keySize k, url)
)
{- Check that the url exists, and has the same size as the key,
@ -379,7 +379,9 @@ finishDownloadWith tmp u url file = do
{- Adds the url size to the Key. -}
addSizeUrlKey :: Url.UrlInfo -> Key -> Key
addSizeUrlKey urlinfo key = key { keySize = Url.urlSize urlinfo }
addSizeUrlKey urlinfo key = alterKey key $ \d -> d
{ keySize = Url.urlSize urlinfo
}
{- Adds worktree file to the repository. -}
addWorkTree :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex ()

View file

@ -87,14 +87,14 @@ showFormatted format unformatted vars =
keyVars :: Key -> [(String, String)]
keyVars key =
[ ("key", serializeKey key)
, ("backend", decodeBS $ formatKeyVariety $ keyVariety key)
, ("backend", decodeBS $ formatKeyVariety $ fromKey keyVariety key)
, ("bytesize", size show)
, ("humansize", size $ roughSize storageUnits True)
, ("keyname", decodeBS $ keyName key)
, ("keyname", decodeBS $ fromKey keyName key)
, ("hashdirlower", hashDirLower def key)
, ("hashdirmixed", hashDirMixed def key)
, ("mtime", whenavail show $ keyMtime key)
, ("mtime", whenavail show $ fromKey keyMtime key)
]
where
size c = whenavail c $ keySize key
size c = whenavail c $ fromKey keySize key
whenavail = maybe "unknown"

View file

@ -49,14 +49,14 @@ seekBatch fmt = batchInput fmt parse commandAction
parse s =
let (keyname, file) = separate (== ' ') s
in if not (null keyname) && not (null file)
then Right $ go file (mkKey keyname)
then Right $ go file (keyOpt keyname)
else Left "Expected pairs of key and filename"
go file key = starting "fromkey" (mkActionItem (key, file)) $
perform key file
start :: Bool -> (String, FilePath) -> CommandStart
start force (keyname, file) = do
let key = mkKey keyname
let key = keyOpt keyname
unless force $ do
inbackend <- inAnnex key
unless inbackend $ giveup $
@ -71,8 +71,8 @@ start force (keyname, file) = do
-- For example, "WORM--a:a" parses as an uri. To disambiguate, check
-- the uri scheme, to see if it looks like the prefix of a key. This relies
-- on key backend names never containing a ':'.
mkKey :: String -> Key
mkKey s = case parseURI s of
keyOpt :: String -> Key
keyOpt s = case parseURI s of
Just u | not (isKeyPrefix (uriScheme u)) ->
Backend.URL.fromUrl s Nothing
_ -> case deserializeKey s of

View file

@ -182,7 +182,7 @@ performRemote key afile backend numcopies remote =
startKey :: Maybe Remote -> Incremental -> (Key, ActionItem) -> NumCopies -> CommandStart
startKey from inc (key, ai) numcopies =
case Backend.maybeLookupBackendVariety (keyVariety key) of
case Backend.maybeLookupBackendVariety (fromKey keyVariety key) of
Nothing -> stop
Just backend -> runFsck inc ai key $
case from of
@ -244,9 +244,9 @@ verifyLocationLog key keystatus ai = do
- insecure hash is present. This should only be able to happen
- if the repository already contained the content before the
- config was set. -}
when (present && not (cryptographicallySecure (keyVariety key))) $
when (present && not (cryptographicallySecure (fromKey keyVariety key))) $
whenM (annexSecureHashesOnly <$> Annex.getGitConfig) $
warning $ "** Despite annex.securehashesonly being set, " ++ obj ++ " has content present in the annex using an insecure " ++ decodeBS (formatKeyVariety (keyVariety key)) ++ " key"
warning $ "** Despite annex.securehashesonly being set, " ++ obj ++ " has content present in the annex using an insecure " ++ decodeBS (formatKeyVariety (fromKey keyVariety key)) ++ " key"
verifyLocationLog' key ai present u (logChange key u)
@ -362,7 +362,7 @@ checkKeySizeRemote key remote ai localcopy =
checkKeySizeOr (badContentRemote remote localcopy) key localcopy ai
checkKeySizeOr :: (Key -> Annex String) -> Key -> FilePath -> ActionItem -> Annex Bool
checkKeySizeOr bad key file ai = case keySize key of
checkKeySizeOr bad key file ai = case fromKey keySize key of
Nothing -> return True
Just size -> do
size' <- liftIO $ getFileSize file
@ -396,7 +396,7 @@ checkKeyUpgrade backend key ai (AssociatedFile (Just file)) =
[ actionItemDesc ai
, ": Can be upgraded to an improved key format. "
, "You can do so by running: git annex migrate --backend="
, decodeBS (formatKeyVariety (keyVariety key)) ++ " "
, decodeBS (formatKeyVariety (fromKey keyVariety key)) ++ " "
, file
]
return True

View file

@ -50,23 +50,23 @@ import qualified Command.Unused
type Stat = StatState (Maybe (String, StatState String))
-- data about a set of keys
data KeyData = KeyData
data KeyInfo = KeyInfo
{ countKeys :: Integer
, sizeKeys :: Integer
, unknownSizeKeys :: Integer
, backendsKeys :: M.Map KeyVariety Integer
}
instance Sem.Semigroup KeyData where
a <> b = KeyData
instance Sem.Semigroup KeyInfo where
a <> b = KeyInfo
{ countKeys = countKeys a + countKeys b
, sizeKeys = sizeKeys a + sizeKeys b
, unknownSizeKeys = unknownSizeKeys a + unknownSizeKeys b
, backendsKeys = backendsKeys a <> backendsKeys b
}
instance Monoid KeyData where
mempty = KeyData 0 0 0 M.empty
instance Monoid KeyInfo where
mempty = KeyInfo 0 0 0 M.empty
data NumCopiesStats = NumCopiesStats
{ numCopiesVarianceMap :: M.Map Variance Integer
@ -82,9 +82,9 @@ instance Show Variance where
-- cached info that multiple Stats use
data StatInfo = StatInfo
{ presentData :: Maybe KeyData
, referencedData :: Maybe KeyData
, repoData :: M.Map UUID KeyData
{ presentData :: Maybe KeyInfo
, referencedData :: Maybe KeyInfo
, repoData :: M.Map UUID KeyInfo
, numCopiesStats :: Maybe NumCopiesStats
, infoOptions :: InfoOptions
}
@ -512,7 +512,7 @@ reposizes_total :: Stat
reposizes_total = simpleStat "combined size of repositories containing these files" $
showSizeKeys . mconcat . M.elems =<< cachedRepoData
cachedPresentData :: StatState KeyData
cachedPresentData :: StatState KeyInfo
cachedPresentData = do
s <- get
case presentData s of
@ -522,7 +522,7 @@ cachedPresentData = do
put s { presentData = Just v }
return v
cachedRemoteData :: UUID -> StatState KeyData
cachedRemoteData :: UUID -> StatState KeyInfo
cachedRemoteData u = do
s <- get
case M.lookup u (repoData s) of
@ -531,19 +531,19 @@ cachedRemoteData u = do
let combinedata d uk = finishCheck uk >>= \case
Nothing -> return d
Just k -> return $ addKey k d
v <- lift $ foldM combinedata emptyKeyData
v <- lift $ foldM combinedata emptyKeyInfo
=<< loggedKeysFor' u
put s { repoData = M.insert u v (repoData s) }
return v
cachedReferencedData :: StatState KeyData
cachedReferencedData :: StatState KeyInfo
cachedReferencedData = do
s <- get
case referencedData s of
Just v -> return v
Nothing -> do
!v <- lift $ Command.Unused.withKeysReferenced
emptyKeyData addKey
emptyKeyInfo addKey
put s { referencedData = Just v }
return v
@ -552,7 +552,7 @@ cachedNumCopiesStats :: StatState (Maybe NumCopiesStats)
cachedNumCopiesStats = numCopiesStats <$> get
-- currently only available for directory info
cachedRepoData :: StatState (M.Map UUID KeyData)
cachedRepoData :: StatState (M.Map UUID KeyInfo)
cachedRepoData = repoData <$> get
getDirStatInfo :: InfoOptions -> FilePath -> Annex StatInfo
@ -564,7 +564,7 @@ getDirStatInfo o dir = do
(update matcher fast)
return $ StatInfo (Just presentdata) (Just referenceddata) repodata (Just numcopiesstats) o
where
initial = (emptyKeyData, emptyKeyData, emptyNumCopiesStats, M.empty)
initial = (emptyKeyInfo, emptyKeyInfo, emptyNumCopiesStats, M.empty)
update matcher fast key file vs@(presentdata, referenceddata, numcopiesstats, repodata) =
ifM (matcher $ MatchingFile $ FileInfo file file)
( do
@ -594,7 +594,7 @@ getTreeStatInfo o r = do
, return Nothing
)
where
initial = (emptyKeyData, emptyKeyData, M.empty)
initial = (emptyKeyInfo, emptyKeyInfo, M.empty)
go _ [] vs = return vs
go fast (l:ls) vs@(presentdata, referenceddata, repodata) = do
mk <- catKey (LsTree.sha l)
@ -613,33 +613,33 @@ getTreeStatInfo o r = do
return (updateRepoData key locs repodata)
go fast ls $! (presentdata', referenceddata', repodata')
emptyKeyData :: KeyData
emptyKeyData = KeyData 0 0 0 M.empty
emptyKeyInfo :: KeyInfo
emptyKeyInfo = KeyInfo 0 0 0 M.empty
emptyNumCopiesStats :: NumCopiesStats
emptyNumCopiesStats = NumCopiesStats M.empty
foldKeys :: [Key] -> KeyData
foldKeys = foldl' (flip addKey) emptyKeyData
foldKeys :: [Key] -> KeyInfo
foldKeys = foldl' (flip addKey) emptyKeyInfo
addKey :: Key -> KeyData -> KeyData
addKey key (KeyData count size unknownsize backends) =
KeyData count' size' unknownsize' backends'
addKey :: Key -> KeyInfo -> KeyInfo
addKey key (KeyInfo count size unknownsize backends) =
KeyInfo count' size' unknownsize' backends'
where
{- All calculations strict to avoid thunks when repeatedly
- applied to many keys. -}
!count' = count + 1
!backends' = M.insertWith (+) (keyVariety key) 1 backends
!backends' = M.insertWith (+) (fromKey keyVariety key) 1 backends
!size' = maybe size (+ size) ks
!unknownsize' = maybe (unknownsize + 1) (const unknownsize) ks
ks = keySize key
ks = fromKey keySize key
updateRepoData :: Key -> [UUID] -> M.Map UUID KeyData -> M.Map UUID KeyData
updateRepoData :: Key -> [UUID] -> M.Map UUID KeyInfo -> M.Map UUID KeyInfo
updateRepoData key locs m = m'
where
!m' = M.unionWith (\_old new -> new) m $
M.fromList $ zip locs (map update locs)
update loc = addKey key (fromMaybe emptyKeyData $ M.lookup loc m)
update loc = addKey key (fromMaybe emptyKeyInfo $ M.lookup loc m)
updateNumCopiesStats :: FilePath -> NumCopiesStats -> [UUID] -> Annex NumCopiesStats
updateNumCopiesStats file (NumCopiesStats m) locs = do
@ -649,7 +649,7 @@ updateNumCopiesStats file (NumCopiesStats m) locs = do
let !ret = NumCopiesStats m'
return ret
showSizeKeys :: KeyData -> StatState String
showSizeKeys :: KeyInfo -> StatState String
showSizeKeys d = do
sizer <- mkSizer
return $ total sizer ++ missingnote

View file

@ -67,7 +67,7 @@ optParser desc = MatchExpressionOptions
missingdata datadesc = bail $ "cannot match this expression without " ++ datadesc ++ " data"
-- When a key is provided, make its size also be provided.
addkeysize p = case providedKey p of
Right k -> case keySize k of
Right k -> case fromKey keySize k of
Just sz -> p { providedFileSize = Right sz }
Nothing -> p
Left _ -> p

View file

@ -50,7 +50,7 @@ start file key = do
- - Something has changed in the backend, such as a bug fix.
-}
upgradableKey :: Backend -> Key -> Bool
upgradableKey backend key = isNothing (keySize key) || backendupgradable
upgradableKey backend key = isNothing (fromKey keySize key) || backendupgradable
where
backendupgradable = maybe False (\a -> a key) (canUpgradeKey backend)

View file

@ -11,7 +11,7 @@ module Command.RegisterUrl where
import Command
import Logs.Web
import Command.FromKey (mkKey)
import Command.FromKey (keyOpt)
import qualified Remote
cmd :: Command
@ -41,7 +41,7 @@ seek o = case (batchOption o, keyUrlPairs o) of
start :: [String] -> CommandStart
start (keyname:url:[]) =
starting "registerurl" (ActionItemOther (Just url)) $ do
let key = mkKey keyname
let key = keyOpt keyname
perform key url
start _ = giveup "specify a key and an url"
@ -55,7 +55,7 @@ massAdd fmt = go True =<< map (separate (== ' ')) <$> batchLines fmt
where
go status [] = next $ return status
go status ((keyname,u):rest) | not (null keyname) && not (null u) = do
let key = mkKey keyname
let key = keyOpt keyname
ok <- perform' key u
let !status' = status && ok
go status' rest

View file

@ -49,7 +49,7 @@ fieldTransfer direction key a = do
afile <- AssociatedFile <$> Fields.getField Fields.associatedFile
ok <- maybe (a $ const noop)
-- Using noRetry here because we're the sender.
(\u -> runner (Transfer direction (toUUID u) key) afile noRetry a)
(\u -> runner (Transfer direction (toUUID u) (fromKey id key)) afile noRetry a)
=<< Fields.getField Fields.remoteUUID
liftIO $ debugM "fieldTransfer" "transfer done"
liftIO $ exitBool ok

View file

@ -21,11 +21,11 @@ seek = withWords (commandAction . start)
start :: [String] -> CommandStart
start (keyname:file:[]) = starting "setkey" (ActionItemOther (Just file)) $
perform file (mkKey keyname)
perform file (keyOpt keyname)
start _ = giveup "specify a key and a content file"
mkKey :: String -> Key
mkKey = fromMaybe (giveup "bad key") . deserializeKey
keyOpt :: String -> Key
keyOpt = fromMaybe (giveup "bad key") . deserializeKey
perform :: FilePath -> Key -> CommandPerform
perform file key = do

View file

@ -119,7 +119,7 @@ clean file = do
-- Look up the backend that was used for this file
-- before, so that when git re-cleans a file its
-- backend does not change.
let oldbackend = maybe Nothing (maybeLookupBackendVariety . keyVariety) oldkey
let oldbackend = maybe Nothing (maybeLookupBackendVariety . fromKey keyVariety) oldkey
-- Can't restage associated files because git add
-- runs this and has the index locked.
let norestage = Restage False

View file

@ -107,14 +107,14 @@ perform rs unavailrs exportr ks = do
next $ cleanup rs ks ok
where
desc r' k = intercalate "; " $ map unwords
[ [ "key size", show (keySize k) ]
[ [ "key size", show (fromKey keySize k) ]
, [ show (getChunkConfig (Remote.config r')) ]
, ["encryption", fromMaybe "none" (M.lookup "encryption" (Remote.config r'))]
]
descexport k1 k2 = intercalate "; " $ map unwords
[ [ "exporttree=yes" ]
, [ "key1 size", show (keySize k1) ]
, [ "key2 size", show (keySize k2) ]
, [ "key1 size", show (fromKey keySize k1) ]
, [ "key2 size", show (fromKey keySize k2) ]
]
adjustChunkSize :: Remote -> Int -> Annex (Maybe Remote)
@ -199,7 +199,7 @@ test st r k = catMaybes
Annex.eval st (Annex.setOutput QuietOutput >> a) @? "failed"
present b = check ("present " ++ show b) $
(== Right b) <$> Remote.hasKey r k
fsck = case maybeLookupBackendVariety (keyVariety k) of
fsck = case maybeLookupBackendVariety (fromKey keyVariety k) of
Nothing -> return True
Just b -> case Backend.verifyKeyContent b of
Nothing -> return True

View file

@ -47,7 +47,7 @@ start (k:[]) = do
let t = Transfer
{ transferDirection = Upload
, transferUUID = u
, transferKey = key
, transferKeyData = fromKey id key
}
tinfo <- liftIO $ startTransferInfo afile
(update, tfile, createtfile, _) <- mkProgressUpdater t tinfo

View file

@ -161,7 +161,7 @@ type EncKey = Key -> Key
- reversable, nor does it need to be the same type of encryption used
- on content. It does need to be repeatable. -}
encryptKey :: Mac -> Cipher -> EncKey
encryptKey mac c k = stubKey
encryptKey mac c k = mkKey $ \d -> d
{ keyName = encodeBS (macWithCipher mac c (serializeKey k))
, keyVariety = OtherKey $
encryptedBackendNamePrefix <> encodeBS (showMac mac)
@ -171,7 +171,7 @@ encryptedBackendNamePrefix :: S.ByteString
encryptedBackendNamePrefix = "GPG"
isEncKey :: Key -> Bool
isEncKey k = case keyVariety k of
isEncKey k = case fromKey keyVariety k of
OtherKey s -> encryptedBackendNamePrefix `S.isPrefixOf` s
_ -> False

View file

@ -87,7 +87,7 @@ populateAssociatedFiles h num = do
H.flushDbQueue h
keyN :: Integer -> Key
keyN n = stubKey
keyN n = mkKey $ \k -> k
{ keyName = B8.pack $ "key" ++ show n
, keyVariety = OtherKey "BENCH"
}

137
Key.hs
View file

@ -8,10 +8,12 @@
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Key (
Key(..),
Key,
KeyData(..),
AssociatedFile(..),
stubKey,
buildKey,
fromKey,
mkKey,
alterKey,
keyParser,
serializeKey,
serializeKey',
@ -28,13 +30,7 @@ module Key (
import qualified Data.Text as T
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L
import Data.ByteString.Builder
import Data.ByteString.Builder.Extra
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import Foreign.C.Types
import Common
import Types.Key
@ -43,19 +39,11 @@ import Utility.Bloom
import Utility.Aeson
import qualified Utility.SimpleProtocol as Proto
stubKey :: Key
stubKey = Key
{ keyName = mempty
, keyVariety = OtherKey mempty
, keySize = Nothing
, keyMtime = Nothing
, keyChunkSize = Nothing
, keyChunkNum = Nothing
}
-- Gets the parent of a chunk key.
nonChunkKey :: Key -> Key
nonChunkKey k = k
nonChunkKey k
| fromKey keyChunkSize k == Nothing && fromKey keyChunkNum k == Nothing = k
| otherwise = alterKey k $ \d -> d
{ keyChunkSize = Nothing
, keyChunkNum = Nothing
}
@ -63,114 +51,25 @@ nonChunkKey k = k
-- Where a chunk key is offset within its parent.
chunkKeyOffset :: Key -> Maybe Integer
chunkKeyOffset k = (*)
<$> keyChunkSize k
<*> (pred <$> keyChunkNum k)
<$> fromKey keyChunkSize k
<*> (pred <$> fromKey keyChunkNum k)
isChunkKey :: Key -> Bool
isChunkKey k = isJust (keyChunkSize k) && isJust (keyChunkNum k)
-- Checks if a string looks like at least the start of a key.
isKeyPrefix :: String -> Bool
isKeyPrefix s = [fieldSep, fieldSep] `isInfixOf` s
fieldSep :: Char
fieldSep = '-'
{- Builds a ByteString from a Key.
-
- The name field is always shown last, separated by doubled fieldSeps,
- and is the only field allowed to contain the fieldSep.
-}
buildKey :: Key -> Builder
buildKey k = byteString (formatKeyVariety (keyVariety k))
<> 's' ?: (integerDec <$> keySize k)
<> 'm' ?: (integerDec . (\(CTime t) -> fromIntegral t) <$> keyMtime k)
<> 'S' ?: (integerDec <$> keyChunkSize k)
<> 'C' ?: (integerDec <$> keyChunkNum k)
<> sepbefore (sepbefore (byteString (keyName k)))
where
sepbefore s = char7 fieldSep <> s
c ?: (Just b) = sepbefore (char7 c <> b)
_ ?: Nothing = mempty
isChunkKey k = isJust (fromKey keyChunkSize k) && isJust (fromKey keyChunkNum k)
serializeKey :: Key -> String
serializeKey = decodeBL' . serializeKey'
serializeKey = decodeBS' . serializeKey'
serializeKey' :: Key -> L.ByteString
serializeKey' = toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty . buildKey
{- This is a strict parser for security reasons; a key
- can contain only 4 fields, which all consist only of numbers.
- Any key containing other fields, or non-numeric data will fail
- to parse.
-
- If a key contained non-numeric fields, they could be used to
- embed data used in a SHA1 collision attack, which would be a
- problem since the keys are committed to git.
-}
keyParser :: A.Parser Key
keyParser = do
-- key variety cannot be empty
v <- (parseKeyVariety <$> A8.takeWhile1 (/= fieldSep))
s <- parsesize
m <- parsemtime
cs <- parsechunksize
cn <- parsechunknum
_ <- A8.char fieldSep
_ <- A8.char fieldSep
n <- A.takeByteString
if validKeyName v n
then return $ Key
{ keyName = n
, keyVariety = v
, keySize = s
, keyMtime = m
, keyChunkSize = cs
, keyChunkNum = cn
}
else fail "invalid keyName"
where
parseopt p = (Just <$> (A8.char fieldSep *> p)) <|> pure Nothing
parsesize = parseopt $ A8.char 's' *> A8.decimal
parsemtime = parseopt $ CTime <$> (A8.char 'm' *> A8.decimal)
parsechunksize = parseopt $ A8.char 'S' *> A8.decimal
parsechunknum = parseopt $ A8.char 'C' *> A8.decimal
serializeKey' :: Key -> S.ByteString
serializeKey' = keySerialization
deserializeKey :: String -> Maybe Key
deserializeKey = deserializeKey' . encodeBS'
deserializeKey' :: S.ByteString -> Maybe Key
deserializeKey' b = eitherToMaybe $ A.parseOnly keyParser b
deserializeKey' = either (const Nothing) Just . A.parseOnly keyParser
{- This splits any extension out of the keyName, returning the
- keyName minus extension, and the extension (including leading dot).
-}
splitKeyNameExtension :: Key -> (S.ByteString, S.ByteString)
splitKeyNameExtension = splitKeyNameExtension' . keyName
splitKeyNameExtension' :: S.ByteString -> (S.ByteString, S.ByteString)
splitKeyNameExtension' keyname = S8.span (/= '.') keyname
{- Limits the length of the extension in the keyName to mitigate against
- SHA1 collision attacks.
-
- In such an attack, the extension of the key could be made to contain
- the collision generation data, with the result that a signed git commit
- including such keys would not be secure.
-
- The maximum extension length ever generated for such a key was 8
- characters, but they may be unicode which could use up to 4 bytes each,
- so 32 bytes. 64 bytes is used here to give a little future wiggle-room.
- The SHA1 common-prefix attack needs 128 bytes of data.
-}
validKeyName :: KeyVariety -> S.ByteString -> Bool
validKeyName kv name
| hasExt kv =
let ext = snd $ splitKeyNameExtension' name
in S.length ext <= 64
| otherwise = True
instance Arbitrary Key where
instance Arbitrary KeyData where
arbitrary = Key
<$> (encodeBS <$> (listOf1 $ elements $ ['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "-_\r\n \t"))
<*> (parseKeyVariety . encodeBS <$> (listOf1 $ elements ['A'..'Z'])) -- BACKEND
@ -179,6 +78,9 @@ instance Arbitrary Key where
<*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative
<*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative
instance Arbitrary Key where
arbitrary = mkKey . const <$> arbitrary
instance Hashable Key where
hashIO32 = hashIO32 . serializeKey'
hashIO64 = hashIO64 . serializeKey'
@ -196,3 +98,4 @@ instance Proto.Serializable Key where
prop_isomorphic_key_encode :: Key -> Bool
prop_isomorphic_key_encode k = Just k == (deserializeKey . serializeKey) k

View file

@ -294,7 +294,7 @@ addInBackend = addLimit . limitInBackend
limitInBackend :: MkLimit Annex
limitInBackend name = Right $ const $ checkKey check
where
check key = pure $ keyVariety key == variety
check key = pure $ fromKey keyVariety key == variety
variety = parseKeyVariety (encodeBS name)
{- Adds a limit to skip files not using a secure hash. -}
@ -302,7 +302,7 @@ addSecureHash :: Annex ()
addSecureHash = addLimit $ Right limitSecureHash
limitSecureHash :: MatchFiles Annex
limitSecureHash _ = checkKey $ pure . cryptographicallySecure . keyVariety
limitSecureHash _ = checkKey $ pure . cryptographicallySecure . fromKey keyVariety
{- Adds a limit to skip files that are too large or too small -}
addLargerThan :: String -> Annex ()
@ -327,7 +327,7 @@ limitSize lb vs s = case readSize dataUnits s of
go sz _ (MatchingInfo p) =
getInfo (providedFileSize p)
>>= \sz' -> return (Just sz' `vs` Just sz)
checkkey sz key = return $ keySize key `vs` Just sz
checkkey sz key = return $ fromKey keySize key `vs` Just sz
addMetaData :: String -> Annex ()
addMetaData = addLimit . limitMetaData

View file

@ -1,6 +1,6 @@
{- git-annex transfer information files and lock files
-
- Copyright 2012 Joey Hess <id@joeyh.name>
- Copyright 2012-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -41,12 +41,14 @@ describeTransfer t info = unwords
equivilantTransfer :: Transfer -> Transfer -> Bool
equivilantTransfer t1 t2
| transferDirection t1 == Download && transferDirection t2 == Download &&
transferKey t1 == transferKey t2 = True
transferKeyData t1 == transferKeyData t2 = True
| otherwise = t1 == t2
percentComplete :: Transfer -> TransferInfo -> Maybe Percentage
percentComplete (Transfer { transferKey = key }) info =
percentage <$> keySize key <*> Just (fromMaybe 0 $ bytesComplete info)
percentComplete t info =
percentage
<$> keySize (transferKeyData t)
<*> Just (fromMaybe 0 $ bytesComplete info)
{- Generates a callback that can be called as transfer progresses to update
- the transfer info file. Also returns the file it'll be updating,
@ -72,7 +74,7 @@ mkProgressUpdater t info = do
{- The minimum change in bytesComplete that is worth
- updating a transfer info file for is 1% of the total
- keySize, rounded down. -}
mindelta = case keySize (transferKey t) of
mindelta = case keySize (transferKeyData t) of
Just sz -> sz `div` 100
Nothing -> 100 * 1024 -- arbitrarily, 100 kb
@ -155,7 +157,7 @@ sizeOfDownloadsInProgress wanted = sum . map remaining
<$> getTransfers' [Download] wanted
where
remaining (t, info) =
case (keySize (transferKey t), bytesComplete info) of
case (fromKey keySize (transferKey t), bytesComplete info) of
(Just sz, Just done) -> sz - done
(Just sz, Nothing) -> sz
(Nothing, _) -> 0
@ -191,14 +193,14 @@ recordFailedTransfer t info = do
{- The transfer information file to use for a given Transfer. -}
transferFile :: Transfer -> Git.Repo -> FilePath
transferFile (Transfer direction u key) r = transferDir direction r
transferFile (Transfer direction u kd) r = transferDir direction r
</> filter (/= '/') (fromUUID u)
</> keyFile key
</> keyFile (mkKey (const kd))
{- The transfer information file to use to record a failed Transfer -}
failedTransferFile :: Transfer -> Git.Repo -> FilePath
failedTransferFile (Transfer direction u key) r = failedTransferDir u direction r
</> keyFile key
failedTransferFile (Transfer direction u kd) r = failedTransferDir u direction r
</> keyFile (mkKey (const kd))
{- The transfer lock file corresponding to a given transfer info file. -}
transferLockFile :: FilePath -> FilePath
@ -213,7 +215,7 @@ parseTransferFile file
[direction, u, key] -> Transfer
<$> parseDirection direction
<*> pure (toUUID u)
<*> fileKey key
<*> fmap (fromKey id) (fileKey key)
_ -> Nothing
where
bits = splitDirectories file

View file

@ -36,7 +36,7 @@ instance MeterSize FileSize where
getMeterSize = pure . Just
instance MeterSize Key where
getMeterSize = pure . keySize
getMeterSize = pure . fromKey keySize
instance MeterSize InodeCache where
getMeterSize = pure . Just . inodeCacheFileSize
@ -51,7 +51,7 @@ instance MeterSize KeySource where
data KeySizer = KeySizer Key (Annex (Maybe FilePath))
instance MeterSize KeySizer where
getMeterSize (KeySizer k getsrcfile) = case keySize k of
getMeterSize (KeySizer k getsrcfile) = case fromKey keySize k of
Just sz -> return (Just sz)
Nothing -> do
srcfile <- getsrcfile

View file

@ -258,7 +258,7 @@ downloadTorrentContent k u dest filenum p = do
, return False
)
where
download torrent tmpdir = ariaProgress (keySize k) p
download torrent tmpdir = ariaProgress (fromKey keySize k) p
[ Param $ "--select-file=" ++ show filenum
, File torrent
, Param "-d"

View file

@ -716,7 +716,7 @@ checkKeyUrl :: Git.Repo -> CheckPresent
checkKeyUrl r k = do
showChecking r
us <- getWebUrls k
anyM (\u -> withUrlOptions $ checkBoth u (keySize k)) us
anyM (\u -> withUrlOptions $ checkBoth u (fromKey keySize k)) us
getWebUrls :: Key -> Annex [URLString]
getWebUrls key = filter supported <$> getUrls key

View file

@ -101,10 +101,10 @@ newtype SafeKey = SafeKey Key
mkSafeKey :: Key -> Either String SafeKey
mkSafeKey k
| any isSpace (decodeBS $ keyName k) = Left $ concat
| any isSpace (decodeBS $ fromKey keyName k) = Left $ concat
[ "Sorry, this file cannot be stored on an external special remote because its key's name contains a space. "
, "To avoid this problem, you can run: git-annex migrate --backend="
, decodeBS (formatKeyVariety (keyVariety k))
, decodeBS (formatKeyVariety (fromKey keyVariety k))
, " and pass it the name of the file"
]
| otherwise = Right (SafeKey k)

View file

@ -367,7 +367,7 @@ inAnnex' repo rmt (State connpool duc _ _) key
checkhttp = do
showChecking repo
gc <- Annex.getGitConfig
ifM (Url.withUrlOptions $ \uo -> anyM (\u -> Url.checkBoth u (keySize key) uo) (keyUrls gc repo rmt key))
ifM (Url.withUrlOptions $ \uo -> anyM (\u -> Url.checkBoth u (fromKey keySize key) uo) (keyUrls gc repo rmt key))
( return True
, giveup "not found"
)
@ -511,7 +511,7 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _) key file dest meter
Nothing -> return (False, UnVerified)
Just (object, checksuccess) -> do
copier <- mkCopier hardlink st params
runTransfer (Transfer Download u key)
runTransfer (Transfer Download u (fromKey id key))
file stdRetry $ \p ->
metered (Just (combineMeterUpdate p meterupdate)) key $ \_ p' ->
copier object dest p' checksuccess
@ -647,7 +647,7 @@ copyToRemote' repo r st@(State connpool duc _ _) key file meterupdate
-- run copy from perspective of remote
onLocalFast repo r $ ifM (Annex.Content.inAnnex key)
( return True
, runTransfer (Transfer Download u key) file stdRetry $ \p -> do
, runTransfer (Transfer Download u (fromKey id key)) file stdRetry $ \p -> do
copier <- mkCopier hardlink st params
let verify = Annex.Content.RemoteVerify r
let rsp = RetrievalAllKeysSecure

View file

@ -344,10 +344,10 @@ sendTransferRequest req endpoint = do
LFS.ParseFailed err -> Left err
extractKeySha256 :: Key -> Maybe LFS.SHA256
extractKeySha256 k = case keyVariety k of
extractKeySha256 k = case fromKey keyVariety k of
SHA2Key (HashSize 256) (HasExt hasext)
| hasext -> eitherToMaybe $ E.decodeUtf8' (keyHash k)
| otherwise -> eitherToMaybe $ E.decodeUtf8' (keyName k)
| otherwise -> eitherToMaybe $ E.decodeUtf8' (fromKey keyName k)
_ -> Nothing
-- The size of an encrypted key is the size of the input data, but we need
@ -355,7 +355,7 @@ extractKeySha256 k = case keyVariety k of
extractKeySize :: Key -> Maybe Integer
extractKeySize k
| isEncKey k = Nothing
| otherwise = keySize k
| otherwise = fromKey keySize k
mkUploadRequest :: RemoteStateHandle -> Key -> FilePath -> Annex (LFS.TransferRequest, LFS.SHA256, Integer)
mkUploadRequest rs k content = case (extractKeySha256 k, extractKeySize k) of

View file

@ -117,7 +117,7 @@ prepareStore r = checkPrepare nonEmpty (byteStorer $ store r)
nonEmpty :: Key -> Annex Bool
nonEmpty k
| keySize k == Just 0 = do
| fromKey keySize k == Just 0 = do
warning "Cannot store empty files in Glacier."
return False
| otherwise = return True

View file

@ -68,8 +68,10 @@ newtype ChunkKeyStream = ChunkKeyStream [Key]
chunkKeyStream :: Key -> ChunkSize -> ChunkKeyStream
chunkKeyStream basek chunksize = ChunkKeyStream $ map mk [1..]
where
mk chunknum = sizedk { keyChunkNum = Just chunknum }
sizedk = basek { keyChunkSize = Just (toInteger chunksize) }
mk chunknum = alterKey sizedk $ \d -> d
{ keyChunkNum = Just chunknum }
sizedk = alterKey basek $ \d -> d
{ keyChunkSize = Just (toInteger chunksize) }
nextChunkKeyStream :: ChunkKeyStream -> (Key, ChunkKeyStream)
nextChunkKeyStream (ChunkKeyStream (k:l)) = (k, ChunkKeyStream l)
@ -80,7 +82,7 @@ takeChunkKeyStream n (ChunkKeyStream l) = genericTake n l
-- Number of chunks already consumed from the stream.
numChunks :: ChunkKeyStream -> Integer
numChunks = pred . fromJust . keyChunkNum . fst . nextChunkKeyStream
numChunks = pred . fromJust . fromKey keyChunkNum . fst . nextChunkKeyStream
{- Splits up the key's content into chunks, passing each chunk to
- the storer action, along with a corresponding chunk key and a
@ -173,7 +175,7 @@ seekResume
-> Annex (ChunkKeyStream, BytesProcessed)
seekResume h encryptor chunkkeys checker = do
sz <- liftIO (hFileSize h)
if sz <= fromMaybe 0 (keyChunkSize $ fst $ nextChunkKeyStream chunkkeys)
if sz <= fromMaybe 0 (fromKey keyChunkSize $ fst $ nextChunkKeyStream chunkkeys)
then return (chunkkeys, zeroBytesProcessed)
else check 0 chunkkeys sz
where
@ -193,7 +195,7 @@ seekResume h encryptor chunkkeys checker = do
return (cks, toBytesProcessed pos)
where
(k, cks') = nextChunkKeyStream cks
pos' = pos + fromMaybe 0 (keyChunkSize k)
pos' = pos + fromMaybe 0 (fromKey keyChunkSize k)
{- Removes all chunks of a key from a remote, by calling a remover
- action on each.
@ -208,7 +210,7 @@ removeChunks remover u chunkconfig encryptor k = do
ls <- chunkKeys u chunkconfig k
ok <- allM (remover . encryptor) (concat ls)
when ok $ do
let chunksizes = catMaybes $ map (keyChunkSize <=< headMaybe) ls
let chunksizes = catMaybes $ map (fromKey keyChunkSize <=< headMaybe) ls
forM_ chunksizes $ chunksRemoved u k . FixedSizeChunks . fromIntegral
return ok
@ -272,7 +274,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink
bracketIO (maybe opennew openresume offset) hClose $ \h -> do
void $ tosink (Just h) p content
let sz = toBytesProcessed $
fromMaybe 0 $ keyChunkSize k
fromMaybe 0 $ fromKey keyChunkSize k
getrest p h sz sz ks
`catchNonAsync` unable
case v of
@ -333,7 +335,7 @@ setupResume :: [[Key]] -> Integer -> [[Key]]
setupResume ls currsize = map dropunneeded ls
where
dropunneeded [] = []
dropunneeded l@(k:_) = case keyChunkSize k of
dropunneeded l@(k:_) = case fromKey keyChunkSize k of
Just chunksize | chunksize > 0 ->
genericDrop (currsize `div` chunksize) l
_ -> l

View file

@ -324,7 +324,7 @@ adjustExportImport r rs = case M.lookup "exporttree" (config r) of
liftIO $ Export.getExportTree db k
retrieveKeyFileFromExport dbv k _af dest p = unVerified $
if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (keyVariety k))
if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (fromKey keyVariety k))
then do
locs <- getexportlocs dbv k
case locs of
@ -336,5 +336,5 @@ adjustExportImport r rs = case M.lookup "exporttree" (config r) of
return False
(l:_) -> retrieveExport (exportActions r) k l dest p
else do
warning $ "exported content cannot be verified due to using the " ++ decodeBS (formatKeyVariety (keyVariety k)) ++ " backend"
warning $ "exported content cannot be verified due to using the " ++ decodeBS (formatKeyVariety (fromKey keyVariety k)) ++ " backend"
return False

View file

@ -347,7 +347,7 @@ checkKey hv r rs c info k = withS3Handle hv $ \case
Right us -> do
showChecking r
let check u = withUrlOptions $
Url.checkBoth u (keySize k)
Url.checkBoth u (fromKey keySize k)
anyM check us
checkKeyHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> Annex Bool
@ -417,7 +417,7 @@ checkPresentExportS3 hv r info k loc = withS3Handle hv $ \case
Just h -> checkKeyHelper info h (Left (T.pack $ bucketExportLocation info loc))
Nothing -> case getPublicUrlMaker info of
Just geturl -> withUrlOptions $
Url.checkBoth (geturl $ bucketExportLocation info loc) (keySize k)
Url.checkBoth (geturl $ bucketExportLocation info loc) (fromKey keySize k)
Nothing -> do
warning $ needS3Creds (uuid r)
giveup "No S3 credentials configured"

View file

@ -117,7 +117,7 @@ checkKey' key us = firsthit us (Right False) $ \u -> do
case downloader of
YoutubeDownloader -> youtubeDlCheck u'
_ -> catchMsgIO $
Url.withUrlOptions $ Url.checkBoth u' (keySize key)
Url.withUrlOptions $ Url.checkBoth u' (fromKey keySize key)
where
firsthit [] miss _ = return miss
firsthit (u:rest) _ a = do

View file

@ -21,7 +21,9 @@ type GitAnnexVersion = String
data GitAnnexDistribution = GitAnnexDistribution
{ distributionUrl :: String
, distributionKey :: Key
, distributionKey :: KeyData
-- ^ This used to be a Key, but now KeyData serializes
-- to Key { ... }, so back-compat for Read and Show is preserved.
, distributionVersion :: GitAnnexVersion
, distributionReleasedate :: UTCTime
, distributionUrgentUpgrade :: Maybe GitAnnexVersion
@ -46,7 +48,7 @@ parseInfoFile s = case lines s of
formatGitAnnexDistribution :: GitAnnexDistribution -> String
formatGitAnnexDistribution d = unlines
[ distributionUrl d
, serializeKey (distributionKey d)
, serializeKey $ mkKey $ const $ distributionKey d
, distributionVersion d
, show (distributionReleasedate d)
, maybe "" show (distributionUrgentUpgrade d)
@ -56,7 +58,7 @@ parseGitAnnexDistribution :: String -> Maybe GitAnnexDistribution
parseGitAnnexDistribution s = case lines s of
(u:k:v:d:uu:_) -> GitAnnexDistribution
<$> pure u
<*> deserializeKey k
<*> fmap (fromKey id) (deserializeKey k)
<*> pure v
<*> readish d
<*> pure (readish uu)

View file

@ -7,19 +7,47 @@
{-# LANGUAGE OverloadedStrings, DeriveGeneric #-}
module Types.Key where
module Types.Key (
KeyData(..),
Key,
fromKey,
mkKey,
alterKey,
isKeyPrefix,
splitKeyNameExtension,
keyParser,
keySerialization,
AssociatedFile(..),
KeyVariety(..),
HasExt(..),
HashSize(..),
hasExt,
sameExceptExt,
cryptographicallySecure,
isVerifiable,
formatKeyVariety,
parseKeyVariety,
) where
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L
import Data.ByteString.Builder
import Data.ByteString.Builder.Extra
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import Data.List
import System.Posix.Types
import Foreign.C.Types
import Data.Monoid
import Control.Applicative
import GHC.Generics
import Control.DeepSeq
import Prelude
{- A Key has a unique name, which is derived from a particular backend,
- and may contain other optional metadata. -}
data Key = Key
data KeyData = Key
{ keyName :: S.ByteString
, keyVariety :: KeyVariety
, keySize :: Maybe Integer
@ -28,8 +56,149 @@ data Key = Key
, keyChunkNum :: Maybe Integer
} deriving (Eq, Ord, Read, Show, Generic)
instance NFData KeyData
{- Caching the seralization of a key is an optimization.
-
- This constructor is not exported, and all smart constructors maintain
- the serialization.
-}
data Key = MkKey
{ keyData :: KeyData
, keySerialization :: S.ByteString
} deriving (Show, Generic)
instance Eq Key where
-- comparing the serialization would be unncessary work
a == b = keyData a == keyData b
instance Ord Key where
compare a b = compare (keyData a) (keyData b)
instance NFData Key
{- Access a field of data from the KeyData. -}
{-# INLINE fromKey #-}
fromKey :: (KeyData -> a) -> Key -> a
fromKey f = f . keyData
{- Smart constructor for a Key. The provided KeyData has all values empty. -}
mkKey :: (KeyData -> KeyData) -> Key
mkKey f =
let d = f stub
in MkKey d (mkKeySerialization d)
where
stub = Key
{ keyName = mempty
, keyVariety = OtherKey mempty
, keySize = Nothing
, keyMtime = Nothing
, keyChunkSize = Nothing
, keyChunkNum = Nothing
}
{- Alter a Key's data. -}
alterKey :: Key -> (KeyData -> KeyData) -> Key
alterKey k f =
let d = f (keyData k)
in MkKey d (mkKeySerialization d)
-- Checks if a string looks like at least the start of a key.
isKeyPrefix :: String -> Bool
isKeyPrefix s = [fieldSep, fieldSep] `isInfixOf` s
fieldSep :: Char
fieldSep = '-'
mkKeySerialization :: KeyData -> S.ByteString
mkKeySerialization = L.toStrict
. toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty
. buildKeyData
{- Builds a ByteString from a KeyData.
-
- The name field is always shown last, separated by doubled fieldSeps,
- and is the only field allowed to contain the fieldSep.
-}
buildKeyData :: KeyData -> Builder
buildKeyData k = byteString (formatKeyVariety (keyVariety k))
<> 's' ?: (integerDec <$> keySize k)
<> 'm' ?: (integerDec . (\(CTime t) -> fromIntegral t) <$> keyMtime k)
<> 'S' ?: (integerDec <$> keyChunkSize k)
<> 'C' ?: (integerDec <$> keyChunkNum k)
<> sepbefore (sepbefore (byteString (keyName k)))
where
sepbefore s = char7 fieldSep <> s
c ?: (Just b) = sepbefore (char7 c <> b)
_ ?: Nothing = mempty
{- This is a strict parser for security reasons; a key
- can contain only 4 fields, which all consist only of numbers.
- Any key containing other fields, or non-numeric data will fail
- to parse.
-
- If a key contained non-numeric fields, they could be used to
- embed data used in a SHA1 collision attack, which would be a
- problem since the keys are committed to git.
-}
keyParser :: A.Parser Key
keyParser = do
-- key variety cannot be empty
v <- (parseKeyVariety <$> A8.takeWhile1 (/= fieldSep))
s <- parsesize
m <- parsemtime
cs <- parsechunksize
cn <- parsechunknum
_ <- A8.char fieldSep
_ <- A8.char fieldSep
n <- A.takeByteString
if validKeyName v n
then
let d = Key
{ keyName = n
, keyVariety = v
, keySize = s
, keyMtime = m
, keyChunkSize = cs
, keyChunkNum = cn
}
in pure $ MkKey d (mkKeySerialization d)
else fail "invalid keyName"
where
parseopt p = (Just <$> (A8.char fieldSep *> p)) <|> pure Nothing
parsesize = parseopt $ A8.char 's' *> A8.decimal
parsemtime = parseopt $ CTime <$> (A8.char 'm' *> A8.decimal)
parsechunksize = parseopt $ A8.char 'S' *> A8.decimal
parsechunknum = parseopt $ A8.char 'C' *> A8.decimal
{- Limits the length of the extension in the keyName to mitigate against
- SHA1 collision attacks.
-
- In such an attack, the extension of the key could be made to contain
- the collision generation data, with the result that a signed git commit
- including such keys would not be secure.
-
- The maximum extension length ever generated for such a key was 8
- characters, but they may be unicode which could use up to 4 bytes each,
- so 32 bytes. 64 bytes is used here to give a little future wiggle-room.
- The SHA1 common-prefix attack needs 128 bytes of data.
-}
validKeyName :: KeyVariety -> S.ByteString -> Bool
validKeyName kv name
| hasExt kv =
let ext = snd $ splitKeyNameExtension' name
in S.length ext <= 64
| otherwise = True
{- This splits any extension out of the keyName, returning the
- keyName minus extension, and the extension (including leading dot).
-}
splitKeyNameExtension :: Key -> (S.ByteString, S.ByteString)
splitKeyNameExtension = splitKeyNameExtension' . keyName . keyData
splitKeyNameExtension' :: S.ByteString -> (S.ByteString, S.ByteString)
splitKeyNameExtension' keyname = S8.span (/= '.') keyname
{- A filename may be associated with a Key. -}
newtype AssociatedFile = AssociatedFile (Maybe FilePath)
deriving (Show, Eq, Ord)

View file

@ -11,6 +11,7 @@ module Types.Transfer where
import Types
import Types.Remote (Verification(..))
import Types.Key
import Utility.PID
import Utility.QuickCheck
import Utility.Url
@ -24,9 +25,12 @@ import Prelude
data Transfer = Transfer
{ transferDirection :: Direction
, transferUUID :: UUID
, transferKey :: Key
, transferKeyData :: KeyData
}
deriving (Eq, Ord, Read, Show)
deriving (Eq, Ord, Show, Read)
transferKey :: Transfer -> Key
transferKey = mkKey . const . transferKeyData
{- Information about a Transfer, stored in the transfer information file.
-

View file

@ -134,7 +134,7 @@ oldlog2key l
where
len = length l - 4
k = readKey1 (take len l)
sane = (not . S.null $ keyName k) && (not . S.null $ formatKeyVariety $ keyVariety k)
sane = (not . S.null $ fromKey keyName k) && (not . S.null $ formatKeyVariety $ fromKey keyVariety k)
-- WORM backend keys: "WORM:mtime:size:filename"
-- all the rest: "backend:key"
@ -145,7 +145,7 @@ oldlog2key l
readKey1 :: String -> Key
readKey1 v
| mixup = fromJust $ deserializeKey $ intercalate ":" $ Prelude.tail bits
| otherwise = stubKey
| otherwise = mkKey $ \d -> d
{ keyName = encodeBS n
, keyVariety = parseKeyVariety (encodeBS b)
, keySize = s
@ -165,12 +165,16 @@ readKey1 v
mixup = wormy && isUpper (Prelude.head $ bits !! 1)
showKey1 :: Key -> String
showKey1 Key { keyName = n , keyVariety = v, keySize = s, keyMtime = t } =
intercalate ":" $ filter (not . null) [b, showifhere t, showifhere s, decodeBS n]
showKey1 k = intercalate ":" $ filter (not . null)
[b, showifhere t, showifhere s, decodeBS n]
where
showifhere Nothing = ""
showifhere (Just x) = show x
b = decodeBS $ formatKeyVariety v
n = fromKey keyName k
v = fromKey keyVariety k
s = fromKey keySize k
t = fromKey keyMtime k
keyFile1 :: Key -> FilePath
keyFile1 key = replace "/" "%" $ replace "%" "&s" $ replace "&" "&a" $ showKey1 key
@ -194,7 +198,7 @@ lookupFile1 file = do
Right l -> makekey l
where
getsymlink = takeFileName <$> readSymbolicLink file
makekey l = case maybeLookupBackendVariety (keyVariety k) of
makekey l = case maybeLookupBackendVariety (fromKey keyVariety k) of
Nothing -> do
unless (null kname || null bname ||
not (isLinkToAnnex (toRawFilePath l))) $
@ -203,8 +207,8 @@ lookupFile1 file = do
Just backend -> return $ Just (k, backend)
where
k = fileKey1 l
bname = decodeBS (formatKeyVariety (keyVariety k))
kname = decodeBS (keyName k)
bname = decodeBS (formatKeyVariety (fromKey keyVariety k))
kname = decodeBS (fromKey keyName k)
skip = "skipping " ++ file ++
" (unknown backend " ++ bname ++ ")"

View file

@ -24,6 +24,7 @@ module Utility.Hash (
blake2b_512,
blake2bp_512,
md5,
md5s,
prop_hashes_stable,
Mac(..),
calcMac,
@ -106,6 +107,9 @@ blake2bp_512 = hashlazy
md5 :: L.ByteString -> Digest MD5
md5 = hashlazy
md5s :: S.ByteString -> Digest MD5
md5s = hash
{- Check that all the hashes continue to hash the same. -}
prop_hashes_stable :: Bool
prop_hashes_stable = all (\(hasher, result) -> hasher foo == result)

View file

@ -15,7 +15,7 @@
<small>
<a href="@{EditRepositoryR $ RepoUUID $ transferUUID transfer}">
#{maybe "unknown" Remote.name $ transferRemote info}
$with size <- maybe "unknown" (roughSize dataUnits True) $ keySize $ transferKey transfer
$with size <- maybe "unknown" (roughSize dataUnits True) $ keySize $ transferKeyData transfer
$if isJust $ startedTime info
$if isrunning info
<span .pull-right><b>#{percent} of #{size}</b>