From 81d402216d955260fd4744ab73031b4c693d6254 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 22 Nov 2019 16:24:04 -0400 Subject: [PATCH] cache the serialization of a Key This will speed up the common case where a Key is deserialized from disk, but is then serialized to build eg, the path to the annex object. Previously attempted in 4536c93bb2ecf114ab711beac33fa358facd6985 and reverted in 96aba8eff7597898f53bfbab5865ff30a927d355. The problems mentioned in the latter commit are addressed now: Read/Show of KeyData is backwards-compatible with Read/Show of Key from before this change, so Types.Distribution will keep working. The Eq instance is fixed. Also, Key has smart constructors, avoiding needing to remember to update the cached serialization. Used git-annex benchmark: find is 7% faster whereis is 3% faster get when all files are already present is 5% faster Generally, the benchmarks are running 0.1 seconds faster per 2000 files, on a ram disk in my laptop. --- Annex/Content.hs | 14 +-- Annex/Content/LowLevel.hs | 2 +- Annex/DirHashes.hs | 4 +- Annex/Export.hs | 2 +- Annex/Import.hs | 2 +- Annex/Locations.hs | 3 +- Annex/Transfer.hs | 8 +- Annex/VariantFile.hs | 6 +- Assistant/DeleteRemote.hs | 2 +- Assistant/Threads/TransferScanner.hs | 2 +- Assistant/TransferQueue.hs | 4 +- Assistant/Unused.hs | 2 +- Assistant/Upgrade.hs | 9 +- Backend.hs | 10 +- Backend/Hash.hs | 25 ++-- Backend/URL.hs | 2 +- Backend/WORM.hs | 10 +- CHANGELOG | 4 + Command/AddUrl.hs | 8 +- Command/Find.hs | 8 +- Command/FromKey.hs | 8 +- Command/Fsck.hs | 10 +- Command/Info.hs | 56 ++++----- Command/MatchExpression.hs | 2 +- Command/Migrate.hs | 2 +- Command/RegisterUrl.hs | 6 +- Command/SendKey.hs | 2 +- Command/SetKey.hs | 6 +- Command/Smudge.hs | 2 +- Command/TestRemote.hs | 8 +- Command/TransferInfo.hs | 2 +- Crypto.hs | 4 +- Database/Benchmark.hs | 2 +- Key.hs | 143 ++++------------------ Limit.hs | 6 +- Logs/Transfer.hs | 24 ++-- Messages/Progress.hs | 4 +- Remote/BitTorrent.hs | 2 +- Remote/External.hs | 2 +- Remote/External/Types.hs | 4 +- Remote/Git.hs | 6 +- Remote/GitLFS.hs | 6 +- Remote/Glacier.hs | 2 +- Remote/Helper/Chunked.hs | 18 +-- Remote/Helper/ExportImport.hs | 4 +- Remote/S3.hs | 4 +- Remote/Web.hs | 2 +- Types/Distribution.hs | 8 +- Types/Key.hs | 173 ++++++++++++++++++++++++++- Types/Transfer.hs | 8 +- Upgrade/V1.hs | 18 +-- Utility/Hash.hs | 4 + templates/dashboard/transfers.hamlet | 2 +- 53 files changed, 388 insertions(+), 289 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index 43fc3238c6..040914bb73 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -329,7 +329,7 @@ getViaTmpFromDisk rsp v key action = checkallowed $ do checkallowed a = case rsp of RetrievalAllKeysSecure -> a RetrievalVerifiableKeysSecure - | isVerifiable (keyVariety key) -> a + | isVerifiable (fromKey keyVariety key) -> a | otherwise -> ifM (annexAllowUnverifiedDownloads <$> Annex.getGitConfig) ( a , warnUnverifiableInsecure key >> return False @@ -353,7 +353,7 @@ verifyKeyContent :: RetrievalSecurityPolicy -> VerifyConfig -> Verification -> K verifyKeyContent rsp v verification k f = case (rsp, verification) of (_, Verified) -> return True (RetrievalVerifiableKeysSecure, _) - | isVerifiable (keyVariety k) -> verify + | isVerifiable (fromKey keyVariety k) -> verify | otherwise -> ifM (annexAllowUnverifiedDownloads <$> Annex.getGitConfig) ( verify , warnUnverifiableInsecure k >> return False @@ -365,12 +365,12 @@ verifyKeyContent rsp v verification k f = case (rsp, verification) of (_, MustVerify) -> verify where verify = enteringStage VerifyStage $ verifysize <&&> verifycontent - verifysize = case keySize k of + verifysize = case fromKey keySize k of Nothing -> return True Just size -> do size' <- liftIO $ catchDefaultIO 0 $ getFileSize f return (size' == size) - verifycontent = case Types.Backend.verifyKeyContent =<< Backend.maybeLookupBackendVariety (keyVariety k) of + verifycontent = case Types.Backend.verifyKeyContent =<< Backend.maybeLookupBackendVariety (fromKey keyVariety k) of Nothing -> return True Just verifier -> verifier k f @@ -382,7 +382,7 @@ warnUnverifiableInsecure k = warning $ unwords , "this safety check.)" ] where - kv = decodeBS (formatKeyVariety (keyVariety k)) + kv = decodeBS (formatKeyVariety (fromKey keyVariety k)) data VerifyConfig = AlwaysVerify | NoVerify | RemoteVerify Remote | DefaultVerify @@ -490,10 +490,10 @@ moveAnnex key src = ifM (checkSecureHashes key) checkSecureHashes :: Key -> Annex Bool checkSecureHashes key - | cryptographicallySecure (keyVariety key) = return True + | cryptographicallySecure (fromKey keyVariety key) = return True | otherwise = ifM (annexSecureHashesOnly <$> Annex.getGitConfig) ( do - warning $ "annex.securehashesonly blocked adding " ++ decodeBS (formatKeyVariety (keyVariety key)) ++ " key to annex objects" + warning $ "annex.securehashesonly blocked adding " ++ decodeBS (formatKeyVariety (fromKey keyVariety key)) ++ " key to annex objects" return False , return True ) diff --git a/Annex/Content/LowLevel.hs b/Annex/Content/LowLevel.hs index 599ff7d4ff..546e647def 100644 --- a/Annex/Content/LowLevel.hs +++ b/Annex/Content/LowLevel.hs @@ -100,7 +100,7 @@ preserveGitMode _ _ = return True - when doing concurrent downloads. -} checkDiskSpace :: Maybe FilePath -> Key -> Integer -> Bool -> Annex Bool -checkDiskSpace destdir key = checkDiskSpace' (fromMaybe 1 (keySize key)) destdir key +checkDiskSpace destdir key = checkDiskSpace' (fromMaybe 1 (fromKey keySize key)) destdir key {- Allows specifying the size of the key, if it's known, which is useful - as not all keys know their size. -} diff --git a/Annex/DirHashes.hs b/Annex/DirHashes.hs index 76e8bf7981..1fb0073826 100644 --- a/Annex/DirHashes.hs +++ b/Annex/DirHashes.hs @@ -65,14 +65,14 @@ hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s hashDirs _ sz s = addTrailingPathSeparator $ take sz s drop sz s hashDirLower :: HashLevels -> Hasher -hashDirLower n k = hashDirs n 3 $ take 6 $ show $ md5 $ serializeKey' $ nonChunkKey k +hashDirLower n k = hashDirs n 3 $ take 6 $ show $ md5s $ serializeKey' $ nonChunkKey k {- This was originally using Data.Hash.MD5 from MissingH. This new version - is faster, but ugly as it has to replicate the 4 Word32's that produced. -} hashDirMixed :: HashLevels -> Hasher hashDirMixed n k = hashDirs n 2 $ take 4 $ concatMap display_32bits_as_dir $ encodeWord32 $ map fromIntegral $ Data.ByteArray.unpack $ - Utility.Hash.md5 $ serializeKey' $ nonChunkKey k + Utility.Hash.md5s $ serializeKey' $ nonChunkKey k where encodeWord32 (b1:b2:b3:b4:rest) = (shiftL b4 24 .|. shiftL b3 16 .|. shiftL b2 8 .|. b1) diff --git a/Annex/Export.hs b/Annex/Export.hs index a72d5d0dbb..16786476de 100644 --- a/Annex/Export.hs +++ b/Annex/Export.hs @@ -33,7 +33,7 @@ exportKey :: Git.Sha -> Annex ExportKey exportKey sha = mk <$> catKey sha where mk (Just k) = AnnexKey k - mk Nothing = GitKey $ Key + mk Nothing = GitKey $ mkKey $ \k -> k { keyName = encodeBS $ Git.fromRef sha , keyVariety = SHA1Key (HasExt False) , keySize = Nothing diff --git a/Annex/Import.hs b/Annex/Import.hs index 9a939937af..7438a7794c 100644 --- a/Annex/Import.hs +++ b/Annex/Import.hs @@ -398,7 +398,7 @@ downloadImport remote importtreeconfig importablecontents = do {- Temporary key used for import of a ContentIdentifier while downloading - content, before generating its real key. -} importKey :: ContentIdentifier -> Integer -> Key -importKey (ContentIdentifier cid) size = stubKey +importKey (ContentIdentifier cid) size = mkKey $ \k -> k { keyName = cid , keyVariety = OtherKey "CID" , keySize = Just size diff --git a/Annex/Locations.hs b/Annex/Locations.hs index 96ab104c54..ac993ce387 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -93,7 +93,6 @@ module Annex.Locations ( import Data.Char import Data.Default import qualified Data.ByteString.Char8 as S8 -import qualified Data.ByteString.Lazy as L import Common import Key @@ -563,7 +562,7 @@ keyFile = fromRawFilePath . keyFile' keyFile' :: Key -> RawFilePath keyFile' k = - let b = L.toStrict (serializeKey' k) + let b = serializeKey' k in if any (`S8.elem` b) ['&', '%', ':', '/'] then S8.concatMap esc b else b diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs index 909bd22dae..193adf857a 100644 --- a/Annex/Transfer.hs +++ b/Annex/Transfer.hs @@ -40,15 +40,15 @@ import Data.Ord upload :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v upload u key f d a _witness = guardHaveUUID u $ - runTransfer (Transfer Upload u key) f d a + runTransfer (Transfer Upload u (fromKey id key)) f d a alwaysUpload :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v alwaysUpload u key f d a _witness = guardHaveUUID u $ - alwaysRunTransfer (Transfer Upload u key) f d a + alwaysRunTransfer (Transfer Upload u (fromKey id key)) f d a download :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v download u key f d a _witness = guardHaveUUID u $ - runTransfer (Transfer Download u key) f d a + runTransfer (Transfer Download u (fromKey id key)) f d a guardHaveUUID :: Observable v => UUID -> Annex v -> Annex v guardHaveUUID u a @@ -185,7 +185,7 @@ checkSecureHashes t a , a ) where - variety = keyVariety (transferKey t) + variety = fromKey keyVariety (transferKey t) type RetryDecider = Annex (TransferInfo -> TransferInfo -> Annex Bool) diff --git a/Annex/VariantFile.hs b/Annex/VariantFile.hs index 58a6b4b634..65f989ebae 100644 --- a/Annex/VariantFile.hs +++ b/Annex/VariantFile.hs @@ -10,7 +10,7 @@ module Annex.VariantFile where import Annex.Common import Utility.Hash -import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString as S variantMarker :: String variantMarker = ".variant-" @@ -41,5 +41,5 @@ variantFile file key where doubleconflict = variantMarker `isInfixOf` file -shortHash :: L.ByteString -> String -shortHash = take 4 . show . md5 +shortHash :: S.ByteString -> String +shortHash = take 4 . show . md5s diff --git a/Assistant/DeleteRemote.hs b/Assistant/DeleteRemote.hs index 99560d7afc..c7cf807831 100644 --- a/Assistant/DeleteRemote.hs +++ b/Assistant/DeleteRemote.hs @@ -64,7 +64,7 @@ removableRemote urlrenderer uuid = do where queueremaining r k = queueTransferWhenSmall "remaining object in unwanted remote" - (AssociatedFile Nothing) (Transfer Download uuid k) r + (AssociatedFile Nothing) (Transfer Download uuid (fromKey id k)) r {- Scanning for keys can take a long time; do not tie up - the Annex monad while doing it, so other threads continue to - run. -} diff --git a/Assistant/Threads/TransferScanner.hs b/Assistant/Threads/TransferScanner.hs index c6fc97fad0..71d7dd0462 100644 --- a/Assistant/Threads/TransferScanner.hs +++ b/Assistant/Threads/TransferScanner.hs @@ -186,7 +186,7 @@ genTransfer :: Direction -> Bool -> Key -> S.Set UUID -> Remote -> Maybe (Remote genTransfer direction want key slocs r | direction == Upload && Remote.readonly r = Nothing | S.member (Remote.uuid r) slocs == want = Just - (r, Transfer direction (Remote.uuid r) key) + (r, Transfer direction (Remote.uuid r) (fromKey id key)) | otherwise = Nothing remoteHas :: Remote -> Key -> Annex Bool diff --git a/Assistant/TransferQueue.hs b/Assistant/TransferQueue.hs index 43c1cf29b9..c852615ed1 100644 --- a/Assistant/TransferQueue.hs +++ b/Assistant/TransferQueue.hs @@ -96,7 +96,7 @@ queueTransfersMatching matching reason schedule k f direction inset s r = S.member (Remote.uuid r) s gentransfer r = Transfer { transferDirection = direction - , transferKey = k + , transferKeyData = fromKey id k , transferUUID = Remote.uuid r } defer @@ -129,7 +129,7 @@ queueDeferredDownloads reason schedule = do where gentransfer r = Transfer { transferDirection = Download - , transferKey = k + , transferKeyData = fromKey id k , transferUUID = Remote.uuid r } diff --git a/Assistant/Unused.hs b/Assistant/Unused.hs index d9c17e7a2c..da73f77abd 100644 --- a/Assistant/Unused.hs +++ b/Assistant/Unused.hs @@ -62,7 +62,7 @@ describeUnused' whenbig = liftAnnex $ go =<< readUnusedLog "" tenthused Nothing _ = False tenthused (Just disksize) used = used >= disksize `div` 10 - sumkeysize s k = s + fromMaybe 0 (keySize k) + sumkeysize s k = s + fromMaybe 0 (fromKey keySize k) forpath a = inRepo $ liftIO . a . Git.repoPath diff --git a/Assistant/Upgrade.hs b/Assistant/Upgrade.hs index e46ac86ced..53eeac3222 100644 --- a/Assistant/Upgrade.hs +++ b/Assistant/Upgrade.hs @@ -25,7 +25,6 @@ import Annex.Content import Annex.UUID import qualified Backend import qualified Types.Backend -import qualified Types.Key import Assistant.TransferQueue import Assistant.TransferSlots import Remote (remoteFromUUID) @@ -91,13 +90,13 @@ startDistributionDownload d = go =<< liftIO . newVersionLocation d =<< liftIO ol maybe noop (queueTransfer "upgrade" Next (AssociatedFile (Just f)) t) =<< liftAnnex (remoteFromUUID webUUID) startTransfer t - k = distributionKey d + k = mkKey $ const $ distributionKey d u = distributionUrl d f = takeFileName u ++ " (for upgrade)" t = Transfer { transferDirection = Download , transferUUID = webUUID - , transferKey = k + , transferKeyData = fromKey id k } cleanup = liftAnnex $ do lockContentForRemoval k removeAnnex @@ -117,8 +116,8 @@ distributionDownloadComplete d dest cleanup t =<< liftAnnex (withObjectLoc k fsckit) | otherwise = cleanup where - k = distributionKey d - fsckit f = case Backend.maybeLookupBackendVariety (Types.Key.keyVariety k) of + k = mkKey $ const $ distributionKey d + fsckit f = case Backend.maybeLookupBackendVariety (fromKey keyVariety k) of Nothing -> return $ Just f Just b -> case Types.Backend.verifyKeyContent b of Nothing -> return $ Just f diff --git a/Backend.hs b/Backend.hs index 2b2962ff90..9a0abf7290 100644 --- a/Backend.hs +++ b/Backend.hs @@ -59,16 +59,18 @@ genKey source meterupdate preferredbackend = do Just k -> Just (makesane k, b) where -- keyNames should not contain newline characters. - makesane k = k { keyName = S8.map fixbadchar (keyName k) } + makesane k = alterKey k $ \d -> d + { keyName = S8.map fixbadchar (fromKey keyName k) + } fixbadchar c | c == '\n' = '_' | otherwise = c getBackend :: FilePath -> Key -> Annex (Maybe Backend) -getBackend file k = case maybeLookupBackendVariety (keyVariety k) of +getBackend file k = case maybeLookupBackendVariety (fromKey keyVariety k) of Just backend -> return $ Just backend Nothing -> do - warning $ "skipping " ++ file ++ " (unknown backend " ++ decodeBS (formatKeyVariety (keyVariety k)) ++ ")" + warning $ "skipping " ++ file ++ " (unknown backend " ++ decodeBS (formatKeyVariety (fromKey keyVariety k)) ++ ")" return Nothing {- Looks up the backend that should be used for a file. @@ -95,4 +97,4 @@ varietyMap = M.fromList $ zip (map B.backendVariety list) list isStableKey :: Key -> Bool isStableKey k = maybe False (`B.isStableKey` k) - (maybeLookupBackendVariety (keyVariety k)) + (maybeLookupBackendVariety (fromKey keyVariety k)) diff --git a/Backend/Hash.hs b/Backend/Hash.hs index 6cac6e3718..c91f175772 100644 --- a/Backend/Hash.hs +++ b/Backend/Hash.hs @@ -91,7 +91,7 @@ keyValue hash source meterupdate = do let file = contentLocation source filesize <- liftIO $ getFileSize file s <- hashFile hash file meterupdate - return $ Just $ stubKey + return $ Just $ mkKey $ \k -> k { keyName = encodeBS s , keyVariety = hashKeyVariety hash (HasExt False) , keySize = Just filesize @@ -105,8 +105,8 @@ keyValueE hash source meterupdate = addE k = do maxlen <- annexMaxExtensionLength <$> Annex.getGitConfig let ext = selectExtension maxlen (keyFilename source) - return $ Just $ k - { keyName = keyName k <> encodeBS ext + return $ Just $ alterKey k $ \d -> d + { keyName = keyName d <> encodeBS ext , keyVariety = hashKeyVariety hash (HasExt True) } @@ -169,7 +169,7 @@ needsUpgrade :: Key -> Bool needsUpgrade key = or [ "\\" `S8.isPrefixOf` keyHash key , any (not . validInExtension) (decodeBS $ snd $ splitKeyNameExtension key) - , not (hasExt (keyVariety key)) && keyHash key /= keyName key + , not (hasExt (fromKey keyVariety key)) && keyHash key /= fromKey keyName key ] trivialMigrate :: Key -> Backend -> AssociatedFile -> Annex (Maybe Key) @@ -179,14 +179,14 @@ trivialMigrate oldkey newbackend afile = trivialMigrate' oldkey newbackend afile trivialMigrate' :: Key -> Backend -> AssociatedFile -> Maybe Int -> Maybe Key trivialMigrate' oldkey newbackend afile maxextlen {- Fast migration from hashE to hash backend. -} - | migratable && hasExt oldvariety = Just $ oldkey + | migratable && hasExt oldvariety = Just $ alterKey oldkey $ \d -> d { keyName = keyHash oldkey , keyVariety = newvariety } {- Fast migration from hash to hashE backend. -} | migratable && hasExt newvariety = case afile of AssociatedFile Nothing -> Nothing - AssociatedFile (Just file) -> Just $ oldkey + AssociatedFile (Just file) -> Just $ alterKey oldkey $ \d -> d { keyName = keyHash oldkey <> encodeBS (selectExtension maxextlen file) , keyVariety = newvariety @@ -195,14 +195,15 @@ trivialMigrate' oldkey newbackend afile maxextlen - non-extension preserving key, with an extension - in its keyName. -} | newvariety == oldvariety && not (hasExt oldvariety) && - keyHash oldkey /= keyName oldkey = Just $ oldkey - { keyName = keyHash oldkey - } + keyHash oldkey /= fromKey keyName oldkey = + Just $ alterKey oldkey $ \d -> d + { keyName = keyHash oldkey + } | otherwise = Nothing where migratable = oldvariety /= newvariety && sameExceptExt oldvariety newvariety - oldvariety = keyVariety oldkey + oldvariety = fromKey keyVariety oldkey newvariety = backendVariety newbackend hashFile :: Hash -> FilePath -> MeterUpdate -> Annex String @@ -294,5 +295,7 @@ testKeyBackend = let b = genBackendE (SHA2Hash (HashSize 256)) in b { getKey = \ks p -> (fmap addE) <$> getKey b ks p } where - addE k = k { keyName = keyName k <> longext } + addE k = alterKey k $ \d -> d + { keyName = keyName d <> longext + } longext = ".this-is-a-test-key" diff --git a/Backend/URL.hs b/Backend/URL.hs index aad6c87db8..7e6313dc1e 100644 --- a/Backend/URL.hs +++ b/Backend/URL.hs @@ -32,7 +32,7 @@ backend = Backend {- Every unique url has a corresponding key. -} fromUrl :: String -> Maybe Integer -> Key -fromUrl url size = stubKey +fromUrl url size = mkKey $ \k -> k { keyName = genKeyName url , keyVariety = URLKey , keySize = size diff --git a/Backend/WORM.hs b/Backend/WORM.hs index 5455951d9e..cd6be25fb1 100644 --- a/Backend/WORM.hs +++ b/Backend/WORM.hs @@ -39,7 +39,7 @@ keyValue source _ = do stat <- liftIO $ getFileStatus f sz <- liftIO $ getFileSize' f stat relf <- getTopFilePath <$> inRepo (toTopFilePath $ keyFilename source) - return $ Just $ stubKey + return $ Just $ mkKey $ \k -> k { keyName = genKeyName relf , keyVariety = WORMKey , keySize = Just sz @@ -48,14 +48,14 @@ keyValue source _ = do {- Old WORM keys could contain spaces, and can be upgraded to remove them. -} needsUpgrade :: Key -> Bool -needsUpgrade key = ' ' `S8.elem` keyName key +needsUpgrade key = ' ' `S8.elem` fromKey keyName key removeSpaces :: Key -> Backend -> AssociatedFile -> Annex (Maybe Key) removeSpaces oldkey newbackend _ - | migratable = return $ Just $ oldkey - { keyName = encodeBS $ reSanitizeKeyName $ decodeBS $ keyName oldkey } + | migratable = return $ Just $ alterKey oldkey $ \d -> d + { keyName = encodeBS $ reSanitizeKeyName $ decodeBS $ keyName d } | otherwise = return Nothing where migratable = oldvariety == newvariety - oldvariety = keyVariety oldkey + oldvariety = fromKey keyVariety oldkey newvariety = backendVariety newbackend diff --git a/CHANGELOG b/CHANGELOG index b8bdd752c6..f9ec532956 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,9 @@ git-annex (7.20191115) UNRELEASED; urgency=medium + * Sped up many git-annex commands that operare on many files, by + avoiding reserialization of keys. + find is 7% faster; whereis is 3% faster; and git-annex get when + all files are already present is 5% faster * Stop displaying rsync progress, and use git-annex's own progress display for local-to-local repo transfers. * git-lfs: The url provided to initremote/enableremote will now be diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index aafa764919..a968aae9d5 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -162,7 +162,7 @@ performRemote r o uri file sz = ifAnnexed file adduri geturi adduri = addUrlChecked o loguri file (Remote.uuid r) checkexistssize checkexistssize key = return $ case sz of Nothing -> (True, True, loguri) - Just n -> (True, n == fromMaybe n (keySize key), loguri) + Just n -> (True, n == fromMaybe n (fromKey keySize key), loguri) geturi = next $ isJust <$> downloadRemoteFile r (downloadOptions o) uri file sz downloadRemoteFile :: Remote -> DownloadOptions -> URLString -> FilePath -> Maybe Integer -> Annex (Maybe Key) @@ -218,7 +218,7 @@ performWeb o url file urlinfo = ifAnnexed file addurl geturl addurl = addUrlChecked o url file webUUID $ \k -> ifM (pure (not (rawOption (downloadOptions o))) <&&> youtubeDlSupported url) ( return (True, True, setDownloader url YoutubeDownloader) - , return (Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k, url) + , return (Url.urlExists urlinfo, Url.urlSize urlinfo == fromKey keySize k, url) ) {- Check that the url exists, and has the same size as the key, @@ -379,7 +379,9 @@ finishDownloadWith tmp u url file = do {- Adds the url size to the Key. -} addSizeUrlKey :: Url.UrlInfo -> Key -> Key -addSizeUrlKey urlinfo key = key { keySize = Url.urlSize urlinfo } +addSizeUrlKey urlinfo key = alterKey key $ \d -> d + { keySize = Url.urlSize urlinfo + } {- Adds worktree file to the repository. -} addWorkTree :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex () diff --git a/Command/Find.hs b/Command/Find.hs index dd16e31d01..820b993a93 100644 --- a/Command/Find.hs +++ b/Command/Find.hs @@ -87,14 +87,14 @@ showFormatted format unformatted vars = keyVars :: Key -> [(String, String)] keyVars key = [ ("key", serializeKey key) - , ("backend", decodeBS $ formatKeyVariety $ keyVariety key) + , ("backend", decodeBS $ formatKeyVariety $ fromKey keyVariety key) , ("bytesize", size show) , ("humansize", size $ roughSize storageUnits True) - , ("keyname", decodeBS $ keyName key) + , ("keyname", decodeBS $ fromKey keyName key) , ("hashdirlower", hashDirLower def key) , ("hashdirmixed", hashDirMixed def key) - , ("mtime", whenavail show $ keyMtime key) + , ("mtime", whenavail show $ fromKey keyMtime key) ] where - size c = whenavail c $ keySize key + size c = whenavail c $ fromKey keySize key whenavail = maybe "unknown" diff --git a/Command/FromKey.hs b/Command/FromKey.hs index 7f1ef6f474..45b37f94d9 100644 --- a/Command/FromKey.hs +++ b/Command/FromKey.hs @@ -49,14 +49,14 @@ seekBatch fmt = batchInput fmt parse commandAction parse s = let (keyname, file) = separate (== ' ') s in if not (null keyname) && not (null file) - then Right $ go file (mkKey keyname) + then Right $ go file (keyOpt keyname) else Left "Expected pairs of key and filename" go file key = starting "fromkey" (mkActionItem (key, file)) $ perform key file start :: Bool -> (String, FilePath) -> CommandStart start force (keyname, file) = do - let key = mkKey keyname + let key = keyOpt keyname unless force $ do inbackend <- inAnnex key unless inbackend $ giveup $ @@ -71,8 +71,8 @@ start force (keyname, file) = do -- For example, "WORM--a:a" parses as an uri. To disambiguate, check -- the uri scheme, to see if it looks like the prefix of a key. This relies -- on key backend names never containing a ':'. -mkKey :: String -> Key -mkKey s = case parseURI s of +keyOpt :: String -> Key +keyOpt s = case parseURI s of Just u | not (isKeyPrefix (uriScheme u)) -> Backend.URL.fromUrl s Nothing _ -> case deserializeKey s of diff --git a/Command/Fsck.hs b/Command/Fsck.hs index bed59a52c7..480042f9b5 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -182,7 +182,7 @@ performRemote key afile backend numcopies remote = startKey :: Maybe Remote -> Incremental -> (Key, ActionItem) -> NumCopies -> CommandStart startKey from inc (key, ai) numcopies = - case Backend.maybeLookupBackendVariety (keyVariety key) of + case Backend.maybeLookupBackendVariety (fromKey keyVariety key) of Nothing -> stop Just backend -> runFsck inc ai key $ case from of @@ -244,9 +244,9 @@ verifyLocationLog key keystatus ai = do - insecure hash is present. This should only be able to happen - if the repository already contained the content before the - config was set. -} - when (present && not (cryptographicallySecure (keyVariety key))) $ + when (present && not (cryptographicallySecure (fromKey keyVariety key))) $ whenM (annexSecureHashesOnly <$> Annex.getGitConfig) $ - warning $ "** Despite annex.securehashesonly being set, " ++ obj ++ " has content present in the annex using an insecure " ++ decodeBS (formatKeyVariety (keyVariety key)) ++ " key" + warning $ "** Despite annex.securehashesonly being set, " ++ obj ++ " has content present in the annex using an insecure " ++ decodeBS (formatKeyVariety (fromKey keyVariety key)) ++ " key" verifyLocationLog' key ai present u (logChange key u) @@ -362,7 +362,7 @@ checkKeySizeRemote key remote ai localcopy = checkKeySizeOr (badContentRemote remote localcopy) key localcopy ai checkKeySizeOr :: (Key -> Annex String) -> Key -> FilePath -> ActionItem -> Annex Bool -checkKeySizeOr bad key file ai = case keySize key of +checkKeySizeOr bad key file ai = case fromKey keySize key of Nothing -> return True Just size -> do size' <- liftIO $ getFileSize file @@ -396,7 +396,7 @@ checkKeyUpgrade backend key ai (AssociatedFile (Just file)) = [ actionItemDesc ai , ": Can be upgraded to an improved key format. " , "You can do so by running: git annex migrate --backend=" - , decodeBS (formatKeyVariety (keyVariety key)) ++ " " + , decodeBS (formatKeyVariety (fromKey keyVariety key)) ++ " " , file ] return True diff --git a/Command/Info.hs b/Command/Info.hs index 23a8fc2899..0c429dee72 100644 --- a/Command/Info.hs +++ b/Command/Info.hs @@ -50,23 +50,23 @@ import qualified Command.Unused type Stat = StatState (Maybe (String, StatState String)) -- data about a set of keys -data KeyData = KeyData +data KeyInfo = KeyInfo { countKeys :: Integer , sizeKeys :: Integer , unknownSizeKeys :: Integer , backendsKeys :: M.Map KeyVariety Integer } -instance Sem.Semigroup KeyData where - a <> b = KeyData +instance Sem.Semigroup KeyInfo where + a <> b = KeyInfo { countKeys = countKeys a + countKeys b , sizeKeys = sizeKeys a + sizeKeys b , unknownSizeKeys = unknownSizeKeys a + unknownSizeKeys b , backendsKeys = backendsKeys a <> backendsKeys b } -instance Monoid KeyData where - mempty = KeyData 0 0 0 M.empty +instance Monoid KeyInfo where + mempty = KeyInfo 0 0 0 M.empty data NumCopiesStats = NumCopiesStats { numCopiesVarianceMap :: M.Map Variance Integer @@ -82,9 +82,9 @@ instance Show Variance where -- cached info that multiple Stats use data StatInfo = StatInfo - { presentData :: Maybe KeyData - , referencedData :: Maybe KeyData - , repoData :: M.Map UUID KeyData + { presentData :: Maybe KeyInfo + , referencedData :: Maybe KeyInfo + , repoData :: M.Map UUID KeyInfo , numCopiesStats :: Maybe NumCopiesStats , infoOptions :: InfoOptions } @@ -512,7 +512,7 @@ reposizes_total :: Stat reposizes_total = simpleStat "combined size of repositories containing these files" $ showSizeKeys . mconcat . M.elems =<< cachedRepoData -cachedPresentData :: StatState KeyData +cachedPresentData :: StatState KeyInfo cachedPresentData = do s <- get case presentData s of @@ -522,7 +522,7 @@ cachedPresentData = do put s { presentData = Just v } return v -cachedRemoteData :: UUID -> StatState KeyData +cachedRemoteData :: UUID -> StatState KeyInfo cachedRemoteData u = do s <- get case M.lookup u (repoData s) of @@ -531,19 +531,19 @@ cachedRemoteData u = do let combinedata d uk = finishCheck uk >>= \case Nothing -> return d Just k -> return $ addKey k d - v <- lift $ foldM combinedata emptyKeyData + v <- lift $ foldM combinedata emptyKeyInfo =<< loggedKeysFor' u put s { repoData = M.insert u v (repoData s) } return v -cachedReferencedData :: StatState KeyData +cachedReferencedData :: StatState KeyInfo cachedReferencedData = do s <- get case referencedData s of Just v -> return v Nothing -> do !v <- lift $ Command.Unused.withKeysReferenced - emptyKeyData addKey + emptyKeyInfo addKey put s { referencedData = Just v } return v @@ -552,7 +552,7 @@ cachedNumCopiesStats :: StatState (Maybe NumCopiesStats) cachedNumCopiesStats = numCopiesStats <$> get -- currently only available for directory info -cachedRepoData :: StatState (M.Map UUID KeyData) +cachedRepoData :: StatState (M.Map UUID KeyInfo) cachedRepoData = repoData <$> get getDirStatInfo :: InfoOptions -> FilePath -> Annex StatInfo @@ -564,7 +564,7 @@ getDirStatInfo o dir = do (update matcher fast) return $ StatInfo (Just presentdata) (Just referenceddata) repodata (Just numcopiesstats) o where - initial = (emptyKeyData, emptyKeyData, emptyNumCopiesStats, M.empty) + initial = (emptyKeyInfo, emptyKeyInfo, emptyNumCopiesStats, M.empty) update matcher fast key file vs@(presentdata, referenceddata, numcopiesstats, repodata) = ifM (matcher $ MatchingFile $ FileInfo file file) ( do @@ -594,7 +594,7 @@ getTreeStatInfo o r = do , return Nothing ) where - initial = (emptyKeyData, emptyKeyData, M.empty) + initial = (emptyKeyInfo, emptyKeyInfo, M.empty) go _ [] vs = return vs go fast (l:ls) vs@(presentdata, referenceddata, repodata) = do mk <- catKey (LsTree.sha l) @@ -613,33 +613,33 @@ getTreeStatInfo o r = do return (updateRepoData key locs repodata) go fast ls $! (presentdata', referenceddata', repodata') -emptyKeyData :: KeyData -emptyKeyData = KeyData 0 0 0 M.empty +emptyKeyInfo :: KeyInfo +emptyKeyInfo = KeyInfo 0 0 0 M.empty emptyNumCopiesStats :: NumCopiesStats emptyNumCopiesStats = NumCopiesStats M.empty -foldKeys :: [Key] -> KeyData -foldKeys = foldl' (flip addKey) emptyKeyData +foldKeys :: [Key] -> KeyInfo +foldKeys = foldl' (flip addKey) emptyKeyInfo -addKey :: Key -> KeyData -> KeyData -addKey key (KeyData count size unknownsize backends) = - KeyData count' size' unknownsize' backends' +addKey :: Key -> KeyInfo -> KeyInfo +addKey key (KeyInfo count size unknownsize backends) = + KeyInfo count' size' unknownsize' backends' where {- All calculations strict to avoid thunks when repeatedly - applied to many keys. -} !count' = count + 1 - !backends' = M.insertWith (+) (keyVariety key) 1 backends + !backends' = M.insertWith (+) (fromKey keyVariety key) 1 backends !size' = maybe size (+ size) ks !unknownsize' = maybe (unknownsize + 1) (const unknownsize) ks - ks = keySize key + ks = fromKey keySize key -updateRepoData :: Key -> [UUID] -> M.Map UUID KeyData -> M.Map UUID KeyData +updateRepoData :: Key -> [UUID] -> M.Map UUID KeyInfo -> M.Map UUID KeyInfo updateRepoData key locs m = m' where !m' = M.unionWith (\_old new -> new) m $ M.fromList $ zip locs (map update locs) - update loc = addKey key (fromMaybe emptyKeyData $ M.lookup loc m) + update loc = addKey key (fromMaybe emptyKeyInfo $ M.lookup loc m) updateNumCopiesStats :: FilePath -> NumCopiesStats -> [UUID] -> Annex NumCopiesStats updateNumCopiesStats file (NumCopiesStats m) locs = do @@ -649,7 +649,7 @@ updateNumCopiesStats file (NumCopiesStats m) locs = do let !ret = NumCopiesStats m' return ret -showSizeKeys :: KeyData -> StatState String +showSizeKeys :: KeyInfo -> StatState String showSizeKeys d = do sizer <- mkSizer return $ total sizer ++ missingnote diff --git a/Command/MatchExpression.hs b/Command/MatchExpression.hs index 77ceb0662e..3e79a0387d 100644 --- a/Command/MatchExpression.hs +++ b/Command/MatchExpression.hs @@ -67,7 +67,7 @@ optParser desc = MatchExpressionOptions missingdata datadesc = bail $ "cannot match this expression without " ++ datadesc ++ " data" -- When a key is provided, make its size also be provided. addkeysize p = case providedKey p of - Right k -> case keySize k of + Right k -> case fromKey keySize k of Just sz -> p { providedFileSize = Right sz } Nothing -> p Left _ -> p diff --git a/Command/Migrate.hs b/Command/Migrate.hs index ca656e028d..ca65cbef1e 100644 --- a/Command/Migrate.hs +++ b/Command/Migrate.hs @@ -50,7 +50,7 @@ start file key = do - - Something has changed in the backend, such as a bug fix. -} upgradableKey :: Backend -> Key -> Bool -upgradableKey backend key = isNothing (keySize key) || backendupgradable +upgradableKey backend key = isNothing (fromKey keySize key) || backendupgradable where backendupgradable = maybe False (\a -> a key) (canUpgradeKey backend) diff --git a/Command/RegisterUrl.hs b/Command/RegisterUrl.hs index d13889a9b7..7fdd2836f6 100644 --- a/Command/RegisterUrl.hs +++ b/Command/RegisterUrl.hs @@ -11,7 +11,7 @@ module Command.RegisterUrl where import Command import Logs.Web -import Command.FromKey (mkKey) +import Command.FromKey (keyOpt) import qualified Remote cmd :: Command @@ -41,7 +41,7 @@ seek o = case (batchOption o, keyUrlPairs o) of start :: [String] -> CommandStart start (keyname:url:[]) = starting "registerurl" (ActionItemOther (Just url)) $ do - let key = mkKey keyname + let key = keyOpt keyname perform key url start _ = giveup "specify a key and an url" @@ -55,7 +55,7 @@ massAdd fmt = go True =<< map (separate (== ' ')) <$> batchLines fmt where go status [] = next $ return status go status ((keyname,u):rest) | not (null keyname) && not (null u) = do - let key = mkKey keyname + let key = keyOpt keyname ok <- perform' key u let !status' = status && ok go status' rest diff --git a/Command/SendKey.hs b/Command/SendKey.hs index 36959a8ae4..57832cee92 100644 --- a/Command/SendKey.hs +++ b/Command/SendKey.hs @@ -49,7 +49,7 @@ fieldTransfer direction key a = do afile <- AssociatedFile <$> Fields.getField Fields.associatedFile ok <- maybe (a $ const noop) -- Using noRetry here because we're the sender. - (\u -> runner (Transfer direction (toUUID u) key) afile noRetry a) + (\u -> runner (Transfer direction (toUUID u) (fromKey id key)) afile noRetry a) =<< Fields.getField Fields.remoteUUID liftIO $ debugM "fieldTransfer" "transfer done" liftIO $ exitBool ok diff --git a/Command/SetKey.hs b/Command/SetKey.hs index 1cf7fb14e2..703679494d 100644 --- a/Command/SetKey.hs +++ b/Command/SetKey.hs @@ -21,11 +21,11 @@ seek = withWords (commandAction . start) start :: [String] -> CommandStart start (keyname:file:[]) = starting "setkey" (ActionItemOther (Just file)) $ - perform file (mkKey keyname) + perform file (keyOpt keyname) start _ = giveup "specify a key and a content file" -mkKey :: String -> Key -mkKey = fromMaybe (giveup "bad key") . deserializeKey +keyOpt :: String -> Key +keyOpt = fromMaybe (giveup "bad key") . deserializeKey perform :: FilePath -> Key -> CommandPerform perform file key = do diff --git a/Command/Smudge.hs b/Command/Smudge.hs index 90c55e3cc1..7191461bd2 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -119,7 +119,7 @@ clean file = do -- Look up the backend that was used for this file -- before, so that when git re-cleans a file its -- backend does not change. - let oldbackend = maybe Nothing (maybeLookupBackendVariety . keyVariety) oldkey + let oldbackend = maybe Nothing (maybeLookupBackendVariety . fromKey keyVariety) oldkey -- Can't restage associated files because git add -- runs this and has the index locked. let norestage = Restage False diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs index 0e7403f19b..eef6ccaea1 100644 --- a/Command/TestRemote.hs +++ b/Command/TestRemote.hs @@ -107,14 +107,14 @@ perform rs unavailrs exportr ks = do next $ cleanup rs ks ok where desc r' k = intercalate "; " $ map unwords - [ [ "key size", show (keySize k) ] + [ [ "key size", show (fromKey keySize k) ] , [ show (getChunkConfig (Remote.config r')) ] , ["encryption", fromMaybe "none" (M.lookup "encryption" (Remote.config r'))] ] descexport k1 k2 = intercalate "; " $ map unwords [ [ "exporttree=yes" ] - , [ "key1 size", show (keySize k1) ] - , [ "key2 size", show (keySize k2) ] + , [ "key1 size", show (fromKey keySize k1) ] + , [ "key2 size", show (fromKey keySize k2) ] ] adjustChunkSize :: Remote -> Int -> Annex (Maybe Remote) @@ -199,7 +199,7 @@ test st r k = catMaybes Annex.eval st (Annex.setOutput QuietOutput >> a) @? "failed" present b = check ("present " ++ show b) $ (== Right b) <$> Remote.hasKey r k - fsck = case maybeLookupBackendVariety (keyVariety k) of + fsck = case maybeLookupBackendVariety (fromKey keyVariety k) of Nothing -> return True Just b -> case Backend.verifyKeyContent b of Nothing -> return True diff --git a/Command/TransferInfo.hs b/Command/TransferInfo.hs index abcf8c6b14..520d79b479 100644 --- a/Command/TransferInfo.hs +++ b/Command/TransferInfo.hs @@ -47,7 +47,7 @@ start (k:[]) = do let t = Transfer { transferDirection = Upload , transferUUID = u - , transferKey = key + , transferKeyData = fromKey id key } tinfo <- liftIO $ startTransferInfo afile (update, tfile, createtfile, _) <- mkProgressUpdater t tinfo diff --git a/Crypto.hs b/Crypto.hs index 8dd4e3d4f1..08aef47cd5 100644 --- a/Crypto.hs +++ b/Crypto.hs @@ -161,7 +161,7 @@ type EncKey = Key -> Key - reversable, nor does it need to be the same type of encryption used - on content. It does need to be repeatable. -} encryptKey :: Mac -> Cipher -> EncKey -encryptKey mac c k = stubKey +encryptKey mac c k = mkKey $ \d -> d { keyName = encodeBS (macWithCipher mac c (serializeKey k)) , keyVariety = OtherKey $ encryptedBackendNamePrefix <> encodeBS (showMac mac) @@ -171,7 +171,7 @@ encryptedBackendNamePrefix :: S.ByteString encryptedBackendNamePrefix = "GPG" isEncKey :: Key -> Bool -isEncKey k = case keyVariety k of +isEncKey k = case fromKey keyVariety k of OtherKey s -> encryptedBackendNamePrefix `S.isPrefixOf` s _ -> False diff --git a/Database/Benchmark.hs b/Database/Benchmark.hs index 906337ff3a..c4440abd77 100644 --- a/Database/Benchmark.hs +++ b/Database/Benchmark.hs @@ -87,7 +87,7 @@ populateAssociatedFiles h num = do H.flushDbQueue h keyN :: Integer -> Key -keyN n = stubKey +keyN n = mkKey $ \k -> k { keyName = B8.pack $ "key" ++ show n , keyVariety = OtherKey "BENCH" } diff --git a/Key.hs b/Key.hs index 6c369ac425..22f6d79144 100644 --- a/Key.hs +++ b/Key.hs @@ -8,10 +8,12 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} module Key ( - Key(..), + Key, + KeyData(..), AssociatedFile(..), - stubKey, - buildKey, + fromKey, + mkKey, + alterKey, keyParser, serializeKey, serializeKey', @@ -28,13 +30,7 @@ module Key ( import qualified Data.Text as T import qualified Data.ByteString as S -import qualified Data.ByteString.Char8 as S8 -import qualified Data.ByteString.Lazy as L -import Data.ByteString.Builder -import Data.ByteString.Builder.Extra import qualified Data.Attoparsec.ByteString as A -import qualified Data.Attoparsec.ByteString.Char8 as A8 -import Foreign.C.Types import Common import Types.Key @@ -43,134 +39,37 @@ import Utility.Bloom import Utility.Aeson import qualified Utility.SimpleProtocol as Proto -stubKey :: Key -stubKey = Key - { keyName = mempty - , keyVariety = OtherKey mempty - , keySize = Nothing - , keyMtime = Nothing - , keyChunkSize = Nothing - , keyChunkNum = Nothing - } - -- Gets the parent of a chunk key. nonChunkKey :: Key -> Key -nonChunkKey k = k - { keyChunkSize = Nothing - , keyChunkNum = Nothing - } +nonChunkKey k + | fromKey keyChunkSize k == Nothing && fromKey keyChunkNum k == Nothing = k + | otherwise = alterKey k $ \d -> d + { keyChunkSize = Nothing + , keyChunkNum = Nothing + } -- Where a chunk key is offset within its parent. chunkKeyOffset :: Key -> Maybe Integer chunkKeyOffset k = (*) - <$> keyChunkSize k - <*> (pred <$> keyChunkNum k) + <$> fromKey keyChunkSize k + <*> (pred <$> fromKey keyChunkNum k) isChunkKey :: Key -> Bool -isChunkKey k = isJust (keyChunkSize k) && isJust (keyChunkNum k) - --- Checks if a string looks like at least the start of a key. -isKeyPrefix :: String -> Bool -isKeyPrefix s = [fieldSep, fieldSep] `isInfixOf` s - -fieldSep :: Char -fieldSep = '-' - -{- Builds a ByteString from a Key. - - - - The name field is always shown last, separated by doubled fieldSeps, - - and is the only field allowed to contain the fieldSep. - -} -buildKey :: Key -> Builder -buildKey k = byteString (formatKeyVariety (keyVariety k)) - <> 's' ?: (integerDec <$> keySize k) - <> 'm' ?: (integerDec . (\(CTime t) -> fromIntegral t) <$> keyMtime k) - <> 'S' ?: (integerDec <$> keyChunkSize k) - <> 'C' ?: (integerDec <$> keyChunkNum k) - <> sepbefore (sepbefore (byteString (keyName k))) - where - sepbefore s = char7 fieldSep <> s - c ?: (Just b) = sepbefore (char7 c <> b) - _ ?: Nothing = mempty +isChunkKey k = isJust (fromKey keyChunkSize k) && isJust (fromKey keyChunkNum k) serializeKey :: Key -> String -serializeKey = decodeBL' . serializeKey' +serializeKey = decodeBS' . serializeKey' -serializeKey' :: Key -> L.ByteString -serializeKey' = toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty . buildKey - -{- This is a strict parser for security reasons; a key - - can contain only 4 fields, which all consist only of numbers. - - Any key containing other fields, or non-numeric data will fail - - to parse. - - - - If a key contained non-numeric fields, they could be used to - - embed data used in a SHA1 collision attack, which would be a - - problem since the keys are committed to git. - -} -keyParser :: A.Parser Key -keyParser = do - -- key variety cannot be empty - v <- (parseKeyVariety <$> A8.takeWhile1 (/= fieldSep)) - s <- parsesize - m <- parsemtime - cs <- parsechunksize - cn <- parsechunknum - _ <- A8.char fieldSep - _ <- A8.char fieldSep - n <- A.takeByteString - if validKeyName v n - then return $ Key - { keyName = n - , keyVariety = v - , keySize = s - , keyMtime = m - , keyChunkSize = cs - , keyChunkNum = cn - } - else fail "invalid keyName" - where - parseopt p = (Just <$> (A8.char fieldSep *> p)) <|> pure Nothing - parsesize = parseopt $ A8.char 's' *> A8.decimal - parsemtime = parseopt $ CTime <$> (A8.char 'm' *> A8.decimal) - parsechunksize = parseopt $ A8.char 'S' *> A8.decimal - parsechunknum = parseopt $ A8.char 'C' *> A8.decimal +serializeKey' :: Key -> S.ByteString +serializeKey' = keySerialization deserializeKey :: String -> Maybe Key deserializeKey = deserializeKey' . encodeBS' deserializeKey' :: S.ByteString -> Maybe Key -deserializeKey' b = eitherToMaybe $ A.parseOnly keyParser b +deserializeKey' = either (const Nothing) Just . A.parseOnly keyParser -{- This splits any extension out of the keyName, returning the - - keyName minus extension, and the extension (including leading dot). - -} -splitKeyNameExtension :: Key -> (S.ByteString, S.ByteString) -splitKeyNameExtension = splitKeyNameExtension' . keyName - -splitKeyNameExtension' :: S.ByteString -> (S.ByteString, S.ByteString) -splitKeyNameExtension' keyname = S8.span (/= '.') keyname - -{- Limits the length of the extension in the keyName to mitigate against - - SHA1 collision attacks. - - - - In such an attack, the extension of the key could be made to contain - - the collision generation data, with the result that a signed git commit - - including such keys would not be secure. - - - - The maximum extension length ever generated for such a key was 8 - - characters, but they may be unicode which could use up to 4 bytes each, - - so 32 bytes. 64 bytes is used here to give a little future wiggle-room. - - The SHA1 common-prefix attack needs 128 bytes of data. - -} -validKeyName :: KeyVariety -> S.ByteString -> Bool -validKeyName kv name - | hasExt kv = - let ext = snd $ splitKeyNameExtension' name - in S.length ext <= 64 - | otherwise = True - -instance Arbitrary Key where +instance Arbitrary KeyData where arbitrary = Key <$> (encodeBS <$> (listOf1 $ elements $ ['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "-_\r\n \t")) <*> (parseKeyVariety . encodeBS <$> (listOf1 $ elements ['A'..'Z'])) -- BACKEND @@ -179,6 +78,9 @@ instance Arbitrary Key where <*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative <*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative +instance Arbitrary Key where + arbitrary = mkKey . const <$> arbitrary + instance Hashable Key where hashIO32 = hashIO32 . serializeKey' hashIO64 = hashIO64 . serializeKey' @@ -196,3 +98,4 @@ instance Proto.Serializable Key where prop_isomorphic_key_encode :: Key -> Bool prop_isomorphic_key_encode k = Just k == (deserializeKey . serializeKey) k + diff --git a/Limit.hs b/Limit.hs index 5ac0fe636a..a9647fd27c 100644 --- a/Limit.hs +++ b/Limit.hs @@ -294,7 +294,7 @@ addInBackend = addLimit . limitInBackend limitInBackend :: MkLimit Annex limitInBackend name = Right $ const $ checkKey check where - check key = pure $ keyVariety key == variety + check key = pure $ fromKey keyVariety key == variety variety = parseKeyVariety (encodeBS name) {- Adds a limit to skip files not using a secure hash. -} @@ -302,7 +302,7 @@ addSecureHash :: Annex () addSecureHash = addLimit $ Right limitSecureHash limitSecureHash :: MatchFiles Annex -limitSecureHash _ = checkKey $ pure . cryptographicallySecure . keyVariety +limitSecureHash _ = checkKey $ pure . cryptographicallySecure . fromKey keyVariety {- Adds a limit to skip files that are too large or too small -} addLargerThan :: String -> Annex () @@ -327,7 +327,7 @@ limitSize lb vs s = case readSize dataUnits s of go sz _ (MatchingInfo p) = getInfo (providedFileSize p) >>= \sz' -> return (Just sz' `vs` Just sz) - checkkey sz key = return $ keySize key `vs` Just sz + checkkey sz key = return $ fromKey keySize key `vs` Just sz addMetaData :: String -> Annex () addMetaData = addLimit . limitMetaData diff --git a/Logs/Transfer.hs b/Logs/Transfer.hs index 58e035c1a0..eec270a9ce 100644 --- a/Logs/Transfer.hs +++ b/Logs/Transfer.hs @@ -1,6 +1,6 @@ {- git-annex transfer information files and lock files - - - Copyright 2012 Joey Hess + - Copyright 2012-2019 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -41,12 +41,14 @@ describeTransfer t info = unwords equivilantTransfer :: Transfer -> Transfer -> Bool equivilantTransfer t1 t2 | transferDirection t1 == Download && transferDirection t2 == Download && - transferKey t1 == transferKey t2 = True + transferKeyData t1 == transferKeyData t2 = True | otherwise = t1 == t2 percentComplete :: Transfer -> TransferInfo -> Maybe Percentage -percentComplete (Transfer { transferKey = key }) info = - percentage <$> keySize key <*> Just (fromMaybe 0 $ bytesComplete info) +percentComplete t info = + percentage + <$> keySize (transferKeyData t) + <*> Just (fromMaybe 0 $ bytesComplete info) {- Generates a callback that can be called as transfer progresses to update - the transfer info file. Also returns the file it'll be updating, @@ -72,7 +74,7 @@ mkProgressUpdater t info = do {- The minimum change in bytesComplete that is worth - updating a transfer info file for is 1% of the total - keySize, rounded down. -} - mindelta = case keySize (transferKey t) of + mindelta = case keySize (transferKeyData t) of Just sz -> sz `div` 100 Nothing -> 100 * 1024 -- arbitrarily, 100 kb @@ -155,7 +157,7 @@ sizeOfDownloadsInProgress wanted = sum . map remaining <$> getTransfers' [Download] wanted where remaining (t, info) = - case (keySize (transferKey t), bytesComplete info) of + case (fromKey keySize (transferKey t), bytesComplete info) of (Just sz, Just done) -> sz - done (Just sz, Nothing) -> sz (Nothing, _) -> 0 @@ -191,14 +193,14 @@ recordFailedTransfer t info = do {- The transfer information file to use for a given Transfer. -} transferFile :: Transfer -> Git.Repo -> FilePath -transferFile (Transfer direction u key) r = transferDir direction r +transferFile (Transfer direction u kd) r = transferDir direction r filter (/= '/') (fromUUID u) - keyFile key + keyFile (mkKey (const kd)) {- The transfer information file to use to record a failed Transfer -} failedTransferFile :: Transfer -> Git.Repo -> FilePath -failedTransferFile (Transfer direction u key) r = failedTransferDir u direction r - keyFile key +failedTransferFile (Transfer direction u kd) r = failedTransferDir u direction r + keyFile (mkKey (const kd)) {- The transfer lock file corresponding to a given transfer info file. -} transferLockFile :: FilePath -> FilePath @@ -213,7 +215,7 @@ parseTransferFile file [direction, u, key] -> Transfer <$> parseDirection direction <*> pure (toUUID u) - <*> fileKey key + <*> fmap (fromKey id) (fileKey key) _ -> Nothing where bits = splitDirectories file diff --git a/Messages/Progress.hs b/Messages/Progress.hs index 48615918ae..e9b0208363 100644 --- a/Messages/Progress.hs +++ b/Messages/Progress.hs @@ -36,7 +36,7 @@ instance MeterSize FileSize where getMeterSize = pure . Just instance MeterSize Key where - getMeterSize = pure . keySize + getMeterSize = pure . fromKey keySize instance MeterSize InodeCache where getMeterSize = pure . Just . inodeCacheFileSize @@ -51,7 +51,7 @@ instance MeterSize KeySource where data KeySizer = KeySizer Key (Annex (Maybe FilePath)) instance MeterSize KeySizer where - getMeterSize (KeySizer k getsrcfile) = case keySize k of + getMeterSize (KeySizer k getsrcfile) = case fromKey keySize k of Just sz -> return (Just sz) Nothing -> do srcfile <- getsrcfile diff --git a/Remote/BitTorrent.hs b/Remote/BitTorrent.hs index b18e0334a2..09fa5ed744 100644 --- a/Remote/BitTorrent.hs +++ b/Remote/BitTorrent.hs @@ -258,7 +258,7 @@ downloadTorrentContent k u dest filenum p = do , return False ) where - download torrent tmpdir = ariaProgress (keySize k) p + download torrent tmpdir = ariaProgress (fromKey keySize k) p [ Param $ "--select-file=" ++ show filenum , File torrent , Param "-d" diff --git a/Remote/External.hs b/Remote/External.hs index cbf3e57b7a..09af889e93 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -716,7 +716,7 @@ checkKeyUrl :: Git.Repo -> CheckPresent checkKeyUrl r k = do showChecking r us <- getWebUrls k - anyM (\u -> withUrlOptions $ checkBoth u (keySize k)) us + anyM (\u -> withUrlOptions $ checkBoth u (fromKey keySize k)) us getWebUrls :: Key -> Annex [URLString] getWebUrls key = filter supported <$> getUrls key diff --git a/Remote/External/Types.hs b/Remote/External/Types.hs index 9c1e207aa1..7592764117 100644 --- a/Remote/External/Types.hs +++ b/Remote/External/Types.hs @@ -101,10 +101,10 @@ newtype SafeKey = SafeKey Key mkSafeKey :: Key -> Either String SafeKey mkSafeKey k - | any isSpace (decodeBS $ keyName k) = Left $ concat + | any isSpace (decodeBS $ fromKey keyName k) = Left $ concat [ "Sorry, this file cannot be stored on an external special remote because its key's name contains a space. " , "To avoid this problem, you can run: git-annex migrate --backend=" - , decodeBS (formatKeyVariety (keyVariety k)) + , decodeBS (formatKeyVariety (fromKey keyVariety k)) , " and pass it the name of the file" ] | otherwise = Right (SafeKey k) diff --git a/Remote/Git.hs b/Remote/Git.hs index 6e1b31f748..9e12dcb52d 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -367,7 +367,7 @@ inAnnex' repo rmt (State connpool duc _ _) key checkhttp = do showChecking repo gc <- Annex.getGitConfig - ifM (Url.withUrlOptions $ \uo -> anyM (\u -> Url.checkBoth u (keySize key) uo) (keyUrls gc repo rmt key)) + ifM (Url.withUrlOptions $ \uo -> anyM (\u -> Url.checkBoth u (fromKey keySize key) uo) (keyUrls gc repo rmt key)) ( return True , giveup "not found" ) @@ -511,7 +511,7 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _) key file dest meter Nothing -> return (False, UnVerified) Just (object, checksuccess) -> do copier <- mkCopier hardlink st params - runTransfer (Transfer Download u key) + runTransfer (Transfer Download u (fromKey id key)) file stdRetry $ \p -> metered (Just (combineMeterUpdate p meterupdate)) key $ \_ p' -> copier object dest p' checksuccess @@ -647,7 +647,7 @@ copyToRemote' repo r st@(State connpool duc _ _) key file meterupdate -- run copy from perspective of remote onLocalFast repo r $ ifM (Annex.Content.inAnnex key) ( return True - , runTransfer (Transfer Download u key) file stdRetry $ \p -> do + , runTransfer (Transfer Download u (fromKey id key)) file stdRetry $ \p -> do copier <- mkCopier hardlink st params let verify = Annex.Content.RemoteVerify r let rsp = RetrievalAllKeysSecure diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index 01f76a5a8b..3da33ac55b 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -344,10 +344,10 @@ sendTransferRequest req endpoint = do LFS.ParseFailed err -> Left err extractKeySha256 :: Key -> Maybe LFS.SHA256 -extractKeySha256 k = case keyVariety k of +extractKeySha256 k = case fromKey keyVariety k of SHA2Key (HashSize 256) (HasExt hasext) | hasext -> eitherToMaybe $ E.decodeUtf8' (keyHash k) - | otherwise -> eitherToMaybe $ E.decodeUtf8' (keyName k) + | otherwise -> eitherToMaybe $ E.decodeUtf8' (fromKey keyName k) _ -> Nothing -- The size of an encrypted key is the size of the input data, but we need @@ -355,7 +355,7 @@ extractKeySha256 k = case keyVariety k of extractKeySize :: Key -> Maybe Integer extractKeySize k | isEncKey k = Nothing - | otherwise = keySize k + | otherwise = fromKey keySize k mkUploadRequest :: RemoteStateHandle -> Key -> FilePath -> Annex (LFS.TransferRequest, LFS.SHA256, Integer) mkUploadRequest rs k content = case (extractKeySha256 k, extractKeySize k) of diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index 6823179d12..00d623f50f 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -117,7 +117,7 @@ prepareStore r = checkPrepare nonEmpty (byteStorer $ store r) nonEmpty :: Key -> Annex Bool nonEmpty k - | keySize k == Just 0 = do + | fromKey keySize k == Just 0 = do warning "Cannot store empty files in Glacier." return False | otherwise = return True diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index a011d9b5a2..473760edb3 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -68,8 +68,10 @@ newtype ChunkKeyStream = ChunkKeyStream [Key] chunkKeyStream :: Key -> ChunkSize -> ChunkKeyStream chunkKeyStream basek chunksize = ChunkKeyStream $ map mk [1..] where - mk chunknum = sizedk { keyChunkNum = Just chunknum } - sizedk = basek { keyChunkSize = Just (toInteger chunksize) } + mk chunknum = alterKey sizedk $ \d -> d + { keyChunkNum = Just chunknum } + sizedk = alterKey basek $ \d -> d + { keyChunkSize = Just (toInteger chunksize) } nextChunkKeyStream :: ChunkKeyStream -> (Key, ChunkKeyStream) nextChunkKeyStream (ChunkKeyStream (k:l)) = (k, ChunkKeyStream l) @@ -80,7 +82,7 @@ takeChunkKeyStream n (ChunkKeyStream l) = genericTake n l -- Number of chunks already consumed from the stream. numChunks :: ChunkKeyStream -> Integer -numChunks = pred . fromJust . keyChunkNum . fst . nextChunkKeyStream +numChunks = pred . fromJust . fromKey keyChunkNum . fst . nextChunkKeyStream {- Splits up the key's content into chunks, passing each chunk to - the storer action, along with a corresponding chunk key and a @@ -173,7 +175,7 @@ seekResume -> Annex (ChunkKeyStream, BytesProcessed) seekResume h encryptor chunkkeys checker = do sz <- liftIO (hFileSize h) - if sz <= fromMaybe 0 (keyChunkSize $ fst $ nextChunkKeyStream chunkkeys) + if sz <= fromMaybe 0 (fromKey keyChunkSize $ fst $ nextChunkKeyStream chunkkeys) then return (chunkkeys, zeroBytesProcessed) else check 0 chunkkeys sz where @@ -193,7 +195,7 @@ seekResume h encryptor chunkkeys checker = do return (cks, toBytesProcessed pos) where (k, cks') = nextChunkKeyStream cks - pos' = pos + fromMaybe 0 (keyChunkSize k) + pos' = pos + fromMaybe 0 (fromKey keyChunkSize k) {- Removes all chunks of a key from a remote, by calling a remover - action on each. @@ -208,7 +210,7 @@ removeChunks remover u chunkconfig encryptor k = do ls <- chunkKeys u chunkconfig k ok <- allM (remover . encryptor) (concat ls) when ok $ do - let chunksizes = catMaybes $ map (keyChunkSize <=< headMaybe) ls + let chunksizes = catMaybes $ map (fromKey keyChunkSize <=< headMaybe) ls forM_ chunksizes $ chunksRemoved u k . FixedSizeChunks . fromIntegral return ok @@ -272,7 +274,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink bracketIO (maybe opennew openresume offset) hClose $ \h -> do void $ tosink (Just h) p content let sz = toBytesProcessed $ - fromMaybe 0 $ keyChunkSize k + fromMaybe 0 $ fromKey keyChunkSize k getrest p h sz sz ks `catchNonAsync` unable case v of @@ -333,7 +335,7 @@ setupResume :: [[Key]] -> Integer -> [[Key]] setupResume ls currsize = map dropunneeded ls where dropunneeded [] = [] - dropunneeded l@(k:_) = case keyChunkSize k of + dropunneeded l@(k:_) = case fromKey keyChunkSize k of Just chunksize | chunksize > 0 -> genericDrop (currsize `div` chunksize) l _ -> l diff --git a/Remote/Helper/ExportImport.hs b/Remote/Helper/ExportImport.hs index 399163ba23..21d9814c65 100644 --- a/Remote/Helper/ExportImport.hs +++ b/Remote/Helper/ExportImport.hs @@ -324,7 +324,7 @@ adjustExportImport r rs = case M.lookup "exporttree" (config r) of liftIO $ Export.getExportTree db k retrieveKeyFileFromExport dbv k _af dest p = unVerified $ - if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (keyVariety k)) + if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (fromKey keyVariety k)) then do locs <- getexportlocs dbv k case locs of @@ -336,5 +336,5 @@ adjustExportImport r rs = case M.lookup "exporttree" (config r) of return False (l:_) -> retrieveExport (exportActions r) k l dest p else do - warning $ "exported content cannot be verified due to using the " ++ decodeBS (formatKeyVariety (keyVariety k)) ++ " backend" + warning $ "exported content cannot be verified due to using the " ++ decodeBS (formatKeyVariety (fromKey keyVariety k)) ++ " backend" return False diff --git a/Remote/S3.hs b/Remote/S3.hs index 2787e3f554..cd0a3c205e 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -347,7 +347,7 @@ checkKey hv r rs c info k = withS3Handle hv $ \case Right us -> do showChecking r let check u = withUrlOptions $ - Url.checkBoth u (keySize k) + Url.checkBoth u (fromKey keySize k) anyM check us checkKeyHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> Annex Bool @@ -417,7 +417,7 @@ checkPresentExportS3 hv r info k loc = withS3Handle hv $ \case Just h -> checkKeyHelper info h (Left (T.pack $ bucketExportLocation info loc)) Nothing -> case getPublicUrlMaker info of Just geturl -> withUrlOptions $ - Url.checkBoth (geturl $ bucketExportLocation info loc) (keySize k) + Url.checkBoth (geturl $ bucketExportLocation info loc) (fromKey keySize k) Nothing -> do warning $ needS3Creds (uuid r) giveup "No S3 credentials configured" diff --git a/Remote/Web.hs b/Remote/Web.hs index 645495d696..810c2f027e 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -117,7 +117,7 @@ checkKey' key us = firsthit us (Right False) $ \u -> do case downloader of YoutubeDownloader -> youtubeDlCheck u' _ -> catchMsgIO $ - Url.withUrlOptions $ Url.checkBoth u' (keySize key) + Url.withUrlOptions $ Url.checkBoth u' (fromKey keySize key) where firsthit [] miss _ = return miss firsthit (u:rest) _ a = do diff --git a/Types/Distribution.hs b/Types/Distribution.hs index b41a90a1c0..c0ad2c02fd 100644 --- a/Types/Distribution.hs +++ b/Types/Distribution.hs @@ -21,7 +21,9 @@ type GitAnnexVersion = String data GitAnnexDistribution = GitAnnexDistribution { distributionUrl :: String - , distributionKey :: Key + , distributionKey :: KeyData + -- ^ This used to be a Key, but now KeyData serializes + -- to Key { ... }, so back-compat for Read and Show is preserved. , distributionVersion :: GitAnnexVersion , distributionReleasedate :: UTCTime , distributionUrgentUpgrade :: Maybe GitAnnexVersion @@ -46,7 +48,7 @@ parseInfoFile s = case lines s of formatGitAnnexDistribution :: GitAnnexDistribution -> String formatGitAnnexDistribution d = unlines [ distributionUrl d - , serializeKey (distributionKey d) + , serializeKey $ mkKey $ const $ distributionKey d , distributionVersion d , show (distributionReleasedate d) , maybe "" show (distributionUrgentUpgrade d) @@ -56,7 +58,7 @@ parseGitAnnexDistribution :: String -> Maybe GitAnnexDistribution parseGitAnnexDistribution s = case lines s of (u:k:v:d:uu:_) -> GitAnnexDistribution <$> pure u - <*> deserializeKey k + <*> fmap (fromKey id) (deserializeKey k) <*> pure v <*> readish d <*> pure (readish uu) diff --git a/Types/Key.hs b/Types/Key.hs index 0d751bd736..e83dd57f41 100644 --- a/Types/Key.hs +++ b/Types/Key.hs @@ -7,19 +7,47 @@ {-# LANGUAGE OverloadedStrings, DeriveGeneric #-} -module Types.Key where +module Types.Key ( + KeyData(..), + Key, + fromKey, + mkKey, + alterKey, + isKeyPrefix, + splitKeyNameExtension, + keyParser, + keySerialization, + AssociatedFile(..), + KeyVariety(..), + HasExt(..), + HashSize(..), + hasExt, + sameExceptExt, + cryptographicallySecure, + isVerifiable, + formatKeyVariety, + parseKeyVariety, +) where import qualified Data.ByteString as S import qualified Data.ByteString.Char8 as S8 +import qualified Data.ByteString.Lazy as L +import Data.ByteString.Builder +import Data.ByteString.Builder.Extra +import qualified Data.Attoparsec.ByteString as A +import qualified Data.Attoparsec.ByteString.Char8 as A8 +import Data.List import System.Posix.Types +import Foreign.C.Types import Data.Monoid +import Control.Applicative import GHC.Generics import Control.DeepSeq import Prelude {- A Key has a unique name, which is derived from a particular backend, - and may contain other optional metadata. -} -data Key = Key +data KeyData = Key { keyName :: S.ByteString , keyVariety :: KeyVariety , keySize :: Maybe Integer @@ -28,8 +56,149 @@ data Key = Key , keyChunkNum :: Maybe Integer } deriving (Eq, Ord, Read, Show, Generic) +instance NFData KeyData + +{- Caching the seralization of a key is an optimization. + - + - This constructor is not exported, and all smart constructors maintain + - the serialization. + -} +data Key = MkKey + { keyData :: KeyData + , keySerialization :: S.ByteString + } deriving (Show, Generic) + +instance Eq Key where + -- comparing the serialization would be unncessary work + a == b = keyData a == keyData b + +instance Ord Key where + compare a b = compare (keyData a) (keyData b) + instance NFData Key +{- Access a field of data from the KeyData. -} +{-# INLINE fromKey #-} +fromKey :: (KeyData -> a) -> Key -> a +fromKey f = f . keyData + +{- Smart constructor for a Key. The provided KeyData has all values empty. -} +mkKey :: (KeyData -> KeyData) -> Key +mkKey f = + let d = f stub + in MkKey d (mkKeySerialization d) + where + stub = Key + { keyName = mempty + , keyVariety = OtherKey mempty + , keySize = Nothing + , keyMtime = Nothing + , keyChunkSize = Nothing + , keyChunkNum = Nothing + } + +{- Alter a Key's data. -} +alterKey :: Key -> (KeyData -> KeyData) -> Key +alterKey k f = + let d = f (keyData k) + in MkKey d (mkKeySerialization d) + +-- Checks if a string looks like at least the start of a key. +isKeyPrefix :: String -> Bool +isKeyPrefix s = [fieldSep, fieldSep] `isInfixOf` s + +fieldSep :: Char +fieldSep = '-' + +mkKeySerialization :: KeyData -> S.ByteString +mkKeySerialization = L.toStrict + . toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty + . buildKeyData + +{- Builds a ByteString from a KeyData. + - + - The name field is always shown last, separated by doubled fieldSeps, + - and is the only field allowed to contain the fieldSep. + -} +buildKeyData :: KeyData -> Builder +buildKeyData k = byteString (formatKeyVariety (keyVariety k)) + <> 's' ?: (integerDec <$> keySize k) + <> 'm' ?: (integerDec . (\(CTime t) -> fromIntegral t) <$> keyMtime k) + <> 'S' ?: (integerDec <$> keyChunkSize k) + <> 'C' ?: (integerDec <$> keyChunkNum k) + <> sepbefore (sepbefore (byteString (keyName k))) + where + sepbefore s = char7 fieldSep <> s + c ?: (Just b) = sepbefore (char7 c <> b) + _ ?: Nothing = mempty + +{- This is a strict parser for security reasons; a key + - can contain only 4 fields, which all consist only of numbers. + - Any key containing other fields, or non-numeric data will fail + - to parse. + - + - If a key contained non-numeric fields, they could be used to + - embed data used in a SHA1 collision attack, which would be a + - problem since the keys are committed to git. + -} +keyParser :: A.Parser Key +keyParser = do + -- key variety cannot be empty + v <- (parseKeyVariety <$> A8.takeWhile1 (/= fieldSep)) + s <- parsesize + m <- parsemtime + cs <- parsechunksize + cn <- parsechunknum + _ <- A8.char fieldSep + _ <- A8.char fieldSep + n <- A.takeByteString + if validKeyName v n + then + let d = Key + { keyName = n + , keyVariety = v + , keySize = s + , keyMtime = m + , keyChunkSize = cs + , keyChunkNum = cn + } + in pure $ MkKey d (mkKeySerialization d) + else fail "invalid keyName" + where + parseopt p = (Just <$> (A8.char fieldSep *> p)) <|> pure Nothing + parsesize = parseopt $ A8.char 's' *> A8.decimal + parsemtime = parseopt $ CTime <$> (A8.char 'm' *> A8.decimal) + parsechunksize = parseopt $ A8.char 'S' *> A8.decimal + parsechunknum = parseopt $ A8.char 'C' *> A8.decimal + +{- Limits the length of the extension in the keyName to mitigate against + - SHA1 collision attacks. + - + - In such an attack, the extension of the key could be made to contain + - the collision generation data, with the result that a signed git commit + - including such keys would not be secure. + - + - The maximum extension length ever generated for such a key was 8 + - characters, but they may be unicode which could use up to 4 bytes each, + - so 32 bytes. 64 bytes is used here to give a little future wiggle-room. + - The SHA1 common-prefix attack needs 128 bytes of data. + -} +validKeyName :: KeyVariety -> S.ByteString -> Bool +validKeyName kv name + | hasExt kv = + let ext = snd $ splitKeyNameExtension' name + in S.length ext <= 64 + | otherwise = True + +{- This splits any extension out of the keyName, returning the + - keyName minus extension, and the extension (including leading dot). + -} +splitKeyNameExtension :: Key -> (S.ByteString, S.ByteString) +splitKeyNameExtension = splitKeyNameExtension' . keyName . keyData + +splitKeyNameExtension' :: S.ByteString -> (S.ByteString, S.ByteString) +splitKeyNameExtension' keyname = S8.span (/= '.') keyname + {- A filename may be associated with a Key. -} newtype AssociatedFile = AssociatedFile (Maybe FilePath) deriving (Show, Eq, Ord) diff --git a/Types/Transfer.hs b/Types/Transfer.hs index 2a89c6c39f..e05b57efbe 100644 --- a/Types/Transfer.hs +++ b/Types/Transfer.hs @@ -11,6 +11,7 @@ module Types.Transfer where import Types import Types.Remote (Verification(..)) +import Types.Key import Utility.PID import Utility.QuickCheck import Utility.Url @@ -24,9 +25,12 @@ import Prelude data Transfer = Transfer { transferDirection :: Direction , transferUUID :: UUID - , transferKey :: Key + , transferKeyData :: KeyData } - deriving (Eq, Ord, Read, Show) + deriving (Eq, Ord, Show, Read) + +transferKey :: Transfer -> Key +transferKey = mkKey . const . transferKeyData {- Information about a Transfer, stored in the transfer information file. - diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs index 93d9108db9..0d41dde2a5 100644 --- a/Upgrade/V1.hs +++ b/Upgrade/V1.hs @@ -134,7 +134,7 @@ oldlog2key l where len = length l - 4 k = readKey1 (take len l) - sane = (not . S.null $ keyName k) && (not . S.null $ formatKeyVariety $ keyVariety k) + sane = (not . S.null $ fromKey keyName k) && (not . S.null $ formatKeyVariety $ fromKey keyVariety k) -- WORM backend keys: "WORM:mtime:size:filename" -- all the rest: "backend:key" @@ -145,7 +145,7 @@ oldlog2key l readKey1 :: String -> Key readKey1 v | mixup = fromJust $ deserializeKey $ intercalate ":" $ Prelude.tail bits - | otherwise = stubKey + | otherwise = mkKey $ \d -> d { keyName = encodeBS n , keyVariety = parseKeyVariety (encodeBS b) , keySize = s @@ -165,12 +165,16 @@ readKey1 v mixup = wormy && isUpper (Prelude.head $ bits !! 1) showKey1 :: Key -> String -showKey1 Key { keyName = n , keyVariety = v, keySize = s, keyMtime = t } = - intercalate ":" $ filter (not . null) [b, showifhere t, showifhere s, decodeBS n] +showKey1 k = intercalate ":" $ filter (not . null) + [b, showifhere t, showifhere s, decodeBS n] where showifhere Nothing = "" showifhere (Just x) = show x b = decodeBS $ formatKeyVariety v + n = fromKey keyName k + v = fromKey keyVariety k + s = fromKey keySize k + t = fromKey keyMtime k keyFile1 :: Key -> FilePath keyFile1 key = replace "/" "%" $ replace "%" "&s" $ replace "&" "&a" $ showKey1 key @@ -194,7 +198,7 @@ lookupFile1 file = do Right l -> makekey l where getsymlink = takeFileName <$> readSymbolicLink file - makekey l = case maybeLookupBackendVariety (keyVariety k) of + makekey l = case maybeLookupBackendVariety (fromKey keyVariety k) of Nothing -> do unless (null kname || null bname || not (isLinkToAnnex (toRawFilePath l))) $ @@ -203,8 +207,8 @@ lookupFile1 file = do Just backend -> return $ Just (k, backend) where k = fileKey1 l - bname = decodeBS (formatKeyVariety (keyVariety k)) - kname = decodeBS (keyName k) + bname = decodeBS (formatKeyVariety (fromKey keyVariety k)) + kname = decodeBS (fromKey keyName k) skip = "skipping " ++ file ++ " (unknown backend " ++ bname ++ ")" diff --git a/Utility/Hash.hs b/Utility/Hash.hs index d198918e67..397ada5d35 100644 --- a/Utility/Hash.hs +++ b/Utility/Hash.hs @@ -24,6 +24,7 @@ module Utility.Hash ( blake2b_512, blake2bp_512, md5, + md5s, prop_hashes_stable, Mac(..), calcMac, @@ -106,6 +107,9 @@ blake2bp_512 = hashlazy md5 :: L.ByteString -> Digest MD5 md5 = hashlazy +md5s :: S.ByteString -> Digest MD5 +md5s = hash + {- Check that all the hashes continue to hash the same. -} prop_hashes_stable :: Bool prop_hashes_stable = all (\(hasher, result) -> hasher foo == result) diff --git a/templates/dashboard/transfers.hamlet b/templates/dashboard/transfers.hamlet index ee8ddb8ab9..c1b3e15eef 100644 --- a/templates/dashboard/transfers.hamlet +++ b/templates/dashboard/transfers.hamlet @@ -15,7 +15,7 @@ #{maybe "unknown" Remote.name $ transferRemote info} - $with size <- maybe "unknown" (roughSize dataUnits True) $ keySize $ transferKey transfer + $with size <- maybe "unknown" (roughSize dataUnits True) $ keySize $ transferKeyData transfer $if isJust $ startedTime info $if isrunning info #{percent} of #{size}