diff --git a/Annex/Common.hs b/Annex/Common.hs index 1f039f1356..52a545a596 100644 --- a/Annex/Common.hs +++ b/Annex/Common.hs @@ -2,7 +2,7 @@ module Annex.Common (module X) where import Common as X import Types as X -import Types.Key as X +import Key as X import Types.UUID as X import Annex as X (gitRepo, inRepo, fromRepo, calcRepo) import Annex.Locations as X diff --git a/Annex/DirHashes.hs b/Annex/DirHashes.hs index ed20cfb8a7..82d751eeee 100644 --- a/Annex/DirHashes.hs +++ b/Annex/DirHashes.hs @@ -23,7 +23,7 @@ import Data.Hash.MD5 import Data.Default import Common -import Types.Key +import Key import Types.GitConfig import Types.Difference import Utility.FileSystemEncoding diff --git a/Annex/Locations.hs b/Annex/Locations.hs index a6af4d417f..3138b2322a 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -77,7 +77,7 @@ import Data.Char import Data.Default import Common -import Types.Key +import Key import Types.UUID import Types.GitConfig import Types.Difference diff --git a/Database/Types.hs b/Database/Types.hs index 9eabc6983e..a4b5fbcb19 100644 --- a/Database/Types.hs +++ b/Database/Types.hs @@ -14,7 +14,7 @@ import Data.Maybe import Data.Char import Utility.PartialPrelude -import Types.Key +import Key import Utility.InodeCache -- A serialized Key diff --git a/Key.hs b/Key.hs new file mode 100644 index 0000000000..99aa756e2b --- /dev/null +++ b/Key.hs @@ -0,0 +1,186 @@ +{- git-annex Keys + - + - Copyright 2011-2017 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module Key ( + Key(..), + AssociatedFile, + stubKey, + key2file, + file2key, + nonChunkKey, + chunkKeyOffset, + isChunkKey, + isKeyPrefix, + + prop_isomorphic_key_encode, + prop_isomorphic_key_decode +) where + +import Data.Aeson +import Data.Char +import qualified Data.Text as T + +import Common +import Types.Key +import Utility.QuickCheck +import Utility.Bloom +import qualified Utility.SimpleProtocol as Proto + +stubKey :: Key +stubKey = Key + { keyName = "" + , keyBackendName = "" + , keySize = Nothing + , keyMtime = Nothing + , keyChunkSize = Nothing + , keyChunkNum = Nothing + } + +-- Gets the parent of a chunk key. +nonChunkKey :: Key -> Key +nonChunkKey k = k + { keyChunkSize = Nothing + , keyChunkNum = Nothing + } + +-- Where a chunk key is offset within its parent. +chunkKeyOffset :: Key -> Maybe Integer +chunkKeyOffset k = (*) + <$> keyChunkSize k + <*> (pred <$> keyChunkNum k) + +isChunkKey :: Key -> Bool +isChunkKey k = isJust (keyChunkSize k) && isJust (keyChunkNum k) + +-- Checks if a string looks like at least the start of a key. +isKeyPrefix :: String -> Bool +isKeyPrefix s = [fieldSep, fieldSep] `isInfixOf` s + +fieldSep :: Char +fieldSep = '-' + +{- Converts a key to a string that is suitable for use as a filename. + - The name field is always shown last, separated by doubled fieldSeps, + - and is the only field allowed to contain the fieldSep. -} +key2file :: Key -> FilePath +key2file Key { keyBackendName = b, keySize = s, keyMtime = m, keyChunkSize = cs, keyChunkNum = cn, keyName = n } = + b +++ ('s' ?: s) +++ ('m' ?: m) +++ ('S' ?: cs) +++ ('C' ?: cn) +++ (fieldSep : n) + where + "" +++ y = y + x +++ "" = x + x +++ y = x ++ fieldSep:y + f ?: (Just v) = f : show v + _ ?: _ = "" + +file2key :: FilePath -> Maybe Key +file2key s + | key == Just stubKey || (keyName <$> key) == Just "" || (keyBackendName <$> key) == Just "" = Nothing + | otherwise = key + where + key = startbackend stubKey s + + startbackend k v = sepfield k v addbackend + + sepfield k v a = case span (/= fieldSep) v of + (v', _:r) -> findfields r $ a k v' + _ -> Nothing + + findfields (c:v) (Just k) + | c == fieldSep = addkeyname k v + | otherwise = sepfield k v $ addfield c + findfields _ v = v + + addbackend k v = Just k { keyBackendName = v } + + -- This is a strict parser for security reasons; a key + -- can contain only 4 fields, which all consist only of numbers. + -- Any key containing other fields, or non-numeric data is + -- rejected with Nothing. + -- + -- If a key contained non-numeric fields, they could be used to + -- embed data used in a SHA1 collision attack, which would be a + -- problem since the keys are committed to git. + addfield _ _ v | not (all isDigit v) = Nothing + addfield 's' k v = do + sz <- readish v + return $ k { keySize = Just sz } + addfield 'm' k v = do + mtime <- readish v + return $ k { keyMtime = Just mtime } + addfield 'S' k v = do + chunksize <- readish v + return $ k { keyChunkSize = Just chunksize } + addfield 'C' k v = case readish v of + Just chunknum | chunknum > 0 -> + return $ k { keyChunkNum = Just chunknum } + _ -> Nothing + addfield _ _ _ = Nothing + + addkeyname k v + | validKeyName k v = Just $ k { keyName = v } + | otherwise = Nothing + +{- A key with a backend ending in "E" is an extension preserving key, + - using some hash. + - + - The length of the extension is limited in order to mitigate against + - SHA1 collision attacks (specifically, chosen-prefix attacks). + - In such an attack, the extension of the key could be made to contain + - the collision generation data, with the result that a signed git commit + - including such keys would not be secure. + - + - The maximum extension length ever generated for such a key was 8 + - characters; 20 is used here to give a little future wiggle-room. + - The SHA1 common-prefix attack used 128 bytes of data. + - + - This code is here, and not in Backend.Hash (where it really belongs) + - so that file2key can check it whenever a Key is constructed. + -} +validKeyName :: Key -> String -> Bool +validKeyName k v + | end (keyBackendName k) == "E" = length (takeExtensions v) <= 20 + | otherwise = True + +instance Arbitrary Key where + arbitrary = Key + <$> (listOf1 $ elements $ ['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "-_\r\n \t") + <*> (listOf1 $ elements ['A'..'Z']) -- BACKEND + <*> ((abs <$>) <$> arbitrary) -- size cannot be negative + <*> arbitrary + <*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative + <*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative + +instance Hashable Key where + hashIO32 = hashIO32 . key2file + hashIO64 = hashIO64 . key2file + +instance ToJSON Key where + toJSON = toJSON . key2file + +instance FromJSON Key where + parseJSON (String t) = maybe mempty pure $ file2key $ T.unpack t + parseJSON _ = mempty + +instance Proto.Serializable Key where + serialize = key2file + deserialize = file2key + +prop_isomorphic_key_encode :: Key -> Bool +prop_isomorphic_key_encode k = Just k == (file2key . key2file) k + +prop_isomorphic_key_decode :: FilePath -> Bool +prop_isomorphic_key_decode f + | normalfieldorder = maybe True (\k -> key2file k == f) (file2key f) + | otherwise = True + where + -- file2key will accept the fields in any order, so don't + -- try the test unless the fields are in the normal order + normalfieldorder = fields `isPrefixOf` "smSC" + fields = map (f !!) $ filter (< length f) $ map succ $ + elemIndices fieldSep f diff --git a/Messages/JSON.hs b/Messages/JSON.hs index 06bdd9a4d8..3ad0e57085 100644 --- a/Messages/JSON.hs +++ b/Messages/JSON.hs @@ -36,7 +36,7 @@ import Data.Maybe import Data.Monoid import Prelude -import Types.Key +import Key import Utility.Metered import Utility.Percentage diff --git a/Test.hs b/Test.hs index cb3df90a0e..1b724b5af2 100644 --- a/Test.hs +++ b/Test.hs @@ -63,7 +63,7 @@ import qualified Logs.Presence import qualified Logs.PreferredContent import qualified Types.MetaData import qualified Remote -import qualified Types.Key +import qualified Key import qualified Types.Messages import qualified Config import qualified Config.Cost @@ -152,8 +152,8 @@ properties = localOption (QuickCheckTests 1000) $ testGroup "QuickCheck" [ testProperty "prop_isomorphic_deencode_git" Git.Filename.prop_isomorphic_deencode , testProperty "prop_isomorphic_deencode" Utility.Format.prop_isomorphic_deencode , testProperty "prop_isomorphic_fileKey" Annex.Locations.prop_isomorphic_fileKey - , testProperty "prop_isomorphic_key_encode" Types.Key.prop_isomorphic_key_encode - , testProperty "prop_isomorphic_key_decode" Types.Key.prop_isomorphic_key_decode + , testProperty "prop_isomorphic_key_encode" Key.prop_isomorphic_key_encode + , testProperty "prop_isomorphic_key_decode" Key.prop_isomorphic_key_decode , testProperty "prop_isomorphic_shellEscape" Utility.SafeCommand.prop_isomorphic_shellEscape , testProperty "prop_isomorphic_shellEscape_multiword" Utility.SafeCommand.prop_isomorphic_shellEscape_multiword , testProperty "prop_isomorphic_configEscape" Logs.Remote.prop_isomorphic_configEscape @@ -390,7 +390,7 @@ test_reinject = intmpclonerepoInDirect $ do git_annex "drop" ["--force", sha1annexedfile] @? "drop failed" annexed_notpresent sha1annexedfile writeFile tmp $ content sha1annexedfile - key <- Types.Key.key2file <$> getKey backendSHA1 tmp + key <- Key.key2file <$> getKey backendSHA1 tmp git_annex "reinject" [tmp, sha1annexedfile] @? "reinject failed" annexed_present sha1annexedfile -- fromkey can't be used on a crippled filesystem, since it makes a @@ -846,9 +846,9 @@ test_unused = intmpclonerepoInDirect $ do checkunused [annexedfilekey, sha1annexedfilekey] "after rm sha1annexedfile" -- good opportunity to test dropkey also - git_annex "dropkey" ["--force", Types.Key.key2file annexedfilekey] + git_annex "dropkey" ["--force", Key.key2file annexedfilekey] @? "dropkey failed" - checkunused [sha1annexedfilekey] ("after dropkey --force " ++ Types.Key.key2file annexedfilekey) + checkunused [sha1annexedfilekey] ("after dropkey --force " ++ Key.key2file annexedfilekey) not <$> git_annex "dropunused" ["1"] @? "dropunused failed to fail without --force" git_annex "dropunused" ["--force", "1"] @? "dropunused failed" @@ -1959,7 +1959,7 @@ checklocationlog f expected = do case r of Just k -> do uuids <- annexeval $ Remote.keyLocations k - assertEqual ("bad content in location log for " ++ f ++ " key " ++ Types.Key.key2file k ++ " uuid " ++ show thisuuid) + assertEqual ("bad content in location log for " ++ f ++ " key " ++ Key.key2file k ++ " uuid " ++ show thisuuid) expected (thisuuid `elem` uuids) _ -> assertFailure $ f ++ " failed to look up key" diff --git a/Types/ActionItem.hs b/Types/ActionItem.hs index a0097e45a8..d9beb049da 100644 --- a/Types/ActionItem.hs +++ b/Types/ActionItem.hs @@ -9,7 +9,7 @@ module Types.ActionItem where -import Types.Key +import Key import Types.Transfer import Git.FilePath diff --git a/Types/Key.hs b/Types/Key.hs index d4a4d3728d..0615adfe44 100644 --- a/Types/Key.hs +++ b/Types/Key.hs @@ -5,30 +5,9 @@ - Licensed under the GNU GPL version 3 or higher. -} -module Types.Key ( - Key(..), - AssociatedFile, - stubKey, - key2file, - file2key, - nonChunkKey, - chunkKeyOffset, - isChunkKey, - isKeyPrefix, - - prop_isomorphic_key_encode, - prop_isomorphic_key_decode -) where +module Types.Key where import System.Posix.Types -import Data.Aeson -import Data.Char -import qualified Data.Text as T - -import Common -import Utility.QuickCheck -import Utility.Bloom -import qualified Utility.SimpleProtocol as Proto {- A Key has a unique name, which is derived from a particular backend, - and may contain other optional metadata. -} @@ -43,156 +22,3 @@ data Key = Key {- A filename may be associated with a Key. -} type AssociatedFile = Maybe FilePath - -stubKey :: Key -stubKey = Key - { keyName = "" - , keyBackendName = "" - , keySize = Nothing - , keyMtime = Nothing - , keyChunkSize = Nothing - , keyChunkNum = Nothing - } - --- Gets the parent of a chunk key. -nonChunkKey :: Key -> Key -nonChunkKey k = k - { keyChunkSize = Nothing - , keyChunkNum = Nothing - } - --- Where a chunk key is offset within its parent. -chunkKeyOffset :: Key -> Maybe Integer -chunkKeyOffset k = (*) - <$> keyChunkSize k - <*> (pred <$> keyChunkNum k) - -isChunkKey :: Key -> Bool -isChunkKey k = isJust (keyChunkSize k) && isJust (keyChunkNum k) - --- Checks if a string looks like at least the start of a key. -isKeyPrefix :: String -> Bool -isKeyPrefix s = [fieldSep, fieldSep] `isInfixOf` s - -fieldSep :: Char -fieldSep = '-' - -{- Converts a key to a string that is suitable for use as a filename. - - The name field is always shown last, separated by doubled fieldSeps, - - and is the only field allowed to contain the fieldSep. -} -key2file :: Key -> FilePath -key2file Key { keyBackendName = b, keySize = s, keyMtime = m, keyChunkSize = cs, keyChunkNum = cn, keyName = n } = - b +++ ('s' ?: s) +++ ('m' ?: m) +++ ('S' ?: cs) +++ ('C' ?: cn) +++ (fieldSep : n) - where - "" +++ y = y - x +++ "" = x - x +++ y = x ++ fieldSep:y - f ?: (Just v) = f : show v - _ ?: _ = "" - -file2key :: FilePath -> Maybe Key -file2key s - | key == Just stubKey || (keyName <$> key) == Just "" || (keyBackendName <$> key) == Just "" = Nothing - | otherwise = key - where - key = startbackend stubKey s - - startbackend k v = sepfield k v addbackend - - sepfield k v a = case span (/= fieldSep) v of - (v', _:r) -> findfields r $ a k v' - _ -> Nothing - - findfields (c:v) (Just k) - | c == fieldSep = addkeyname k v - | otherwise = sepfield k v $ addfield c - findfields _ v = v - - addbackend k v = Just k { keyBackendName = v } - - -- This is a strict parser for security reasons; a key - -- can contain only 4 fields, which all consist only of numbers. - -- Any key containing other fields, or non-numeric data is - -- rejected with Nothing. - -- - -- If a key contained non-numeric fields, they could be used to - -- embed data used in a SHA1 collision attack, which would be a - -- problem since the keys are committed to git. - addfield _ _ v | not (all isDigit v) = Nothing - addfield 's' k v = do - sz <- readish v - return $ k { keySize = Just sz } - addfield 'm' k v = do - mtime <- readish v - return $ k { keyMtime = Just mtime } - addfield 'S' k v = do - chunksize <- readish v - return $ k { keyChunkSize = Just chunksize } - addfield 'C' k v = case readish v of - Just chunknum | chunknum > 0 -> - return $ k { keyChunkNum = Just chunknum } - _ -> Nothing - addfield _ _ _ = Nothing - - addkeyname k v - | validKeyName k v = Just $ k { keyName = v } - | otherwise = Nothing - -{- A key with a backend ending in "E" is an extension preserving key, - - using some hash. - - - - The length of the extension is limited in order to mitigate against - - SHA1 collision attacks (specifically, chosen-prefix attacks). - - In such an attack, the extension of the key could be made to contain - - the collision generation data, with the result that a signed git commit - - including such keys would not be secure. - - - - The maximum extension length ever generated for such a key was 8 - - characters; 20 is used here to give a little future wiggle-room. - - The SHA1 common-prefix attack used 128 bytes of data. - - - - This code is here, and not in Backend.Hash (where it really belongs) - - so that file2key can check it whenever a Key is constructed. - -} -validKeyName :: Key -> String -> Bool -validKeyName k v - | end (keyBackendName k) == "E" = length (takeExtensions v) <= 20 - | otherwise = True - -instance ToJSON Key where - toJSON = toJSON . key2file - -instance FromJSON Key where - parseJSON (String t) = maybe mempty pure $ file2key $ T.unpack t - parseJSON _ = mempty - -instance Proto.Serializable Key where - serialize = key2file - deserialize = file2key - -instance Arbitrary Key where - arbitrary = Key - <$> (listOf1 $ elements $ ['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "-_\r\n \t") - <*> (listOf1 $ elements ['A'..'Z']) -- BACKEND - <*> ((abs <$>) <$> arbitrary) -- size cannot be negative - <*> arbitrary - <*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative - <*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative - -instance Hashable Key where - hashIO32 = hashIO32 . key2file - hashIO64 = hashIO64 . key2file - -prop_isomorphic_key_encode :: Key -> Bool -prop_isomorphic_key_encode k = Just k == (file2key . key2file) k - -prop_isomorphic_key_decode :: FilePath -> Bool -prop_isomorphic_key_decode f - | normalfieldorder = maybe True (\k -> key2file k == f) (file2key f) - | otherwise = True - where - -- file2key will accept the fields in any order, so don't - -- try the test unless the fields are in the normal order - normalfieldorder = fields `isPrefixOf` "smSC" - fields = map (f !!) $ filter (< length f) $ map succ $ - elemIndices fieldSep f diff --git a/git-annex.cabal b/git-annex.cabal index c394693cfe..87c14939c9 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -848,6 +848,7 @@ Executable git-annex Git.UpdateIndex Git.Url Git.Version + Key Limit Limit.Wanted Logs