diff --git a/Backend/WORM.hs b/Backend/WORM.hs index cc71238505..fdeea6f89a 100644 --- a/Backend/WORM.hs +++ b/Backend/WORM.hs @@ -36,7 +36,7 @@ keyValue :: KeySource -> Annex (Maybe Key) keyValue source = do stat <- liftIO $ getFileStatus $ contentLocation source n <- genKeyName $ keyFilename source - return $ Just Key + return $ Just $ stubKey { keyName = n , keyBackendName = name backend , keySize = Just $ fromIntegral $ fileSize stat diff --git a/Crypto.hs b/Crypto.hs index f3a9e3957e..0bfa81db2e 100644 --- a/Crypto.hs +++ b/Crypto.hs @@ -142,11 +142,9 @@ decryptCipher (EncryptedCipher t variant _) = - reversable, nor does it need to be the same type of encryption used - on content. It does need to be repeatable. -} encryptKey :: Mac -> Cipher -> Key -> Key -encryptKey mac c k = Key +encryptKey mac c k = stubKey { keyName = macWithCipher mac c (key2file k) , keyBackendName = "GPG" ++ showMac mac - , keySize = Nothing -- size and mtime omitted - , keyMtime = Nothing -- to avoid leaking data } type Feeder = Handle -> IO () diff --git a/Types/Key.hs b/Types/Key.hs index 26af6220f1..90f66f23ed 100644 --- a/Types/Key.hs +++ b/Types/Key.hs @@ -2,7 +2,7 @@ - - Most things should not need this, using Types instead - - - Copyright 2011 Joey Hess + - Copyright 2011-2014 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -30,6 +30,8 @@ data Key = Key , keyBackendName :: String , keySize :: Maybe Integer , keyMtime :: Maybe EpochTime + , keyChunkSize :: Maybe Integer + , keyChunkNum :: Maybe Integer } deriving (Eq, Ord, Read, Show) {- A filename may be associated with a Key. -} @@ -41,6 +43,8 @@ stubKey = Key , keyBackendName = "" , keySize = Nothing , keyMtime = Nothing + , keyChunkSize = Nothing + , keyChunkNum = Nothing } fieldSep :: Char @@ -50,13 +54,13 @@ fieldSep = '-' - The name field is always shown last, separated by doubled fieldSeps, - and is the only field allowed to contain the fieldSep. 
-} key2file :: Key -> FilePath -key2file Key { keyBackendName = b, keySize = s, keyMtime = m, keyName = n } = - b +++ ('s' ?: s) +++ ('m' ?: m) +++ (fieldSep : n) +key2file Key { keyBackendName = b, keySize = s, keyMtime = m, keyChunkSize = cs, keyChunkNum = cn, keyName = n } = + b +++ ('s' ?: s) +++ ('m' ?: m) +++ ('S' ?: cs) +++ ('C' ?: cn) +++ (fieldSep : n) where "" +++ y = y x +++ "" = x x +++ y = x ++ fieldSep:y - c ?: (Just v) = c : show v + f ?: (Just v) = f : show v _ ?: _ = "" file2key :: FilePath -> Maybe Key @@ -84,6 +88,13 @@ file2key s addfield 'm' k v = do mtime <- readish v return $ k { keyMtime = Just mtime } + addfield 'S' k v = do + chunksize <- readish v + return $ k { keyChunkSize = Just chunksize } + addfield 'C' k v = case readish v of + Just chunknum | chunknum > 0 -> + return $ k { keyChunkNum = Just chunknum } + _ -> return k addfield _ _ _ = Nothing instance Arbitrary Key where @@ -92,6 +103,8 @@ instance Arbitrary Key where <*> (listOf1 $ elements ['A'..'Z']) -- BACKEND <*> ((abs <$>) <$> arbitrary) -- size cannot be negative <*> arbitrary + <*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative + <*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative prop_idempotent_key_encode :: Key -> Bool prop_idempotent_key_encode k = Just k == (file2key . key2file) k @@ -103,6 +116,6 @@ prop_idempotent_key_decode f where -- file2key will accept the fields in any order, so don't -- try the test unless the fields are in the normal order - normalfieldorder = fields `isPrefixOf` "sm" + normalfieldorder = fields `isPrefixOf` "smSC" fields = map (f !!) 
$ filter (< length f) $ map succ $ elemIndices fieldSep f diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs index 8af4848a16..347b102ac9 100644 --- a/Upgrade/V1.hs +++ b/Upgrade/V1.hs @@ -144,7 +144,7 @@ oldlog2key l readKey1 :: String -> Key readKey1 v | mixup = fromJust $ file2key $ intercalate ":" $ Prelude.tail bits - | otherwise = Key + | otherwise = stubKey { keyName = n , keyBackendName = b , keySize = s diff --git a/doc/design/assistant/chunks.mdwn b/doc/design/assistant/chunks.mdwn index 42a31bd256..c20bb9aab7 100644 --- a/doc/design/assistant/chunks.mdwn +++ b/doc/design/assistant/chunks.mdwn @@ -104,7 +104,7 @@ Problem: Does not solve concurrent uploads with different chunk sizes. When chunking is enabled, always put a chunk number in the Key, along with the chunk size. -So, SHA256-s10000-c1--xxxxxxx for the first chunk of 1 megabyte. +So, SHA256-s1048576-c1--xxxxxxx for the first chunk of 1 megabyte. Before any chunks are stored, write a chunkcount file, eg SHA256-s12345-c0--xxxxxxx. Note that this key is the same as the original @@ -148,20 +148,24 @@ could lead to data loss. (Same as in design 2.) # design 4 -Use key SHA256-s10000-c1--xxxxxxx for the first chunk of 1 megabyte. +Use key SHA256-s12345-S1048576-C1--xxxxxxx for the first chunk of 1 megabyte. + +Note that keeping the 's'ize field unchanged is necessary because it +disambiguates eg, WORM keys. So a 'S'ize field is used to hold the chunk +size. Instead of storing the chunk count in the special remote, store it in the git-annex branch. -Look at git-annex:aaa/bbb/SHA256-s12345--xxxxxxx.log.cnk to get the -chunk count and size. File format would be: +The location log does not record locations of individual chunk keys +(too space-inefficient). +Instead, look at git-annex:aaa/bbb/SHA256-s12345--xxxxxxx.log.cnk to get +the chunk count and size for a key.
File format would be: - ts uuid chunksize chunkcount 0|1 + ts uuid chunksize chunkcount -Where a trailing 0 means that chunk size is no longer present on the -remote, and a trailing 1 means it is. For future expansion, any other -value /= "0" is also accepted, meaning the chunk is present. For example, -this could be used for [[deltas]], storing the checksums of the chunks. +Where a chunkcount of 0 means that the object is no longer present in the +remote using the specified chunk size. Note that a given remote uuid might have multiple lines, if a key was stored on it twice using different chunk sizes. Also note that even when diff --git a/doc/internals/key_format.mdwn b/doc/internals/key_format.mdwn index 17e20592cd..52fb80395b 100644 --- a/doc/internals/key_format.mdwn +++ b/doc/internals/key_format.mdwn @@ -1,6 +1,6 @@ A git-annex key has this format: - BACKEND-sNNNN-mNNNN--NAME + BACKEND[-sNNNN][-mNNNN][-SNNNN-CNNNN]--NAME For example: @@ -10,12 +10,15 @@ For example: are always upper-cased. * The name field at the end has a format dependent on the backend. It is always the last field, and is prefixed with "--". Unlike other fields, - it may contain "-" in its content. It should not contain newline characters; - otherwise nearly anything goes. + it may contain "-" in its content. It should not contain newline + characters or "/"; otherwise nearly anything goes. * The "-s" field is optional, and is the size of the content in bytes. * The "-m" field is optional, and is the mtime of the file when it was added to git-annex, expressed as seconds from the epoch. This is currently only used by the WORM backend. +* The "-S" and "-C" fields are only used for keys that are chunks + of some other key. "-S" is the size of the chunk, and "-C" is the chunk + number (starting at 1). * Other fields could be added in the future, if needed. git-annex always puts the fields in the order shown above when serializing