add chunk metadata to Key
Added new fields for chunk number, and chunk size. These will not appear in normal keys ever, but will be used for chunked data stored on special remotes. This commit was sponsored by Jouni K Seppanen.
This commit is contained in:
parent
4bbc629cb0
commit
d751591ac8
6 changed files with 40 additions and 22 deletions
|
@ -36,7 +36,7 @@ keyValue :: KeySource -> Annex (Maybe Key)
|
||||||
keyValue source = do
|
keyValue source = do
|
||||||
stat <- liftIO $ getFileStatus $ contentLocation source
|
stat <- liftIO $ getFileStatus $ contentLocation source
|
||||||
n <- genKeyName $ keyFilename source
|
n <- genKeyName $ keyFilename source
|
||||||
return $ Just Key
|
return $ Just $ stubKey
|
||||||
{ keyName = n
|
{ keyName = n
|
||||||
, keyBackendName = name backend
|
, keyBackendName = name backend
|
||||||
, keySize = Just $ fromIntegral $ fileSize stat
|
, keySize = Just $ fromIntegral $ fileSize stat
|
||||||
|
|
|
@ -142,11 +142,9 @@ decryptCipher (EncryptedCipher t variant _) =
|
||||||
- reversible, nor does it need to be the same type of encryption used
|
- reversible, nor does it need to be the same type of encryption used
|
||||||
- on content. It does need to be repeatable. -}
|
- on content. It does need to be repeatable. -}
|
||||||
encryptKey :: Mac -> Cipher -> Key -> Key
|
encryptKey :: Mac -> Cipher -> Key -> Key
|
||||||
encryptKey mac c k = Key
|
encryptKey mac c k = stubKey
|
||||||
{ keyName = macWithCipher mac c (key2file k)
|
{ keyName = macWithCipher mac c (key2file k)
|
||||||
, keyBackendName = "GPG" ++ showMac mac
|
, keyBackendName = "GPG" ++ showMac mac
|
||||||
, keySize = Nothing -- size and mtime omitted
|
|
||||||
, keyMtime = Nothing -- to avoid leaking data
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type Feeder = Handle -> IO ()
|
type Feeder = Handle -> IO ()
|
||||||
|
|
23
Types/Key.hs
23
Types/Key.hs
|
@ -2,7 +2,7 @@
|
||||||
-
|
-
|
||||||
- Most things should not need this, using Types instead
|
- Most things should not need this, using Types instead
|
||||||
-
|
-
|
||||||
- Copyright 2011 Joey Hess <joey@kitenet.net>
|
- Copyright 2011-2014 Joey Hess <joey@kitenet.net>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -30,6 +30,8 @@ data Key = Key
|
||||||
, keyBackendName :: String
|
, keyBackendName :: String
|
||||||
, keySize :: Maybe Integer
|
, keySize :: Maybe Integer
|
||||||
, keyMtime :: Maybe EpochTime
|
, keyMtime :: Maybe EpochTime
|
||||||
|
, keyChunkSize :: Maybe Integer
|
||||||
|
, keyChunkNum :: Maybe Integer
|
||||||
} deriving (Eq, Ord, Read, Show)
|
} deriving (Eq, Ord, Read, Show)
|
||||||
|
|
||||||
{- A filename may be associated with a Key. -}
|
{- A filename may be associated with a Key. -}
|
||||||
|
@ -41,6 +43,8 @@ stubKey = Key
|
||||||
, keyBackendName = ""
|
, keyBackendName = ""
|
||||||
, keySize = Nothing
|
, keySize = Nothing
|
||||||
, keyMtime = Nothing
|
, keyMtime = Nothing
|
||||||
|
, keyChunkSize = Nothing
|
||||||
|
, keyChunkNum = Nothing
|
||||||
}
|
}
|
||||||
|
|
||||||
fieldSep :: Char
|
fieldSep :: Char
|
||||||
|
@ -50,13 +54,13 @@ fieldSep = '-'
|
||||||
- The name field is always shown last, separated by doubled fieldSeps,
|
- The name field is always shown last, separated by doubled fieldSeps,
|
||||||
- and is the only field allowed to contain the fieldSep. -}
|
- and is the only field allowed to contain the fieldSep. -}
|
||||||
key2file :: Key -> FilePath
|
key2file :: Key -> FilePath
|
||||||
key2file Key { keyBackendName = b, keySize = s, keyMtime = m, keyName = n } =
|
key2file Key { keyBackendName = b, keySize = s, keyMtime = m, keyChunkSize = cs, keyChunkNum = cn, keyName = n } =
|
||||||
b +++ ('s' ?: s) +++ ('m' ?: m) +++ (fieldSep : n)
|
b +++ ('s' ?: s) +++ ('m' ?: m) +++ ('S' ?: cs) +++ ('C' ?: cn) +++ (fieldSep : n)
|
||||||
where
|
where
|
||||||
"" +++ y = y
|
"" +++ y = y
|
||||||
x +++ "" = x
|
x +++ "" = x
|
||||||
x +++ y = x ++ fieldSep:y
|
x +++ y = x ++ fieldSep:y
|
||||||
c ?: (Just v) = c : show v
|
f ?: (Just v) = f : show v
|
||||||
_ ?: _ = ""
|
_ ?: _ = ""
|
||||||
|
|
||||||
file2key :: FilePath -> Maybe Key
|
file2key :: FilePath -> Maybe Key
|
||||||
|
@ -84,6 +88,13 @@ file2key s
|
||||||
addfield 'm' k v = do
|
addfield 'm' k v = do
|
||||||
mtime <- readish v
|
mtime <- readish v
|
||||||
return $ k { keyMtime = Just mtime }
|
return $ k { keyMtime = Just mtime }
|
||||||
|
addfield 'S' k v = do
|
||||||
|
chunksize <- readish v
|
||||||
|
return $ k { keyChunkSize = Just chunksize }
|
||||||
|
addfield 'C' k v = case readish v of
|
||||||
|
Just chunknum | chunknum > 0 ->
|
||||||
|
return $ k { keyChunkNum = Just chunknum }
|
||||||
|
_ -> return k
|
||||||
addfield _ _ _ = Nothing
|
addfield _ _ _ = Nothing
|
||||||
|
|
||||||
instance Arbitrary Key where
|
instance Arbitrary Key where
|
||||||
|
@ -92,6 +103,8 @@ instance Arbitrary Key where
|
||||||
<*> (listOf1 $ elements ['A'..'Z']) -- BACKEND
|
<*> (listOf1 $ elements ['A'..'Z']) -- BACKEND
|
||||||
<*> ((abs <$>) <$> arbitrary) -- size cannot be negative
|
<*> ((abs <$>) <$> arbitrary) -- size cannot be negative
|
||||||
<*> arbitrary
|
<*> arbitrary
|
||||||
|
<*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative
|
||||||
|
<*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative
|
||||||
|
|
||||||
prop_idempotent_key_encode :: Key -> Bool
|
prop_idempotent_key_encode :: Key -> Bool
|
||||||
prop_idempotent_key_encode k = Just k == (file2key . key2file) k
|
prop_idempotent_key_encode k = Just k == (file2key . key2file) k
|
||||||
|
@ -103,6 +116,6 @@ prop_idempotent_key_decode f
|
||||||
where
|
where
|
||||||
-- file2key will accept the fields in any order, so don't
|
-- file2key will accept the fields in any order, so don't
|
||||||
-- try the test unless the fields are in the normal order
|
-- try the test unless the fields are in the normal order
|
||||||
normalfieldorder = fields `isPrefixOf` "sm"
|
normalfieldorder = fields `isPrefixOf` "smSC"
|
||||||
fields = map (f !!) $ filter (< length f) $ map succ $
|
fields = map (f !!) $ filter (< length f) $ map succ $
|
||||||
elemIndices fieldSep f
|
elemIndices fieldSep f
|
||||||
|
|
|
@ -144,7 +144,7 @@ oldlog2key l
|
||||||
readKey1 :: String -> Key
|
readKey1 :: String -> Key
|
||||||
readKey1 v
|
readKey1 v
|
||||||
| mixup = fromJust $ file2key $ intercalate ":" $ Prelude.tail bits
|
| mixup = fromJust $ file2key $ intercalate ":" $ Prelude.tail bits
|
||||||
| otherwise = Key
|
| otherwise = stubKey
|
||||||
{ keyName = n
|
{ keyName = n
|
||||||
, keyBackendName = b
|
, keyBackendName = b
|
||||||
, keySize = s
|
, keySize = s
|
||||||
|
|
|
@ -104,7 +104,7 @@ Problem: Does not solve concurrent uploads with different chunk sizes.
|
||||||
|
|
||||||
When chunking is enabled, always put a chunk number in the Key,
|
When chunking is enabled, always put a chunk number in the Key,
|
||||||
along with the chunk size.
|
along with the chunk size.
|
||||||
So, SHA256-s10000-c1--xxxxxxx for the first chunk of 1 megabyte.
|
So, SHA256-s1048576-c1--xxxxxxx for the first chunk of 1 megabyte.
|
||||||
|
|
||||||
Before any chunks are stored, write a chunkcount file, eg
|
Before any chunks are stored, write a chunkcount file, eg
|
||||||
SHA256-s12345-c0--xxxxxxx. Note that this key is the same as the original
|
SHA256-s12345-c0--xxxxxxx. Note that this key is the same as the original
|
||||||
|
@ -148,20 +148,24 @@ could lead to data loss. (Same as in design 2.)
|
||||||
|
|
||||||
# design 4
|
# design 4
|
||||||
|
|
||||||
Use key SHA256-s10000-c1--xxxxxxx for the first chunk of 1 megabyte.
|
Use key SHA256-s12345-S1048576-C1--xxxxxxx for the first chunk of 1 megabyte.
|
||||||
|
|
||||||
|
Note that keeping the 's'ize field unchanged is necessary because it
|
||||||
|
disambiguates eg, WORM keys. So a 'S'ize field is used to hold the chunk
|
||||||
|
size.
|
||||||
|
|
||||||
Instead of storing the chunk count in the special remote, store it in
|
Instead of storing the chunk count in the special remote, store it in
|
||||||
the git-annex branch.
|
the git-annex branch.
|
||||||
|
|
||||||
Look at git-annex:aaa/bbb/SHA256-s12345--xxxxxxx.log.cnk to get the
|
The location log does not record locations of individual chunk keys
|
||||||
chunk count and size. File format would be:
|
(too space-inefficient).
|
||||||
|
Instead, look at git-annex:aaa/bbb/SHA256-s12345--xxxxxxx.log.cnk to get
|
||||||
|
the chunk count and size for a key. File format would be:
|
||||||
|
|
||||||
ts uuid chunksize chunkcount 0|1
|
ts uuid chunksize chunkcount
|
||||||
|
|
||||||
Where a trailing 0 means that chunk size is no longer present on the
|
Where a chunkcount of 0 means that the object is no longer present in the
|
||||||
remote, and a trailing 1 means it is. For future expansion, any other
|
remote using the specified chunk size.
|
||||||
value /= "0" is also accepted, meaning the chunk is present. For example,
|
|
||||||
this could be used for [[deltas]], storing the checksums of the chunks.
|
|
||||||
|
|
||||||
Note that a given remote uuid might have multiple lines, if a key was
|
Note that a given remote uuid might have multiple lines, if a key was
|
||||||
stored on it twice using different chunk sizes. Also note that even when
|
stored on it twice using different chunk sizes. Also note that even when
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
A git-annex key has this format:
|
A git-annex key has this format:
|
||||||
|
|
||||||
BACKEND-sNNNN-mNNNN--NAME
|
BACKEND[-sNNNN][-mNNNN][-SNNNN-CNNNN]--NAME
|
||||||
|
|
||||||
For example:
|
For example:
|
||||||
|
|
||||||
|
@ -10,12 +10,15 @@ For example:
|
||||||
are always upper-cased.
|
are always upper-cased.
|
||||||
* The name field at the end has a format dependent on the backend. It is
|
* The name field at the end has a format dependent on the backend. It is
|
||||||
always the last field, and is prefixed with "--". Unlike other fields,
|
always the last field, and is prefixed with "--". Unlike other fields,
|
||||||
it may contain "-" in its content. It should not contain newline characters;
|
it may contain "-" in its content. It should not contain newline
|
||||||
otherwise nearly anything goes.
|
characters or "/"; otherwise nearly anything goes.
|
||||||
* The "-s" field is optional, and is the size of the content in bytes.
|
* The "-s" field is optional, and is the size of the content in bytes.
|
||||||
* The "-m" field is optional, and is the mtime of the file when it was
|
* The "-m" field is optional, and is the mtime of the file when it was
|
||||||
added to git-annex, expressed as seconds from the epoch.
|
added to git-annex, expressed as seconds from the epoch.
|
||||||
This is currently only used by the WORM backend.
|
This is currently only used by the WORM backend.
|
||||||
|
* The "-S" and "-C" fields are only used for keys that are chunks
|
||||||
|
of some other key. "-S" is the size of the chunk, and "-C" is the chunk
|
||||||
|
number (starting at 1).
|
||||||
* Other fields could be added in the future, if needed.
|
* Other fields could be added in the future, if needed.
|
||||||
|
|
||||||
git-annex always puts the fields in the order shown above when serializing
|
git-annex always puts the fields in the order shown above when serializing
|
||||||
|
|
Loading…
Reference in a new issue