Directory special remotes now support chunking files written to them

Avoiding writing files larger than a specified size is useful on certian
things. For example, box.com has a file size limit of 100 mb. Could also
be useful on really crappy removable media.
This commit is contained in:
Joey Hess 2012-03-03 18:05:55 -04:00
parent 2841d748a4
commit 3436aba6de
3 changed files with 185 additions and 58 deletions

View file

@ -1,6 +1,6 @@
{- A "remote" that is just a filesystem directory. {- A "remote" that is just a filesystem directory.
- -
- Copyright 2011 Joey Hess <joey@kitenet.net> - Copyright 2011-2012 Joey Hess <joey@kitenet.net>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -19,6 +19,8 @@ import Utility.FileMode
import Remote.Helper.Special import Remote.Helper.Special
import Remote.Helper.Encryptable import Remote.Helper.Encryptable
import Crypto import Crypto
import Utility.DataUnits
import Data.Int
remote :: RemoteType remote :: RemoteType
remote = RemoteType { remote = RemoteType {
@ -32,24 +34,39 @@ gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex Remote
gen r u c = do gen r u c = do
dir <- getConfig r "directory" (error "missing directory") dir <- getConfig r "directory" (error "missing directory")
cst <- remoteCost r cheapRemoteCost cst <- remoteCost r cheapRemoteCost
let chunksize = chunkSize c
return $ encryptableRemote c return $ encryptableRemote c
(storeEncrypted dir) (storeEncrypted dir chunksize)
(retrieveEncrypted dir) (retrieveEncrypted dir chunksize)
Remote { Remote {
uuid = u, uuid = u,
cost = cst, cost = cst,
name = Git.repoDescribe r, name = Git.repoDescribe r,
storeKey = store dir, storeKey = store dir chunksize,
retrieveKeyFile = retrieve dir, retrieveKeyFile = retrieve dir chunksize,
retrieveKeyFileCheap = retrieveCheap dir, retrieveKeyFileCheap = retrieveCheap dir chunksize,
removeKey = remove dir, removeKey = remove dir chunksize,
hasKey = checkPresent dir, hasKey = checkPresent dir chunksize,
hasKeyCheap = True, hasKeyCheap = True,
whereisKey = Nothing, whereisKey = Nothing,
config = Nothing, config = Nothing,
repo = r, repo = r,
remotetype = remote remotetype = remote
} }
where
type ChunkSize = Maybe Int64
chunkSize :: Maybe RemoteConfig -> ChunkSize
chunkSize Nothing = Nothing
chunkSize (Just m) =
case M.lookup "chunksize" m of
Nothing -> Nothing
Just v -> case readSize dataUnits v of
Nothing -> error "bad chunksize"
Just size
| size <= 0 -> error "bad chunksize"
| otherwise -> Just $ fromInteger size
directorySetup :: UUID -> RemoteConfig -> Annex RemoteConfig directorySetup :: UUID -> RemoteConfig -> Annex RemoteConfig
directorySetup u c = do directorySetup u c = do
@ -69,69 +86,158 @@ directorySetup u c = do
locations :: FilePath -> Key -> [FilePath] locations :: FilePath -> Key -> [FilePath]
locations d k = map (d </>) (keyPaths k) locations d k = map (d </>) (keyPaths k)
withCheckedFile :: (FilePath -> IO Bool) -> FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool {- An infinite stream of chunks to use for a given file. -}
withCheckedFile _ [] _ _ = return False chunkStream :: FilePath -> [FilePath]
withCheckedFile check d k a = go $ locations d k chunkStream f = map tochunk [1 :: Integer ..]
where
tochunk n = f ++ ".chunk" ++ show n
{- A file that records the number of chunks used. -}
chunkCount :: FilePath -> FilePath
chunkCount f = f ++ ".chunkcount"
withCheckedFiles :: (FilePath -> IO Bool) -> ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
withCheckedFiles _ _ [] _ _ = return False
withCheckedFiles check Nothing d k a = go $ locations d k
where where
go [] = return False go [] = return False
go (f:fs) = do go (f:fs) = do
use <- check f use <- check f
if use if use
then a f then a [f]
else go fs else go fs
withCheckedFiles check (Just _) d k a = go $ locations d k
withStoredFile :: FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool
withStoredFile = withCheckedFile doesFileExist
store :: FilePath -> Key -> Annex Bool
store d k = do
src <- inRepo $ gitAnnexLocation k
liftIO $ catchBoolIO $ storeHelper d k $ copyFileExternal src
storeEncrypted :: FilePath -> (Cipher, Key) -> Key -> Annex Bool
storeEncrypted d (cipher, enck) k = do
src <- inRepo $ gitAnnexLocation k
liftIO $ catchBoolIO $ storeHelper d enck $ encrypt src
where where
encrypt src dest = do go [] = return False
withEncryptedContent cipher (L.readFile src) $ L.writeFile dest go (f:fs) = do
return True let chunkcount = chunkCount f
use <- check chunkcount
if use
then do
count <- readcount chunkcount
let chunks = take count $ chunkStream f
ok <- all id <$> mapM check chunks
if ok
then a chunks
else return False
else go fs
readcount f = fromMaybe (error $ "cannot parse " ++ f)
. (readish :: String -> Maybe Int)
<$> readFile f
storeHelper :: FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool withStoredFiles :: ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
storeHelper d key a = do withStoredFiles = withCheckedFiles doesFileExist
let dest = Prelude.head $ locations d key
let tmpdest = dest ++ ".tmp" store :: FilePath -> ChunkSize -> Key -> Annex Bool
let dir = parentDir dest store d chunksize k = do
src <- inRepo $ gitAnnexLocation k
liftIO $ catchBoolIO $ storeHelper d chunksize k $ \dests ->
case chunksize of
Nothing -> do
let dest = Prelude.head dests
ok <- copyFileExternal src dest
return $ if ok then [dest] else []
Just _ -> storeSplit chunksize dests =<< L.readFile src
storeEncrypted :: FilePath -> ChunkSize -> (Cipher, Key) -> Key -> Annex Bool
storeEncrypted d chunksize (cipher, enck) k = do
src <- inRepo $ gitAnnexLocation k
liftIO $ catchBoolIO $ storeHelper d chunksize enck $ encrypt src
where
encrypt src dests = withEncryptedContent cipher (L.readFile src) $ \s ->
case chunksize of
Nothing -> do
let dest = Prelude.head dests
L.writeFile dest s
return [dest]
Just _ -> storeSplit chunksize dests s
{- Splits a ByteString into chunks and writes to dests.
- Note: Must always write at least one file, even for empty ByteString. -}
storeSplit :: ChunkSize -> [FilePath] -> L.ByteString -> IO [FilePath]
storeSplit Nothing _ _ = error "bad storeSplit call"
storeSplit _ [] _ = error "bad storeSplit call"
storeSplit (Just chunksize) alldests@(firstdest:_) s
| L.null s = do
-- must always write at least one file, even for empty
L.writeFile firstdest s
return [firstdest]
| otherwise = storeSplit' chunksize alldests s []
storeSplit' :: Int64 -> [FilePath] -> L.ByteString -> [FilePath] -> IO [FilePath]
storeSplit' _ [] _ _ = error "expected an infinite list"
storeSplit' chunksize (d:dests) s c
| L.null s = return $ reverse c
| otherwise = do
let (chunk, rest) = L.splitAt chunksize s
L.writeFile d chunk
storeSplit' chunksize dests rest (d:c)
{- Generates a list of destinations to write to in order to store a key.
- When chunksize is specified, this list will be a list of chunks.
- The action should store the file, and return a list of the destinations
- it stored it to, or [] on error.
- The stored files are only put into their final place once storage is
- complete.
-}
storeHelper :: FilePath -> ChunkSize -> Key -> ([FilePath] -> IO [FilePath]) -> IO Bool
storeHelper d chunksize key a = do
let dir = parentDir desttemplate
createDirectoryIfMissing True dir createDirectoryIfMissing True dir
allowWrite dir allowWrite dir
ok <- a tmpdest stored <- a tmpdests
when ok $ do forM_ stored $ \f -> do
renameFile tmpdest dest let dest = detmpprefix f
renameFile f dest
preventWrite dest preventWrite dest
preventWrite dir when (chunksize /= Nothing) $ do
return ok let chunkcount = chunkCount desttemplate
_ <- tryIO $ allowWrite chunkcount
writeFile chunkcount (show $ length stored)
preventWrite chunkcount
preventWrite dir
return (not $ null stored)
where
desttemplate = Prelude.head $ locations d key
tmpdests = case chunksize of
Nothing -> [desttemplate ++ tmpprefix]
Just _ -> map (++ tmpprefix) (chunkStream desttemplate)
tmpprefix = ".tmp"
detmpprefix f = take (length f - tmpprefixlen) f
tmpprefixlen = length tmpprefix
retrieve :: FilePath -> Key -> FilePath -> Annex Bool retrieve :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool
retrieve d k f = liftIO $ withStoredFile d k $ \file -> copyFileExternal file f retrieve d chunksize k f = liftIO $ withStoredFiles chunksize d k go
where
go [file] = copyFileExternal file f
go files = catchBoolIO $ do
L.writeFile f =<< (L.concat <$> mapM L.readFile files)
return True
retrieveCheap :: FilePath -> Key -> FilePath -> Annex Bool retrieveEncrypted :: FilePath -> ChunkSize -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveCheap d k f = liftIO $ withStoredFile d k $ \file -> retrieveEncrypted d chunksize (cipher, enck) f =
catchBoolIO $ createSymbolicLink file f >> return True liftIO $ withStoredFiles chunksize d enck $ \files -> catchBoolIO $ do
withDecryptedContent cipher (L.concat <$> mapM L.readFile files) $
retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool L.writeFile f
retrieveEncrypted d (cipher, enck) f =
liftIO $ withStoredFile d enck $ \file -> catchBoolIO $ do
withDecryptedContent cipher (L.readFile file) $ L.writeFile f
return True return True
remove :: FilePath -> Key -> Annex Bool retrieveCheap :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool
remove d k = liftIO $ withStoredFile d k $ \file -> catchBoolIO $ do retrieveCheap _ (Just _) _ _ = return False -- no cheap retrieval for chunks
let dir = parentDir file retrieveCheap d _ k f = liftIO $ withStoredFiles Nothing d k go
allowWrite dir where
removeFile file go [file] = catchBoolIO $ createSymbolicLink file f >> return True
removeDirectory dir go _files = return False
return True
checkPresent :: FilePath -> Key -> Annex (Either String Bool) remove :: FilePath -> ChunkSize -> Key -> Annex Bool
checkPresent d k = liftIO $ catchMsgIO $ withStoredFile d k $ remove d chunksize k = liftIO $ withStoredFiles chunksize d k go
const $ return True -- withStoredFile checked that it exists where
go files = all id <$> mapM removefile files
removefile file = catchBoolIO $ do
let dir = parentDir file
allowWrite dir
removeFile file
_ <- tryIO $ removeDirectory dir
return True
checkPresent :: FilePath -> ChunkSize -> Key -> Annex (Either String Bool)
checkPresent d chunksize k = liftIO $ catchMsgIO $ withStoredFiles chunksize d k $
const $ return True -- withStoredFiles checked that it exists

2
debian/changelog vendored
View file

@ -2,6 +2,8 @@ git-annex (3.20120230) UNRELEASED; urgency=low
* "here" can be used to refer to the current repository, * "here" can be used to refer to the current repository,
which can read better than the old "." (which still works too). which can read better than the old "." (which still works too).
* Directory special remotes now support chunking files written to them,
avoiding writing files larger than a specified size.
-- Joey Hess <joeyh@debian.org> Thu, 01 Mar 2012 22:34:27 -0400 -- Joey Hess <joeyh@debian.org> Thu, 01 Mar 2012 22:34:27 -0400

View file

@ -5,6 +5,25 @@ you want to use it to sneakernet files between systems (possibly with
[[encrypted|encryption]] contents). Just set up both systems to use [[encrypted|encryption]] contents). Just set up both systems to use
the drive's mountpoint as a directory remote. the drive's mountpoint as a directory remote.
## configuration
These parameters can be passed to `git annex initremote` to configure the
remote:
* `encryption` - Required. Either "none" to disable encryption of content
stored in the directory,
or a value that can be looked up (using gpg -k) to find a gpg encryption
key that will be given access to the remote. Note that additional gpg
keys can be given access to a remote by rerunning initremote with
the new key id. See [[encryption]].
* `chunksize` - Avoid storing files larger than the specified size in the
directory. For use on directories on mount points that have file size
limitations. The default is to never chunk files.
Note that changing the chunk size of an existing remote is
not recommended.
The value can use specified using any commonly used units.
Example: `chunksize=100 megabytes`
Setup example: Setup example:
# git annex initremote usbdrive type=directory directory=/media/usbdrive/ encryption=none # git annex initremote usbdrive type=directory directory=/media/usbdrive/ encryption=none