Directory special remotes now support chunking files written to them

Avoiding writing files larger than a specified size is useful on certian
things. For example, box.com has a file size limit of 100 mb. Could also
be useful on really crappy removable media.
This commit is contained in:
Joey Hess 2012-03-03 18:05:55 -04:00
parent 2841d748a4
commit 3436aba6de
3 changed files with 185 additions and 58 deletions

View file

@ -1,6 +1,6 @@
{- A "remote" that is just a filesystem directory.
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
- Copyright 2011-2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -19,6 +19,8 @@ import Utility.FileMode
import Remote.Helper.Special
import Remote.Helper.Encryptable
import Crypto
import Utility.DataUnits
import Data.Int
remote :: RemoteType
remote = RemoteType {
@ -32,24 +34,39 @@ gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex Remote
gen r u c = do
dir <- getConfig r "directory" (error "missing directory")
cst <- remoteCost r cheapRemoteCost
let chunksize = chunkSize c
return $ encryptableRemote c
(storeEncrypted dir)
(retrieveEncrypted dir)
(storeEncrypted dir chunksize)
(retrieveEncrypted dir chunksize)
Remote {
uuid = u,
cost = cst,
name = Git.repoDescribe r,
storeKey = store dir,
retrieveKeyFile = retrieve dir,
retrieveKeyFileCheap = retrieveCheap dir,
removeKey = remove dir,
hasKey = checkPresent dir,
storeKey = store dir chunksize,
retrieveKeyFile = retrieve dir chunksize,
retrieveKeyFileCheap = retrieveCheap dir chunksize,
removeKey = remove dir chunksize,
hasKey = checkPresent dir chunksize,
hasKeyCheap = True,
whereisKey = Nothing,
config = Nothing,
repo = r,
remotetype = remote
}
where
type ChunkSize = Maybe Int64
chunkSize :: Maybe RemoteConfig -> ChunkSize
chunkSize Nothing = Nothing
chunkSize (Just m) =
case M.lookup "chunksize" m of
Nothing -> Nothing
Just v -> case readSize dataUnits v of
Nothing -> error "bad chunksize"
Just size
| size <= 0 -> error "bad chunksize"
| otherwise -> Just $ fromInteger size
directorySetup :: UUID -> RemoteConfig -> Annex RemoteConfig
directorySetup u c = do
@ -69,69 +86,158 @@ directorySetup u c = do
locations :: FilePath -> Key -> [FilePath]
locations d k = map (d </>) (keyPaths k)
withCheckedFile :: (FilePath -> IO Bool) -> FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool
withCheckedFile _ [] _ _ = return False
withCheckedFile check d k a = go $ locations d k
{- An infinite stream of chunks to use for a given file. -}
chunkStream :: FilePath -> [FilePath]
chunkStream f = map tochunk [1 :: Integer ..]
where
tochunk n = f ++ ".chunk" ++ show n
{- A file that records the number of chunks used. -}
chunkCount :: FilePath -> FilePath
chunkCount f = f ++ ".chunkcount"
withCheckedFiles :: (FilePath -> IO Bool) -> ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
withCheckedFiles _ _ [] _ _ = return False
withCheckedFiles check Nothing d k a = go $ locations d k
where
go [] = return False
go (f:fs) = do
use <- check f
if use
then a f
then a [f]
else go fs
withStoredFile :: FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool
withStoredFile = withCheckedFile doesFileExist
store :: FilePath -> Key -> Annex Bool
store d k = do
src <- inRepo $ gitAnnexLocation k
liftIO $ catchBoolIO $ storeHelper d k $ copyFileExternal src
storeEncrypted :: FilePath -> (Cipher, Key) -> Key -> Annex Bool
storeEncrypted d (cipher, enck) k = do
src <- inRepo $ gitAnnexLocation k
liftIO $ catchBoolIO $ storeHelper d enck $ encrypt src
withCheckedFiles check (Just _) d k a = go $ locations d k
where
encrypt src dest = do
withEncryptedContent cipher (L.readFile src) $ L.writeFile dest
return True
go [] = return False
go (f:fs) = do
let chunkcount = chunkCount f
use <- check chunkcount
if use
then do
count <- readcount chunkcount
let chunks = take count $ chunkStream f
ok <- all id <$> mapM check chunks
if ok
then a chunks
else return False
else go fs
readcount f = fromMaybe (error $ "cannot parse " ++ f)
. (readish :: String -> Maybe Int)
<$> readFile f
storeHelper :: FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool
storeHelper d key a = do
let dest = Prelude.head $ locations d key
let tmpdest = dest ++ ".tmp"
let dir = parentDir dest
withStoredFiles :: ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
withStoredFiles = withCheckedFiles doesFileExist
store :: FilePath -> ChunkSize -> Key -> Annex Bool
store d chunksize k = do
src <- inRepo $ gitAnnexLocation k
liftIO $ catchBoolIO $ storeHelper d chunksize k $ \dests ->
case chunksize of
Nothing -> do
let dest = Prelude.head dests
ok <- copyFileExternal src dest
return $ if ok then [dest] else []
Just _ -> storeSplit chunksize dests =<< L.readFile src
storeEncrypted :: FilePath -> ChunkSize -> (Cipher, Key) -> Key -> Annex Bool
storeEncrypted d chunksize (cipher, enck) k = do
src <- inRepo $ gitAnnexLocation k
liftIO $ catchBoolIO $ storeHelper d chunksize enck $ encrypt src
where
encrypt src dests = withEncryptedContent cipher (L.readFile src) $ \s ->
case chunksize of
Nothing -> do
let dest = Prelude.head dests
L.writeFile dest s
return [dest]
Just _ -> storeSplit chunksize dests s
{- Splits a ByteString into chunks and writes to dests.
- Note: Must always write at least one file, even for empty ByteString. -}
storeSplit :: ChunkSize -> [FilePath] -> L.ByteString -> IO [FilePath]
storeSplit Nothing _ _ = error "bad storeSplit call"
storeSplit _ [] _ = error "bad storeSplit call"
storeSplit (Just chunksize) alldests@(firstdest:_) s
| L.null s = do
-- must always write at least one file, even for empty
L.writeFile firstdest s
return [firstdest]
| otherwise = storeSplit' chunksize alldests s []
storeSplit' :: Int64 -> [FilePath] -> L.ByteString -> [FilePath] -> IO [FilePath]
storeSplit' _ [] _ _ = error "expected an infinite list"
storeSplit' chunksize (d:dests) s c
| L.null s = return $ reverse c
| otherwise = do
let (chunk, rest) = L.splitAt chunksize s
L.writeFile d chunk
storeSplit' chunksize dests rest (d:c)
{- Generates a list of destinations to write to in order to store a key.
- When chunksize is specified, this list will be a list of chunks.
- The action should store the file, and return a list of the destinations
- it stored it to, or [] on error.
- The stored files are only put into their final place once storage is
- complete.
-}
storeHelper :: FilePath -> ChunkSize -> Key -> ([FilePath] -> IO [FilePath]) -> IO Bool
storeHelper d chunksize key a = do
let dir = parentDir desttemplate
createDirectoryIfMissing True dir
allowWrite dir
ok <- a tmpdest
when ok $ do
renameFile tmpdest dest
stored <- a tmpdests
forM_ stored $ \f -> do
let dest = detmpprefix f
renameFile f dest
preventWrite dest
when (chunksize /= Nothing) $ do
let chunkcount = chunkCount desttemplate
_ <- tryIO $ allowWrite chunkcount
writeFile chunkcount (show $ length stored)
preventWrite chunkcount
preventWrite dir
return ok
return (not $ null stored)
where
desttemplate = Prelude.head $ locations d key
tmpdests = case chunksize of
Nothing -> [desttemplate ++ tmpprefix]
Just _ -> map (++ tmpprefix) (chunkStream desttemplate)
tmpprefix = ".tmp"
detmpprefix f = take (length f - tmpprefixlen) f
tmpprefixlen = length tmpprefix
retrieve :: FilePath -> Key -> FilePath -> Annex Bool
retrieve d k f = liftIO $ withStoredFile d k $ \file -> copyFileExternal file f
retrieveCheap :: FilePath -> Key -> FilePath -> Annex Bool
retrieveCheap d k f = liftIO $ withStoredFile d k $ \file ->
catchBoolIO $ createSymbolicLink file f >> return True
retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted d (cipher, enck) f =
liftIO $ withStoredFile d enck $ \file -> catchBoolIO $ do
withDecryptedContent cipher (L.readFile file) $ L.writeFile f
retrieve :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool
retrieve d chunksize k f = liftIO $ withStoredFiles chunksize d k go
where
go [file] = copyFileExternal file f
go files = catchBoolIO $ do
L.writeFile f =<< (L.concat <$> mapM L.readFile files)
return True
remove :: FilePath -> Key -> Annex Bool
remove d k = liftIO $ withStoredFile d k $ \file -> catchBoolIO $ do
retrieveEncrypted :: FilePath -> ChunkSize -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted d chunksize (cipher, enck) f =
liftIO $ withStoredFiles chunksize d enck $ \files -> catchBoolIO $ do
withDecryptedContent cipher (L.concat <$> mapM L.readFile files) $
L.writeFile f
return True
retrieveCheap :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool
retrieveCheap _ (Just _) _ _ = return False -- no cheap retrieval for chunks
retrieveCheap d _ k f = liftIO $ withStoredFiles Nothing d k go
where
go [file] = catchBoolIO $ createSymbolicLink file f >> return True
go _files = return False
remove :: FilePath -> ChunkSize -> Key -> Annex Bool
remove d chunksize k = liftIO $ withStoredFiles chunksize d k go
where
go files = all id <$> mapM removefile files
removefile file = catchBoolIO $ do
let dir = parentDir file
allowWrite dir
removeFile file
removeDirectory dir
_ <- tryIO $ removeDirectory dir
return True
checkPresent :: FilePath -> Key -> Annex (Either String Bool)
checkPresent d k = liftIO $ catchMsgIO $ withStoredFile d k $
const $ return True -- withStoredFile checked that it exists
checkPresent :: FilePath -> ChunkSize -> Key -> Annex (Either String Bool)
checkPresent d chunksize k = liftIO $ catchMsgIO $ withStoredFiles chunksize d k $
const $ return True -- withStoredFiles checked that it exists

2
debian/changelog vendored
View file

@ -2,6 +2,8 @@ git-annex (3.20120230) UNRELEASED; urgency=low
* "here" can be used to refer to the current repository,
which can read better than the old "." (which still works too).
* Directory special remotes now support chunking files written to them,
avoiding writing files larger than a specified size.
-- Joey Hess <joeyh@debian.org> Thu, 01 Mar 2012 22:34:27 -0400

View file

@ -5,6 +5,25 @@ you want to use it to sneakernet files between systems (possibly with
[[encrypted|encryption]] contents). Just set up both systems to use
the drive's mountpoint as a directory remote.
## configuration
These parameters can be passed to `git annex initremote` to configure the
remote:
* `encryption` - Required. Either "none" to disable encryption of content
stored in the directory,
or a value that can be looked up (using gpg -k) to find a gpg encryption
key that will be given access to the remote. Note that additional gpg
keys can be given access to a remote by rerunning initremote with
the new key id. See [[encryption]].
* `chunksize` - Avoid storing files larger than the specified size in the
directory. For use on directories on mount points that have file size
limitations. The default is to never chunk files.
Note that changing the chunk size of an existing remote is
not recommended.
The value can use specified using any commonly used units.
Example: `chunksize=100 megabytes`
Setup example:
# git annex initremote usbdrive type=directory directory=/media/usbdrive/ encryption=none