Directory special remotes now support chunking files written to them
Avoiding writing files larger than a specified size is useful on certian things. For example, box.com has a file size limit of 100 mb. Could also be useful on really crappy removable media.
This commit is contained in:
parent
2841d748a4
commit
3436aba6de
3 changed files with 185 additions and 58 deletions
|
@ -1,6 +1,6 @@
|
|||
{- A "remote" that is just a filesystem directory.
|
||||
-
|
||||
- Copyright 2011 Joey Hess <joey@kitenet.net>
|
||||
- Copyright 2011-2012 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -19,6 +19,8 @@ import Utility.FileMode
|
|||
import Remote.Helper.Special
|
||||
import Remote.Helper.Encryptable
|
||||
import Crypto
|
||||
import Utility.DataUnits
|
||||
import Data.Int
|
||||
|
||||
remote :: RemoteType
|
||||
remote = RemoteType {
|
||||
|
@ -32,24 +34,39 @@ gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex Remote
|
|||
gen r u c = do
|
||||
dir <- getConfig r "directory" (error "missing directory")
|
||||
cst <- remoteCost r cheapRemoteCost
|
||||
let chunksize = chunkSize c
|
||||
return $ encryptableRemote c
|
||||
(storeEncrypted dir)
|
||||
(retrieveEncrypted dir)
|
||||
(storeEncrypted dir chunksize)
|
||||
(retrieveEncrypted dir chunksize)
|
||||
Remote {
|
||||
uuid = u,
|
||||
cost = cst,
|
||||
name = Git.repoDescribe r,
|
||||
storeKey = store dir,
|
||||
retrieveKeyFile = retrieve dir,
|
||||
retrieveKeyFileCheap = retrieveCheap dir,
|
||||
removeKey = remove dir,
|
||||
hasKey = checkPresent dir,
|
||||
storeKey = store dir chunksize,
|
||||
retrieveKeyFile = retrieve dir chunksize,
|
||||
retrieveKeyFileCheap = retrieveCheap dir chunksize,
|
||||
removeKey = remove dir chunksize,
|
||||
hasKey = checkPresent dir chunksize,
|
||||
hasKeyCheap = True,
|
||||
whereisKey = Nothing,
|
||||
config = Nothing,
|
||||
repo = r,
|
||||
remotetype = remote
|
||||
}
|
||||
where
|
||||
|
||||
type ChunkSize = Maybe Int64
|
||||
|
||||
chunkSize :: Maybe RemoteConfig -> ChunkSize
|
||||
chunkSize Nothing = Nothing
|
||||
chunkSize (Just m) =
|
||||
case M.lookup "chunksize" m of
|
||||
Nothing -> Nothing
|
||||
Just v -> case readSize dataUnits v of
|
||||
Nothing -> error "bad chunksize"
|
||||
Just size
|
||||
| size <= 0 -> error "bad chunksize"
|
||||
| otherwise -> Just $ fromInteger size
|
||||
|
||||
directorySetup :: UUID -> RemoteConfig -> Annex RemoteConfig
|
||||
directorySetup u c = do
|
||||
|
@ -69,69 +86,158 @@ directorySetup u c = do
|
|||
locations :: FilePath -> Key -> [FilePath]
|
||||
locations d k = map (d </>) (keyPaths k)
|
||||
|
||||
withCheckedFile :: (FilePath -> IO Bool) -> FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool
|
||||
withCheckedFile _ [] _ _ = return False
|
||||
withCheckedFile check d k a = go $ locations d k
|
||||
{- An infinite stream of chunks to use for a given file. -}
|
||||
chunkStream :: FilePath -> [FilePath]
|
||||
chunkStream f = map tochunk [1 :: Integer ..]
|
||||
where
|
||||
tochunk n = f ++ ".chunk" ++ show n
|
||||
|
||||
{- A file that records the number of chunks used. -}
|
||||
chunkCount :: FilePath -> FilePath
|
||||
chunkCount f = f ++ ".chunkcount"
|
||||
|
||||
withCheckedFiles :: (FilePath -> IO Bool) -> ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
|
||||
withCheckedFiles _ _ [] _ _ = return False
|
||||
withCheckedFiles check Nothing d k a = go $ locations d k
|
||||
where
|
||||
go [] = return False
|
||||
go (f:fs) = do
|
||||
use <- check f
|
||||
if use
|
||||
then a f
|
||||
then a [f]
|
||||
else go fs
|
||||
|
||||
withStoredFile :: FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool
|
||||
withStoredFile = withCheckedFile doesFileExist
|
||||
|
||||
store :: FilePath -> Key -> Annex Bool
|
||||
store d k = do
|
||||
src <- inRepo $ gitAnnexLocation k
|
||||
liftIO $ catchBoolIO $ storeHelper d k $ copyFileExternal src
|
||||
|
||||
storeEncrypted :: FilePath -> (Cipher, Key) -> Key -> Annex Bool
|
||||
storeEncrypted d (cipher, enck) k = do
|
||||
src <- inRepo $ gitAnnexLocation k
|
||||
liftIO $ catchBoolIO $ storeHelper d enck $ encrypt src
|
||||
withCheckedFiles check (Just _) d k a = go $ locations d k
|
||||
where
|
||||
encrypt src dest = do
|
||||
withEncryptedContent cipher (L.readFile src) $ L.writeFile dest
|
||||
return True
|
||||
go [] = return False
|
||||
go (f:fs) = do
|
||||
let chunkcount = chunkCount f
|
||||
use <- check chunkcount
|
||||
if use
|
||||
then do
|
||||
count <- readcount chunkcount
|
||||
let chunks = take count $ chunkStream f
|
||||
ok <- all id <$> mapM check chunks
|
||||
if ok
|
||||
then a chunks
|
||||
else return False
|
||||
else go fs
|
||||
readcount f = fromMaybe (error $ "cannot parse " ++ f)
|
||||
. (readish :: String -> Maybe Int)
|
||||
<$> readFile f
|
||||
|
||||
storeHelper :: FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool
|
||||
storeHelper d key a = do
|
||||
let dest = Prelude.head $ locations d key
|
||||
let tmpdest = dest ++ ".tmp"
|
||||
let dir = parentDir dest
|
||||
withStoredFiles :: ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
|
||||
withStoredFiles = withCheckedFiles doesFileExist
|
||||
|
||||
store :: FilePath -> ChunkSize -> Key -> Annex Bool
|
||||
store d chunksize k = do
|
||||
src <- inRepo $ gitAnnexLocation k
|
||||
liftIO $ catchBoolIO $ storeHelper d chunksize k $ \dests ->
|
||||
case chunksize of
|
||||
Nothing -> do
|
||||
let dest = Prelude.head dests
|
||||
ok <- copyFileExternal src dest
|
||||
return $ if ok then [dest] else []
|
||||
Just _ -> storeSplit chunksize dests =<< L.readFile src
|
||||
|
||||
storeEncrypted :: FilePath -> ChunkSize -> (Cipher, Key) -> Key -> Annex Bool
|
||||
storeEncrypted d chunksize (cipher, enck) k = do
|
||||
src <- inRepo $ gitAnnexLocation k
|
||||
liftIO $ catchBoolIO $ storeHelper d chunksize enck $ encrypt src
|
||||
where
|
||||
encrypt src dests = withEncryptedContent cipher (L.readFile src) $ \s ->
|
||||
case chunksize of
|
||||
Nothing -> do
|
||||
let dest = Prelude.head dests
|
||||
L.writeFile dest s
|
||||
return [dest]
|
||||
Just _ -> storeSplit chunksize dests s
|
||||
|
||||
{- Splits a ByteString into chunks and writes to dests.
|
||||
- Note: Must always write at least one file, even for empty ByteString. -}
|
||||
storeSplit :: ChunkSize -> [FilePath] -> L.ByteString -> IO [FilePath]
|
||||
storeSplit Nothing _ _ = error "bad storeSplit call"
|
||||
storeSplit _ [] _ = error "bad storeSplit call"
|
||||
storeSplit (Just chunksize) alldests@(firstdest:_) s
|
||||
| L.null s = do
|
||||
-- must always write at least one file, even for empty
|
||||
L.writeFile firstdest s
|
||||
return [firstdest]
|
||||
| otherwise = storeSplit' chunksize alldests s []
|
||||
storeSplit' :: Int64 -> [FilePath] -> L.ByteString -> [FilePath] -> IO [FilePath]
|
||||
storeSplit' _ [] _ _ = error "expected an infinite list"
|
||||
storeSplit' chunksize (d:dests) s c
|
||||
| L.null s = return $ reverse c
|
||||
| otherwise = do
|
||||
let (chunk, rest) = L.splitAt chunksize s
|
||||
L.writeFile d chunk
|
||||
storeSplit' chunksize dests rest (d:c)
|
||||
|
||||
{- Generates a list of destinations to write to in order to store a key.
|
||||
- When chunksize is specified, this list will be a list of chunks.
|
||||
- The action should store the file, and return a list of the destinations
|
||||
- it stored it to, or [] on error.
|
||||
- The stored files are only put into their final place once storage is
|
||||
- complete.
|
||||
-}
|
||||
storeHelper :: FilePath -> ChunkSize -> Key -> ([FilePath] -> IO [FilePath]) -> IO Bool
|
||||
storeHelper d chunksize key a = do
|
||||
let dir = parentDir desttemplate
|
||||
createDirectoryIfMissing True dir
|
||||
allowWrite dir
|
||||
ok <- a tmpdest
|
||||
when ok $ do
|
||||
renameFile tmpdest dest
|
||||
stored <- a tmpdests
|
||||
forM_ stored $ \f -> do
|
||||
let dest = detmpprefix f
|
||||
renameFile f dest
|
||||
preventWrite dest
|
||||
when (chunksize /= Nothing) $ do
|
||||
let chunkcount = chunkCount desttemplate
|
||||
_ <- tryIO $ allowWrite chunkcount
|
||||
writeFile chunkcount (show $ length stored)
|
||||
preventWrite chunkcount
|
||||
preventWrite dir
|
||||
return ok
|
||||
return (not $ null stored)
|
||||
where
|
||||
desttemplate = Prelude.head $ locations d key
|
||||
tmpdests = case chunksize of
|
||||
Nothing -> [desttemplate ++ tmpprefix]
|
||||
Just _ -> map (++ tmpprefix) (chunkStream desttemplate)
|
||||
tmpprefix = ".tmp"
|
||||
detmpprefix f = take (length f - tmpprefixlen) f
|
||||
tmpprefixlen = length tmpprefix
|
||||
|
||||
retrieve :: FilePath -> Key -> FilePath -> Annex Bool
|
||||
retrieve d k f = liftIO $ withStoredFile d k $ \file -> copyFileExternal file f
|
||||
|
||||
retrieveCheap :: FilePath -> Key -> FilePath -> Annex Bool
|
||||
retrieveCheap d k f = liftIO $ withStoredFile d k $ \file ->
|
||||
catchBoolIO $ createSymbolicLink file f >> return True
|
||||
|
||||
retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool
|
||||
retrieveEncrypted d (cipher, enck) f =
|
||||
liftIO $ withStoredFile d enck $ \file -> catchBoolIO $ do
|
||||
withDecryptedContent cipher (L.readFile file) $ L.writeFile f
|
||||
retrieve :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool
|
||||
retrieve d chunksize k f = liftIO $ withStoredFiles chunksize d k go
|
||||
where
|
||||
go [file] = copyFileExternal file f
|
||||
go files = catchBoolIO $ do
|
||||
L.writeFile f =<< (L.concat <$> mapM L.readFile files)
|
||||
return True
|
||||
|
||||
remove :: FilePath -> Key -> Annex Bool
|
||||
remove d k = liftIO $ withStoredFile d k $ \file -> catchBoolIO $ do
|
||||
retrieveEncrypted :: FilePath -> ChunkSize -> (Cipher, Key) -> FilePath -> Annex Bool
|
||||
retrieveEncrypted d chunksize (cipher, enck) f =
|
||||
liftIO $ withStoredFiles chunksize d enck $ \files -> catchBoolIO $ do
|
||||
withDecryptedContent cipher (L.concat <$> mapM L.readFile files) $
|
||||
L.writeFile f
|
||||
return True
|
||||
|
||||
retrieveCheap :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool
|
||||
retrieveCheap _ (Just _) _ _ = return False -- no cheap retrieval for chunks
|
||||
retrieveCheap d _ k f = liftIO $ withStoredFiles Nothing d k go
|
||||
where
|
||||
go [file] = catchBoolIO $ createSymbolicLink file f >> return True
|
||||
go _files = return False
|
||||
|
||||
remove :: FilePath -> ChunkSize -> Key -> Annex Bool
|
||||
remove d chunksize k = liftIO $ withStoredFiles chunksize d k go
|
||||
where
|
||||
go files = all id <$> mapM removefile files
|
||||
removefile file = catchBoolIO $ do
|
||||
let dir = parentDir file
|
||||
allowWrite dir
|
||||
removeFile file
|
||||
removeDirectory dir
|
||||
_ <- tryIO $ removeDirectory dir
|
||||
return True
|
||||
|
||||
checkPresent :: FilePath -> Key -> Annex (Either String Bool)
|
||||
checkPresent d k = liftIO $ catchMsgIO $ withStoredFile d k $
|
||||
const $ return True -- withStoredFile checked that it exists
|
||||
checkPresent :: FilePath -> ChunkSize -> Key -> Annex (Either String Bool)
|
||||
checkPresent d chunksize k = liftIO $ catchMsgIO $ withStoredFiles chunksize d k $
|
||||
const $ return True -- withStoredFiles checked that it exists
|
||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -2,6 +2,8 @@ git-annex (3.20120230) UNRELEASED; urgency=low
|
|||
|
||||
* "here" can be used to refer to the current repository,
|
||||
which can read better than the old "." (which still works too).
|
||||
* Directory special remotes now support chunking files written to them,
|
||||
avoiding writing files larger than a specified size.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Thu, 01 Mar 2012 22:34:27 -0400
|
||||
|
||||
|
|
|
@ -5,6 +5,25 @@ you want to use it to sneakernet files between systems (possibly with
|
|||
[[encrypted|encryption]] contents). Just set up both systems to use
|
||||
the drive's mountpoint as a directory remote.
|
||||
|
||||
## configuration
|
||||
|
||||
These parameters can be passed to `git annex initremote` to configure the
|
||||
remote:
|
||||
|
||||
* `encryption` - Required. Either "none" to disable encryption of content
|
||||
stored in the directory,
|
||||
or a value that can be looked up (using gpg -k) to find a gpg encryption
|
||||
key that will be given access to the remote. Note that additional gpg
|
||||
keys can be given access to a remote by rerunning initremote with
|
||||
the new key id. See [[encryption]].
|
||||
* `chunksize` - Avoid storing files larger than the specified size in the
|
||||
directory. For use on directories on mount points that have file size
|
||||
limitations. The default is to never chunk files.
|
||||
Note that changing the chunk size of an existing remote is
|
||||
not recommended.
|
||||
The value can use specified using any commonly used units.
|
||||
Example: `chunksize=100 megabytes`
|
||||
|
||||
Setup example:
|
||||
|
||||
# git annex initremote usbdrive type=directory directory=/media/usbdrive/ encryption=none
|
||||
|
|
Loading…
Reference in a new issue