Directory special remotes now support chunking files written to them
Avoiding writing files larger than a specified size is useful on certian things. For example, box.com has a file size limit of 100 mb. Could also be useful on really crappy removable media.
This commit is contained in:
parent
2841d748a4
commit
3436aba6de
3 changed files with 185 additions and 58 deletions
|
@ -1,6 +1,6 @@
|
||||||
{- A "remote" that is just a filesystem directory.
|
{- A "remote" that is just a filesystem directory.
|
||||||
-
|
-
|
||||||
- Copyright 2011 Joey Hess <joey@kitenet.net>
|
- Copyright 2011-2012 Joey Hess <joey@kitenet.net>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -19,6 +19,8 @@ import Utility.FileMode
|
||||||
import Remote.Helper.Special
|
import Remote.Helper.Special
|
||||||
import Remote.Helper.Encryptable
|
import Remote.Helper.Encryptable
|
||||||
import Crypto
|
import Crypto
|
||||||
|
import Utility.DataUnits
|
||||||
|
import Data.Int
|
||||||
|
|
||||||
remote :: RemoteType
|
remote :: RemoteType
|
||||||
remote = RemoteType {
|
remote = RemoteType {
|
||||||
|
@ -32,24 +34,39 @@ gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex Remote
|
||||||
gen r u c = do
|
gen r u c = do
|
||||||
dir <- getConfig r "directory" (error "missing directory")
|
dir <- getConfig r "directory" (error "missing directory")
|
||||||
cst <- remoteCost r cheapRemoteCost
|
cst <- remoteCost r cheapRemoteCost
|
||||||
|
let chunksize = chunkSize c
|
||||||
return $ encryptableRemote c
|
return $ encryptableRemote c
|
||||||
(storeEncrypted dir)
|
(storeEncrypted dir chunksize)
|
||||||
(retrieveEncrypted dir)
|
(retrieveEncrypted dir chunksize)
|
||||||
Remote {
|
Remote {
|
||||||
uuid = u,
|
uuid = u,
|
||||||
cost = cst,
|
cost = cst,
|
||||||
name = Git.repoDescribe r,
|
name = Git.repoDescribe r,
|
||||||
storeKey = store dir,
|
storeKey = store dir chunksize,
|
||||||
retrieveKeyFile = retrieve dir,
|
retrieveKeyFile = retrieve dir chunksize,
|
||||||
retrieveKeyFileCheap = retrieveCheap dir,
|
retrieveKeyFileCheap = retrieveCheap dir chunksize,
|
||||||
removeKey = remove dir,
|
removeKey = remove dir chunksize,
|
||||||
hasKey = checkPresent dir,
|
hasKey = checkPresent dir chunksize,
|
||||||
hasKeyCheap = True,
|
hasKeyCheap = True,
|
||||||
whereisKey = Nothing,
|
whereisKey = Nothing,
|
||||||
config = Nothing,
|
config = Nothing,
|
||||||
repo = r,
|
repo = r,
|
||||||
remotetype = remote
|
remotetype = remote
|
||||||
}
|
}
|
||||||
|
where
|
||||||
|
|
||||||
|
type ChunkSize = Maybe Int64
|
||||||
|
|
||||||
|
chunkSize :: Maybe RemoteConfig -> ChunkSize
|
||||||
|
chunkSize Nothing = Nothing
|
||||||
|
chunkSize (Just m) =
|
||||||
|
case M.lookup "chunksize" m of
|
||||||
|
Nothing -> Nothing
|
||||||
|
Just v -> case readSize dataUnits v of
|
||||||
|
Nothing -> error "bad chunksize"
|
||||||
|
Just size
|
||||||
|
| size <= 0 -> error "bad chunksize"
|
||||||
|
| otherwise -> Just $ fromInteger size
|
||||||
|
|
||||||
directorySetup :: UUID -> RemoteConfig -> Annex RemoteConfig
|
directorySetup :: UUID -> RemoteConfig -> Annex RemoteConfig
|
||||||
directorySetup u c = do
|
directorySetup u c = do
|
||||||
|
@ -69,69 +86,158 @@ directorySetup u c = do
|
||||||
locations :: FilePath -> Key -> [FilePath]
|
locations :: FilePath -> Key -> [FilePath]
|
||||||
locations d k = map (d </>) (keyPaths k)
|
locations d k = map (d </>) (keyPaths k)
|
||||||
|
|
||||||
withCheckedFile :: (FilePath -> IO Bool) -> FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool
|
{- An infinite stream of chunks to use for a given file. -}
|
||||||
withCheckedFile _ [] _ _ = return False
|
chunkStream :: FilePath -> [FilePath]
|
||||||
withCheckedFile check d k a = go $ locations d k
|
chunkStream f = map tochunk [1 :: Integer ..]
|
||||||
|
where
|
||||||
|
tochunk n = f ++ ".chunk" ++ show n
|
||||||
|
|
||||||
|
{- A file that records the number of chunks used. -}
|
||||||
|
chunkCount :: FilePath -> FilePath
|
||||||
|
chunkCount f = f ++ ".chunkcount"
|
||||||
|
|
||||||
|
withCheckedFiles :: (FilePath -> IO Bool) -> ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
|
||||||
|
withCheckedFiles _ _ [] _ _ = return False
|
||||||
|
withCheckedFiles check Nothing d k a = go $ locations d k
|
||||||
where
|
where
|
||||||
go [] = return False
|
go [] = return False
|
||||||
go (f:fs) = do
|
go (f:fs) = do
|
||||||
use <- check f
|
use <- check f
|
||||||
if use
|
if use
|
||||||
then a f
|
then a [f]
|
||||||
else go fs
|
else go fs
|
||||||
|
withCheckedFiles check (Just _) d k a = go $ locations d k
|
||||||
withStoredFile :: FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool
|
|
||||||
withStoredFile = withCheckedFile doesFileExist
|
|
||||||
|
|
||||||
store :: FilePath -> Key -> Annex Bool
|
|
||||||
store d k = do
|
|
||||||
src <- inRepo $ gitAnnexLocation k
|
|
||||||
liftIO $ catchBoolIO $ storeHelper d k $ copyFileExternal src
|
|
||||||
|
|
||||||
storeEncrypted :: FilePath -> (Cipher, Key) -> Key -> Annex Bool
|
|
||||||
storeEncrypted d (cipher, enck) k = do
|
|
||||||
src <- inRepo $ gitAnnexLocation k
|
|
||||||
liftIO $ catchBoolIO $ storeHelper d enck $ encrypt src
|
|
||||||
where
|
where
|
||||||
encrypt src dest = do
|
go [] = return False
|
||||||
withEncryptedContent cipher (L.readFile src) $ L.writeFile dest
|
go (f:fs) = do
|
||||||
return True
|
let chunkcount = chunkCount f
|
||||||
|
use <- check chunkcount
|
||||||
|
if use
|
||||||
|
then do
|
||||||
|
count <- readcount chunkcount
|
||||||
|
let chunks = take count $ chunkStream f
|
||||||
|
ok <- all id <$> mapM check chunks
|
||||||
|
if ok
|
||||||
|
then a chunks
|
||||||
|
else return False
|
||||||
|
else go fs
|
||||||
|
readcount f = fromMaybe (error $ "cannot parse " ++ f)
|
||||||
|
. (readish :: String -> Maybe Int)
|
||||||
|
<$> readFile f
|
||||||
|
|
||||||
storeHelper :: FilePath -> Key -> (FilePath -> IO Bool) -> IO Bool
|
withStoredFiles :: ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
|
||||||
storeHelper d key a = do
|
withStoredFiles = withCheckedFiles doesFileExist
|
||||||
let dest = Prelude.head $ locations d key
|
|
||||||
let tmpdest = dest ++ ".tmp"
|
store :: FilePath -> ChunkSize -> Key -> Annex Bool
|
||||||
let dir = parentDir dest
|
store d chunksize k = do
|
||||||
|
src <- inRepo $ gitAnnexLocation k
|
||||||
|
liftIO $ catchBoolIO $ storeHelper d chunksize k $ \dests ->
|
||||||
|
case chunksize of
|
||||||
|
Nothing -> do
|
||||||
|
let dest = Prelude.head dests
|
||||||
|
ok <- copyFileExternal src dest
|
||||||
|
return $ if ok then [dest] else []
|
||||||
|
Just _ -> storeSplit chunksize dests =<< L.readFile src
|
||||||
|
|
||||||
|
storeEncrypted :: FilePath -> ChunkSize -> (Cipher, Key) -> Key -> Annex Bool
|
||||||
|
storeEncrypted d chunksize (cipher, enck) k = do
|
||||||
|
src <- inRepo $ gitAnnexLocation k
|
||||||
|
liftIO $ catchBoolIO $ storeHelper d chunksize enck $ encrypt src
|
||||||
|
where
|
||||||
|
encrypt src dests = withEncryptedContent cipher (L.readFile src) $ \s ->
|
||||||
|
case chunksize of
|
||||||
|
Nothing -> do
|
||||||
|
let dest = Prelude.head dests
|
||||||
|
L.writeFile dest s
|
||||||
|
return [dest]
|
||||||
|
Just _ -> storeSplit chunksize dests s
|
||||||
|
|
||||||
|
{- Splits a ByteString into chunks and writes to dests.
|
||||||
|
- Note: Must always write at least one file, even for empty ByteString. -}
|
||||||
|
storeSplit :: ChunkSize -> [FilePath] -> L.ByteString -> IO [FilePath]
|
||||||
|
storeSplit Nothing _ _ = error "bad storeSplit call"
|
||||||
|
storeSplit _ [] _ = error "bad storeSplit call"
|
||||||
|
storeSplit (Just chunksize) alldests@(firstdest:_) s
|
||||||
|
| L.null s = do
|
||||||
|
-- must always write at least one file, even for empty
|
||||||
|
L.writeFile firstdest s
|
||||||
|
return [firstdest]
|
||||||
|
| otherwise = storeSplit' chunksize alldests s []
|
||||||
|
storeSplit' :: Int64 -> [FilePath] -> L.ByteString -> [FilePath] -> IO [FilePath]
|
||||||
|
storeSplit' _ [] _ _ = error "expected an infinite list"
|
||||||
|
storeSplit' chunksize (d:dests) s c
|
||||||
|
| L.null s = return $ reverse c
|
||||||
|
| otherwise = do
|
||||||
|
let (chunk, rest) = L.splitAt chunksize s
|
||||||
|
L.writeFile d chunk
|
||||||
|
storeSplit' chunksize dests rest (d:c)
|
||||||
|
|
||||||
|
{- Generates a list of destinations to write to in order to store a key.
|
||||||
|
- When chunksize is specified, this list will be a list of chunks.
|
||||||
|
- The action should store the file, and return a list of the destinations
|
||||||
|
- it stored it to, or [] on error.
|
||||||
|
- The stored files are only put into their final place once storage is
|
||||||
|
- complete.
|
||||||
|
-}
|
||||||
|
storeHelper :: FilePath -> ChunkSize -> Key -> ([FilePath] -> IO [FilePath]) -> IO Bool
|
||||||
|
storeHelper d chunksize key a = do
|
||||||
|
let dir = parentDir desttemplate
|
||||||
createDirectoryIfMissing True dir
|
createDirectoryIfMissing True dir
|
||||||
allowWrite dir
|
allowWrite dir
|
||||||
ok <- a tmpdest
|
stored <- a tmpdests
|
||||||
when ok $ do
|
forM_ stored $ \f -> do
|
||||||
renameFile tmpdest dest
|
let dest = detmpprefix f
|
||||||
|
renameFile f dest
|
||||||
preventWrite dest
|
preventWrite dest
|
||||||
|
when (chunksize /= Nothing) $ do
|
||||||
|
let chunkcount = chunkCount desttemplate
|
||||||
|
_ <- tryIO $ allowWrite chunkcount
|
||||||
|
writeFile chunkcount (show $ length stored)
|
||||||
|
preventWrite chunkcount
|
||||||
preventWrite dir
|
preventWrite dir
|
||||||
return ok
|
return (not $ null stored)
|
||||||
|
where
|
||||||
|
desttemplate = Prelude.head $ locations d key
|
||||||
|
tmpdests = case chunksize of
|
||||||
|
Nothing -> [desttemplate ++ tmpprefix]
|
||||||
|
Just _ -> map (++ tmpprefix) (chunkStream desttemplate)
|
||||||
|
tmpprefix = ".tmp"
|
||||||
|
detmpprefix f = take (length f - tmpprefixlen) f
|
||||||
|
tmpprefixlen = length tmpprefix
|
||||||
|
|
||||||
retrieve :: FilePath -> Key -> FilePath -> Annex Bool
|
retrieve :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool
|
||||||
retrieve d k f = liftIO $ withStoredFile d k $ \file -> copyFileExternal file f
|
retrieve d chunksize k f = liftIO $ withStoredFiles chunksize d k go
|
||||||
|
where
|
||||||
retrieveCheap :: FilePath -> Key -> FilePath -> Annex Bool
|
go [file] = copyFileExternal file f
|
||||||
retrieveCheap d k f = liftIO $ withStoredFile d k $ \file ->
|
go files = catchBoolIO $ do
|
||||||
catchBoolIO $ createSymbolicLink file f >> return True
|
L.writeFile f =<< (L.concat <$> mapM L.readFile files)
|
||||||
|
|
||||||
retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool
|
|
||||||
retrieveEncrypted d (cipher, enck) f =
|
|
||||||
liftIO $ withStoredFile d enck $ \file -> catchBoolIO $ do
|
|
||||||
withDecryptedContent cipher (L.readFile file) $ L.writeFile f
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
remove :: FilePath -> Key -> Annex Bool
|
retrieveEncrypted :: FilePath -> ChunkSize -> (Cipher, Key) -> FilePath -> Annex Bool
|
||||||
remove d k = liftIO $ withStoredFile d k $ \file -> catchBoolIO $ do
|
retrieveEncrypted d chunksize (cipher, enck) f =
|
||||||
|
liftIO $ withStoredFiles chunksize d enck $ \files -> catchBoolIO $ do
|
||||||
|
withDecryptedContent cipher (L.concat <$> mapM L.readFile files) $
|
||||||
|
L.writeFile f
|
||||||
|
return True
|
||||||
|
|
||||||
|
retrieveCheap :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool
|
||||||
|
retrieveCheap _ (Just _) _ _ = return False -- no cheap retrieval for chunks
|
||||||
|
retrieveCheap d _ k f = liftIO $ withStoredFiles Nothing d k go
|
||||||
|
where
|
||||||
|
go [file] = catchBoolIO $ createSymbolicLink file f >> return True
|
||||||
|
go _files = return False
|
||||||
|
|
||||||
|
remove :: FilePath -> ChunkSize -> Key -> Annex Bool
|
||||||
|
remove d chunksize k = liftIO $ withStoredFiles chunksize d k go
|
||||||
|
where
|
||||||
|
go files = all id <$> mapM removefile files
|
||||||
|
removefile file = catchBoolIO $ do
|
||||||
let dir = parentDir file
|
let dir = parentDir file
|
||||||
allowWrite dir
|
allowWrite dir
|
||||||
removeFile file
|
removeFile file
|
||||||
removeDirectory dir
|
_ <- tryIO $ removeDirectory dir
|
||||||
return True
|
return True
|
||||||
|
|
||||||
checkPresent :: FilePath -> Key -> Annex (Either String Bool)
|
checkPresent :: FilePath -> ChunkSize -> Key -> Annex (Either String Bool)
|
||||||
checkPresent d k = liftIO $ catchMsgIO $ withStoredFile d k $
|
checkPresent d chunksize k = liftIO $ catchMsgIO $ withStoredFiles chunksize d k $
|
||||||
const $ return True -- withStoredFile checked that it exists
|
const $ return True -- withStoredFiles checked that it exists
|
||||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -2,6 +2,8 @@ git-annex (3.20120230) UNRELEASED; urgency=low
|
||||||
|
|
||||||
* "here" can be used to refer to the current repository,
|
* "here" can be used to refer to the current repository,
|
||||||
which can read better than the old "." (which still works too).
|
which can read better than the old "." (which still works too).
|
||||||
|
* Directory special remotes now support chunking files written to them,
|
||||||
|
avoiding writing files larger than a specified size.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Thu, 01 Mar 2012 22:34:27 -0400
|
-- Joey Hess <joeyh@debian.org> Thu, 01 Mar 2012 22:34:27 -0400
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,25 @@ you want to use it to sneakernet files between systems (possibly with
|
||||||
[[encrypted|encryption]] contents). Just set up both systems to use
|
[[encrypted|encryption]] contents). Just set up both systems to use
|
||||||
the drive's mountpoint as a directory remote.
|
the drive's mountpoint as a directory remote.
|
||||||
|
|
||||||
|
## configuration
|
||||||
|
|
||||||
|
These parameters can be passed to `git annex initremote` to configure the
|
||||||
|
remote:
|
||||||
|
|
||||||
|
* `encryption` - Required. Either "none" to disable encryption of content
|
||||||
|
stored in the directory,
|
||||||
|
or a value that can be looked up (using gpg -k) to find a gpg encryption
|
||||||
|
key that will be given access to the remote. Note that additional gpg
|
||||||
|
keys can be given access to a remote by rerunning initremote with
|
||||||
|
the new key id. See [[encryption]].
|
||||||
|
* `chunksize` - Avoid storing files larger than the specified size in the
|
||||||
|
directory. For use on directories on mount points that have file size
|
||||||
|
limitations. The default is to never chunk files.
|
||||||
|
Note that changing the chunk size of an existing remote is
|
||||||
|
not recommended.
|
||||||
|
The value can use specified using any commonly used units.
|
||||||
|
Example: `chunksize=100 megabytes`
|
||||||
|
|
||||||
Setup example:
|
Setup example:
|
||||||
|
|
||||||
# git annex initremote usbdrive type=directory directory=/media/usbdrive/ encryption=none
|
# git annex initremote usbdrive type=directory directory=/media/usbdrive/ encryption=none
|
||||||
|
|
Loading…
Reference in a new issue