directory special remote: Made more efficient and robust.

Files are now written to a tmp directory in the remote, and once all
chunks are written, etc, it's moved into the final place atomically.

For now, checkpresent still checks every single chunk of a file, because
the old method could leave partially transferred files with some chunks
present and others not.
This commit is contained in:
Joey Hess 2012-11-19 13:18:23 -04:00
parent d3dfeeb3d9
commit 5f977cc725
5 changed files with 47 additions and 42 deletions

View file

@ -12,7 +12,6 @@ import Command
import Annex.Content import Annex.Content
import Utility.Rsync import Utility.Rsync
import Logs.Transfer import Logs.Transfer
import Types.Remote
import qualified Fields import qualified Fields
def :: [Command] def :: [Command]

View file

@ -48,7 +48,7 @@ gen r u c = do
storeKey = store dir chunksize, storeKey = store dir chunksize,
retrieveKeyFile = retrieve dir chunksize, retrieveKeyFile = retrieve dir chunksize,
retrieveKeyFileCheap = retrieveCheap dir chunksize, retrieveKeyFileCheap = retrieveCheap dir chunksize,
removeKey = remove dir chunksize, removeKey = remove dir,
hasKey = checkPresent dir chunksize, hasKey = checkPresent dir chunksize,
hasKeyCheap = True, hasKeyCheap = True,
whereisKey = Nothing, whereisKey = Nothing,
@ -73,10 +73,19 @@ directorySetup u c = do
gitConfigSpecialRemote u c' "directory" dir gitConfigSpecialRemote u c' "directory" dir
return $ M.delete "directory" c' return $ M.delete "directory" c'
{- Locations to try to access a given Key in the Directory. -} {- Locations to try to access a given Key in the Directory.
- We try more than since we used to write to different hash directories. -}
locations :: FilePath -> Key -> [FilePath] locations :: FilePath -> Key -> [FilePath]
locations d k = map (d </>) (keyPaths k) locations d k = map (d </>) (keyPaths k)
{- Directory where the file(s) for a key are stored. -}
storeDir :: FilePath -> Key -> FilePath
storeDir d k = addTrailingPathSeparator $ d </> hashDirLower k </> keyFile k
{- Where we store temporary data for a key as it's being uploaded. -}
tmpDir :: FilePath -> Key -> FilePath
tmpDir d k = addTrailingPathSeparator $ d </> "tmp" </> keyFile k
withCheckedFiles :: (FilePath -> IO Bool) -> ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool withCheckedFiles :: (FilePath -> IO Bool) -> ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
withCheckedFiles _ _ [] _ _ = return False withCheckedFiles _ _ [] _ _ = return False
withCheckedFiles check Nothing d k a = go $ locations d k withCheckedFiles check Nothing d k a = go $ locations d k
@ -159,18 +168,22 @@ storeSplit' meterupdate chunksize (d:dests) bs c = do
storeHelper :: FilePath -> ChunkSize -> Key -> ([FilePath] -> IO [FilePath]) -> Annex Bool storeHelper :: FilePath -> ChunkSize -> Key -> ([FilePath] -> IO [FilePath]) -> Annex Bool
storeHelper d chunksize key storer = check <&&> go storeHelper d chunksize key storer = check <&&> go
where where
basedest = Prelude.head $ locations d key tmpdir = tmpDir d key
dir = parentDir basedest destdir = storeDir d key
{- The size is not exactly known when encrypting the key; {- The size is not exactly known when encrypting the key;
- this assumes that at least the size of the key is - this assumes that at least the size of the key is
- needed as free space. -} - needed as free space. -}
check = checkDiskSpace (Just dir) key 0 check = do
go = liftIO $ catchBoolIO $ do liftIO $ createDirectoryIfMissing True tmpdir
createDirectoryIfMissing True dir checkDiskSpace (Just tmpdir) key 0
allowWrite dir go = liftIO $ catchBoolIO $
preventWrite dir `after` storeChunks basedest chunksize storer recorder finalizer storeChunks key tmpdir destdir chunksize storer recorder finalizer
finalizer f dest = do finalizer tmp dest = do
renameFile f dest void $ tryIO $ allowWrite dest -- may already exist
void $ tryIO $ removeDirectoryRecursive dest -- or not exist
createDirectoryIfMissing True (parentDir dest)
renameDirectory tmp dest
mapM_ preventWrite =<< dirContents dest
preventWrite dest preventWrite dest
recorder f s = do recorder f s = do
void $ tryIO $ allowWrite f void $ tryIO $ allowWrite f
@ -201,16 +214,13 @@ retrieveCheap d _ k f = liftIO $ withStoredFiles Nothing d k go
go [file] = catchBoolIO $ createSymbolicLink file f >> return True go [file] = catchBoolIO $ createSymbolicLink file f >> return True
go _files = return False go _files = return False
remove :: FilePath -> ChunkSize -> Key -> Annex Bool remove :: FilePath -> Key -> Annex Bool
remove d chunksize k = liftIO $ withStoredFiles chunksize d k go remove d k = liftIO $ catchBoolIO $ do
where
go = all id <$$> mapM removefile
removefile file = catchBoolIO $ do
let dir = parentDir file
allowWrite dir allowWrite dir
removeFile file removeDirectoryRecursive dir
_ <- tryIO $ removeDirectory dir
return True return True
where
dir = storeDir d k
checkPresent :: FilePath -> ChunkSize -> Key -> Annex (Either String Bool) checkPresent :: FilePath -> ChunkSize -> Key -> Annex (Either String Bool)
checkPresent d chunksize k = liftIO $ catchMsgIO $ withStoredFiles chunksize d k $ checkPresent d chunksize k = liftIO $ catchMsgIO $ withStoredFiles chunksize d k $

View file

@ -58,33 +58,28 @@ chunkStream = map (\n -> ".chunk" ++ show n) [1 :: Integer ..]
{- Given the base destination to use to store a value, {- Given the base destination to use to store a value,
- generates a stream of temporary destinations (just one when not chunking) - generates a stream of temporary destinations (just one when not chunking)
- and passes it to an action, which should chunk and store the data, - and passes it to an action, which should chunk and store the data,
- and return the destinations it stored to, or [] on error. - and return the destinations it stored to, or [] on error. Then
- - calls the storer to write the chunk count (if chunking). Finally, the
- Then calles the finalizer to rename the temporary destinations into - fianlizer is called to rename the tmp into the dest
- their final places (and do any other cleanup), and writes the chunk count - (and do any other cleanup).
- (if chunking)
-} -}
storeChunks :: FilePath -> ChunkSize -> ([FilePath] -> IO [FilePath]) -> (FilePath -> String -> IO ()) -> (FilePath -> FilePath -> IO ()) -> IO Bool storeChunks :: Key -> FilePath -> FilePath -> ChunkSize -> ([FilePath] -> IO [FilePath]) -> (FilePath -> String -> IO ()) -> (FilePath -> FilePath -> IO ()) -> IO Bool
storeChunks basedest chunksize storer recorder finalizer = storeChunks key tmp dest chunksize storer recorder finalizer =
either (const $ return False) return either (const $ return False) return
=<< (E.try go :: IO (Either E.SomeException Bool)) =<< (E.try go :: IO (Either E.SomeException Bool))
where where
go = do go = do
stored <- storer tmpdests stored <- storer tmpdests
forM_ stored $ \d -> do
let dest = detmpprefix d
finalizer d dest
when (chunksize /= Nothing) $ do when (chunksize /= Nothing) $ do
let chunkcount = basedest ++ chunkCount let chunkcount = basef ++ chunkCount
recorder chunkcount (show $ length stored) recorder chunkcount (show $ length stored)
finalizer tmp dest
return (not $ null stored) return (not $ null stored)
tmpprefix = ".tmp" basef = tmp ++ keyFile key
detmpprefix f = take (length f - tmpprefixlen) f
tmpprefixlen = length tmpprefix
tmpdests tmpdests
| chunksize == Nothing = [basedest ++ tmpprefix] | chunksize == Nothing = [basef]
| otherwise = map (++ tmpprefix) $ map (basedest ++) chunkStream | otherwise = map (basef ++ ) chunkStream
{- Given a list of destinations to use, chunks the data according to the {- Given a list of destinations to use, chunks the data according to the
- ChunkSize, and runs the storer action to store each chunk. Returns - ChunkSize, and runs the storer action to store each chunk. Returns

View file

@ -90,7 +90,7 @@ store r k _f p = metered (Just p) k $ \meterupdate ->
let url = davLocation baseurl k let url = davLocation baseurl k
f <- inRepo $ gitAnnexLocation k f <- inRepo $ gitAnnexLocation k
liftIO $ withMeteredFile f meterupdate $ liftIO $ withMeteredFile f meterupdate $
storeHelper r url user pass storeHelper r k url user pass
storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
storeEncrypted r (cipher, enck) k p = metered (Just p) k $ \meterupdate -> storeEncrypted r (cipher, enck) k p = metered (Just p) k $ \meterupdate ->
@ -98,12 +98,12 @@ storeEncrypted r (cipher, enck) k p = metered (Just p) k $ \meterupdate ->
let url = davLocation baseurl enck let url = davLocation baseurl enck
f <- inRepo $ gitAnnexLocation k f <- inRepo $ gitAnnexLocation k
liftIO $ encrypt cipher (streamMeteredFile f meterupdate) $ liftIO $ encrypt cipher (streamMeteredFile f meterupdate) $
readBytes $ storeHelper r url user pass readBytes $ storeHelper r enck url user pass
storeHelper :: Remote -> DavUrl -> DavUser -> DavPass -> L.ByteString -> IO Bool storeHelper :: Remote -> Key -> DavUrl -> DavUser -> DavPass -> L.ByteString -> IO Bool
storeHelper r urlbase user pass b = catchBoolIO $ do storeHelper r k urlbase user pass b = catchBoolIO $ do
davMkdir (urlParent urlbase) user pass davMkdir (urlParent urlbase) user pass
storeChunks urlbase chunksize storer recorder finalizer storeChunks k undefined undefined chunksize storer recorder finalizer
where where
chunksize = chunkSize $ config r chunksize = chunkSize $ config r
storer urls = storeChunked chunksize urls storehttp b storer urls = storeChunked chunksize urls storehttp b

1
debian/changelog vendored
View file

@ -10,6 +10,7 @@ git-annex (3.20121113) UNRELEASED; urgency=low
* Getting a file from chunked directory special remotes no longer buffers * Getting a file from chunked directory special remotes no longer buffers
it all in memory. it all in memory.
* S3: Added progress display for uploading and downloading. * S3: Added progress display for uploading and downloading.
* directory special remote: Made more efficient and robust.
-- Joey Hess <joeyh@debian.org> Tue, 13 Nov 2012 13:17:07 -0400 -- Joey Hess <joeyh@debian.org> Tue, 13 Nov 2012 13:17:07 -0400