import+export from directory special remote fully working

Had to add two more API calls to override export APIs that are not safe
for use in combination with import.

It's unfortunate that removeExportDirectory is documented to be allowed
to remove non-empty directories. I'm not entirely sure why it's that
way, my best guess is it was intended to make it easy to implement with
just rm -rf.
This commit is contained in:
Joey Hess 2019-03-05 14:20:14 -04:00
parent 554b7b7f3e
commit 8c54604e67
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 129 additions and 77 deletions

View file

@ -80,6 +80,8 @@ gen r u c gc = do
{ listImportableContents = listImportableContentsM dir
, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM dir
, storeExportWithContentIdentifier = storeExportWithContentIdentifierM dir
, removeExportWithContentIdentifier = removeExportWithContentIdentifierM dir
, removeExportDirectoryWhenEmpty = Nothing
}
, whereisKey = Nothing
, remoteFsck = Nothing
@ -329,7 +331,7 @@ retrieveExportWithContentIdentifierM :: FilePath -> ExportLocation -> ContentIde
retrieveExportWithContentIdentifierM dir loc cid dest mkkey p =
catchDefaultIO Nothing $ precheck $ docopy postcheck
where
f = dir </> fromExportLocation loc
f = exportPath dir loc
docopy cont = do
#ifndef mingw32_HOST_OS
@ -388,49 +390,57 @@ retrieveExportWithContentIdentifierM dir loc cid dest mkkey p =
storeExportWithContentIdentifierM :: FilePath -> FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex (Maybe ContentIdentifier)
storeExportWithContentIdentifierM dir src _k loc overwritablecids p =
liftIO $ catchDefaultIO Nothing $ do
createDirectoryIfMissing True destdir
docopy checkoverwrite
catchDefaultIO Nothing $ do
liftIO $ createDirectoryIfMissing True destdir
docopy
where
dest = dir </> fromExportLocation loc
dest = exportPath dir loc
(destdir, base) = splitFileName dest
template = relatedTemplate (base ++ ".tmp")
docopy cont = withTmpFileIn destdir template $ \tmpf tmph -> do
withMeteredFile src p (L.hPut tmph)
hFlush tmph
getFileStatus tmpf >>= mkContentIdentifier tmpf >>= \case
docopy = withTmpFileIn destdir template $ \tmpf tmph -> do
liftIO $ withMeteredFile src p (L.hPut tmph)
liftIO $ hFlush tmph
liftIO (getFileStatus tmpf) >>= liftIO . mkContentIdentifier tmpf >>= \case
Nothing -> return Nothing
Just newcid -> cont newcid $ do
rename tmpf dest
return (Just newcid)
-- If the destination file already exists, it should only
-- be overwritten when its ContentIdentifier is in overwritablecids
-- or is the same as the ContentIdentifier of the replacement.
--
-- This should avoid races to the extent possible. However,
-- if something has the destination file open for write,
-- it could write to it after it's been overwritten with the new
-- content, and its write would be lost, and we don't need to
-- detect that. (In similar situations, git doesn't either!)
--
-- It follows that if something is written to the destination file
-- shortly before, it's acceptable to overwrite anyway, as that's
-- nearly indistinguishable from the above case.
--
-- So, it suffices to check if the destination file's current
-- content can be overwritten, and immediately overwrite it.
checkoverwrite newcid finalize = do
destst <- getFileStatus dest
if isRegularFile destst
then catchDefaultIO Nothing (mkContentIdentifier dest destst) >>= \case
Just newcid ->
checkExportContent dir loc (newcid:overwritablecids) Nothing $ do
liftIO $ rename tmpf dest
return (Just newcid)
removeExportWithContentIdentifierM :: FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> Annex Bool
removeExportWithContentIdentifierM dir k loc removeablecids =
checkExportContent dir loc removeablecids False $
removeExportM dir k loc
-- Checks if the content at an ExportLocation is in the knowncids,
-- and only runs the callback that modifies it if it's safe to do so.
--
-- This should avoid races to the extent possible. However,
-- if something has the file open for write, it could write to the handle
-- after the callback has overwritten or deleted it, and its write would
-- be lost, and we don't need to detect that.
-- (In similar situations, git doesn't either!)
--
-- It follows that if something is written to the destination file
-- shortly before, it's acceptable to run the callback anyway, as that's
-- nearly indistinguishable from the above case.
--
-- So, it suffices to check if the destination file's current
-- content is known, and immediately run the callback.
checkExportContent :: FilePath -> ExportLocation -> [ContentIdentifier] -> a -> Annex a -> Annex a
checkExportContent dir loc knowncids unsafe callback =
tryWhenExists (liftIO $ getFileStatus dest) >>= \case
Just destst
| not (isRegularFile destst) -> return unsafe
| otherwise -> catchDefaultIO Nothing (liftIO $ mkContentIdentifier dest destst) >>= \case
Just destcid
| destcid `elem` overwritablecids ->
finalize
| destcid == newcid -> finalize
| destcid `elem` knowncids -> callback
-- dest exists with other content
| otherwise -> return Nothing
-- dest does not exist, not overwriting
Nothing -> finalize
else return Nothing
| otherwise -> return unsafe
-- should never happen
Nothing -> return unsafe
-- dest does not exist
Nothing -> callback
where
dest = exportPath dir loc

View file

@ -59,6 +59,8 @@ instance HasImportUnsupported (ImportActions Annex) where
{ listImportableContents = return Nothing
, retrieveExportWithContentIdentifier = \_ _ _ _ _ -> return Nothing
, storeExportWithContentIdentifier = \_ _ _ _ _ -> return Nothing
, removeExportWithContentIdentifier = \_ _ _ -> return False
, removeExportDirectoryWhenEmpty = Just $ \_ -> return False
}
exportIsSupported :: RemoteConfig -> RemoteGitConfig -> Annex Bool
@ -132,40 +134,45 @@ adjustExportImport r = case M.lookup "exporttree" (config r) of
isimport r' exportdbv = do
lcklckv <- liftIO newEmptyTMVarIO
dbtv <- liftIO newEmptyTMVarIO
let store f k loc p = do
-- Only open the database once it's needed,
-- and take an exclusive write lock.
-- The write lock will then remain held while the
-- process is running.
db <- liftIO (atomically (tryReadTMVar dbtv)) >>= \case
Just (db, _lck) -> return db
-- let only one thread take the lock
Nothing -> ifM (liftIO $ atomically $ tryPutTMVar lcklckv ())
( do
lck <- takeExclusiveLock gitAnnexContentIdentifierLock
db <- ContentIdentifier.openDb
liftIO $ atomically (putTMVar dbtv (db, lck))
return db
-- loser waits for winner to open
-- the db and can then also use its
-- handle
, liftIO $ fst <$> atomically (readTMVar dbtv)
)
-- Only open the database once it's needed, and take an
-- exclusive write lock. The write lock will then remain
-- held while the process is running.
let getdb = liftIO (atomically (tryReadTMVar dbtv)) >>= \case
Just (db, _lck) -> return db
-- let only one thread take the lock
Nothing -> ifM (liftIO $ atomically $ tryPutTMVar lcklckv ())
( do
lck <- takeExclusiveLock gitAnnexContentIdentifierLock
db <- ContentIdentifier.openDb
liftIO $ atomically (putTMVar dbtv (db, lck))
return db
-- loser waits for winner to open the db and
-- can then also use its handle
, liftIO $ fst <$> atomically (readTMVar dbtv)
)
let getknowncids db loc = do
exportdb <- getexportdb exportdbv
ks <- liftIO $ Export.getExportedKey exportdb loc
oldcids <- liftIO $ concat
liftIO $ concat
<$> mapM (ContentIdentifier.getContentIdentifiers db (uuid r')) ks
storeExportWithContentIdentifier (importActions r') f k loc oldcids p >>= \case
Nothing -> return False
Just newcid -> do
liftIO $ ContentIdentifier.recordContentIdentifier db (uuid r') newcid k
recordContentIdentifier (uuid r') newcid k
return True
return $ r'
{ exportActions = (exportActions r')
{ storeExport = store
{ storeExport = \f k loc p -> do
db <- getdb
knowncids <- getknowncids db loc
storeExportWithContentIdentifier (importActions r') f k loc knowncids p >>= \case
Nothing -> return False
Just newcid -> do
liftIO $ ContentIdentifier.recordContentIdentifier db (uuid r') newcid k
recordContentIdentifier (uuid r') newcid k
return True
, removeExport = \k loc -> do
db <- getdb
removeExportWithContentIdentifier (importActions r') k loc
=<< getknowncids db loc
, removeExportDirectory = removeExportDirectoryWhenEmpty (importActions r')
}
}