import+export from directory special remote fully working
Had to add two more API calls to override export APIs that are not safe for use in combination with import. It's unfortunate that removeExportDirectory is documented to be allowed to remove non-empty directories. I'm not entirely sure why it's that way, my best guess is it was intended to make it easy to implement with just rm -rf.
This commit is contained in:
parent
554b7b7f3e
commit
8c54604e67
6 changed files with 129 additions and 77 deletions
|
@ -80,6 +80,8 @@ gen r u c gc = do
|
||||||
{ listImportableContents = listImportableContentsM dir
|
{ listImportableContents = listImportableContentsM dir
|
||||||
, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM dir
|
, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM dir
|
||||||
, storeExportWithContentIdentifier = storeExportWithContentIdentifierM dir
|
, storeExportWithContentIdentifier = storeExportWithContentIdentifierM dir
|
||||||
|
, removeExportWithContentIdentifier = removeExportWithContentIdentifierM dir
|
||||||
|
, removeExportDirectoryWhenEmpty = Nothing
|
||||||
}
|
}
|
||||||
, whereisKey = Nothing
|
, whereisKey = Nothing
|
||||||
, remoteFsck = Nothing
|
, remoteFsck = Nothing
|
||||||
|
@ -329,7 +331,7 @@ retrieveExportWithContentIdentifierM :: FilePath -> ExportLocation -> ContentIde
|
||||||
retrieveExportWithContentIdentifierM dir loc cid dest mkkey p =
|
retrieveExportWithContentIdentifierM dir loc cid dest mkkey p =
|
||||||
catchDefaultIO Nothing $ precheck $ docopy postcheck
|
catchDefaultIO Nothing $ precheck $ docopy postcheck
|
||||||
where
|
where
|
||||||
f = dir </> fromExportLocation loc
|
f = exportPath dir loc
|
||||||
|
|
||||||
docopy cont = do
|
docopy cont = do
|
||||||
#ifndef mingw32_HOST_OS
|
#ifndef mingw32_HOST_OS
|
||||||
|
@ -388,49 +390,57 @@ retrieveExportWithContentIdentifierM dir loc cid dest mkkey p =
|
||||||
|
|
||||||
storeExportWithContentIdentifierM :: FilePath -> FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex (Maybe ContentIdentifier)
|
storeExportWithContentIdentifierM :: FilePath -> FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex (Maybe ContentIdentifier)
|
||||||
storeExportWithContentIdentifierM dir src _k loc overwritablecids p =
|
storeExportWithContentIdentifierM dir src _k loc overwritablecids p =
|
||||||
liftIO $ catchDefaultIO Nothing $ do
|
catchDefaultIO Nothing $ do
|
||||||
createDirectoryIfMissing True destdir
|
liftIO $ createDirectoryIfMissing True destdir
|
||||||
docopy checkoverwrite
|
docopy
|
||||||
where
|
where
|
||||||
dest = dir </> fromExportLocation loc
|
dest = exportPath dir loc
|
||||||
(destdir, base) = splitFileName dest
|
(destdir, base) = splitFileName dest
|
||||||
template = relatedTemplate (base ++ ".tmp")
|
template = relatedTemplate (base ++ ".tmp")
|
||||||
|
|
||||||
docopy cont = withTmpFileIn destdir template $ \tmpf tmph -> do
|
docopy = withTmpFileIn destdir template $ \tmpf tmph -> do
|
||||||
withMeteredFile src p (L.hPut tmph)
|
liftIO $ withMeteredFile src p (L.hPut tmph)
|
||||||
hFlush tmph
|
liftIO $ hFlush tmph
|
||||||
getFileStatus tmpf >>= mkContentIdentifier tmpf >>= \case
|
liftIO (getFileStatus tmpf) >>= liftIO . mkContentIdentifier tmpf >>= \case
|
||||||
Nothing -> return Nothing
|
Nothing -> return Nothing
|
||||||
Just newcid -> cont newcid $ do
|
Just newcid ->
|
||||||
rename tmpf dest
|
checkExportContent dir loc (newcid:overwritablecids) Nothing $ do
|
||||||
return (Just newcid)
|
liftIO $ rename tmpf dest
|
||||||
|
return (Just newcid)
|
||||||
-- If the destination file already exists, it should only
|
|
||||||
-- be overwritten when its ContentIdentifier is in overwritablecids
|
removeExportWithContentIdentifierM :: FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> Annex Bool
|
||||||
-- or is the same as the ContentIdentifier of the replacement.
|
removeExportWithContentIdentifierM dir k loc removeablecids =
|
||||||
--
|
checkExportContent dir loc removeablecids False $
|
||||||
-- This should avoid races to the extent possible. However,
|
removeExportM dir k loc
|
||||||
-- if something has the destination file open for write,
|
|
||||||
-- it could write to it after it's been overwritten with the new
|
-- Checks if the content at an ExportLocation is in the knowncids,
|
||||||
-- content, and its write would be lost, and we don't need to
|
-- and only runs the callback that modifies it if it's safe to do so.
|
||||||
-- detect that. (In similar situations, git doesn't either!)
|
--
|
||||||
--
|
-- This should avoid races to the extent possible. However,
|
||||||
-- It follows that if something is written to the destination file
|
-- if something has the file open for write, it could write to the handle
|
||||||
-- shortly before, it's acceptable to overwrite anyway, as that's
|
-- after the callback has overwritten or deleted it, and its write would
|
||||||
-- nearly indistinguishable from the above case.
|
-- be lost, and we don't need to detect that.
|
||||||
--
|
-- (In similar situations, git doesn't either!)
|
||||||
-- So, it suffices to check if the destination file's current
|
--
|
||||||
-- content can be overwritten, and immediately overwrite it.
|
-- It follows that if something is written to the destination file
|
||||||
checkoverwrite newcid finalize = do
|
-- shortly before, it's acceptable to run the callback anyway, as that's
|
||||||
destst <- getFileStatus dest
|
-- nearly indistinguishable from the above case.
|
||||||
if isRegularFile destst
|
--
|
||||||
then catchDefaultIO Nothing (mkContentIdentifier dest destst) >>= \case
|
-- So, it suffices to check if the destination file's current
|
||||||
|
-- content is known, and immediately run the callback.
|
||||||
|
checkExportContent :: FilePath -> ExportLocation -> [ContentIdentifier] -> a -> Annex a -> Annex a
|
||||||
|
checkExportContent dir loc knowncids unsafe callback =
|
||||||
|
tryWhenExists (liftIO $ getFileStatus dest) >>= \case
|
||||||
|
Just destst
|
||||||
|
| not (isRegularFile destst) -> return unsafe
|
||||||
|
| otherwise -> catchDefaultIO Nothing (liftIO $ mkContentIdentifier dest destst) >>= \case
|
||||||
Just destcid
|
Just destcid
|
||||||
| destcid `elem` overwritablecids ->
|
| destcid `elem` knowncids -> callback
|
||||||
finalize
|
|
||||||
| destcid == newcid -> finalize
|
|
||||||
-- dest exists with other content
|
-- dest exists with other content
|
||||||
| otherwise -> return Nothing
|
| otherwise -> return unsafe
|
||||||
-- dest does not exist, not overwriting
|
-- should never happen
|
||||||
Nothing -> finalize
|
Nothing -> return unsafe
|
||||||
else return Nothing
|
-- dest does not exist
|
||||||
|
Nothing -> callback
|
||||||
|
where
|
||||||
|
dest = exportPath dir loc
|
||||||
|
|
|
@ -59,6 +59,8 @@ instance HasImportUnsupported (ImportActions Annex) where
|
||||||
{ listImportableContents = return Nothing
|
{ listImportableContents = return Nothing
|
||||||
, retrieveExportWithContentIdentifier = \_ _ _ _ _ -> return Nothing
|
, retrieveExportWithContentIdentifier = \_ _ _ _ _ -> return Nothing
|
||||||
, storeExportWithContentIdentifier = \_ _ _ _ _ -> return Nothing
|
, storeExportWithContentIdentifier = \_ _ _ _ _ -> return Nothing
|
||||||
|
, removeExportWithContentIdentifier = \_ _ _ -> return False
|
||||||
|
, removeExportDirectoryWhenEmpty = Just $ \_ -> return False
|
||||||
}
|
}
|
||||||
|
|
||||||
exportIsSupported :: RemoteConfig -> RemoteGitConfig -> Annex Bool
|
exportIsSupported :: RemoteConfig -> RemoteGitConfig -> Annex Bool
|
||||||
|
@ -132,40 +134,45 @@ adjustExportImport r = case M.lookup "exporttree" (config r) of
|
||||||
isimport r' exportdbv = do
|
isimport r' exportdbv = do
|
||||||
lcklckv <- liftIO newEmptyTMVarIO
|
lcklckv <- liftIO newEmptyTMVarIO
|
||||||
dbtv <- liftIO newEmptyTMVarIO
|
dbtv <- liftIO newEmptyTMVarIO
|
||||||
let store f k loc p = do
|
-- Only open the database once it's needed, and take an
|
||||||
-- Only open the database once it's needed,
|
-- exclusive write lock. The write lock will then remain
|
||||||
-- and take an exclusive write lock.
|
-- held while the process is running.
|
||||||
-- The write lock will then remain held while the
|
let getdb = liftIO (atomically (tryReadTMVar dbtv)) >>= \case
|
||||||
-- process is running.
|
Just (db, _lck) -> return db
|
||||||
db <- liftIO (atomically (tryReadTMVar dbtv)) >>= \case
|
-- let only one thread take the lock
|
||||||
Just (db, _lck) -> return db
|
Nothing -> ifM (liftIO $ atomically $ tryPutTMVar lcklckv ())
|
||||||
-- let only one thread take the lock
|
( do
|
||||||
Nothing -> ifM (liftIO $ atomically $ tryPutTMVar lcklckv ())
|
lck <- takeExclusiveLock gitAnnexContentIdentifierLock
|
||||||
( do
|
db <- ContentIdentifier.openDb
|
||||||
lck <- takeExclusiveLock gitAnnexContentIdentifierLock
|
liftIO $ atomically (putTMVar dbtv (db, lck))
|
||||||
db <- ContentIdentifier.openDb
|
return db
|
||||||
liftIO $ atomically (putTMVar dbtv (db, lck))
|
-- loser waits for winner to open the db and
|
||||||
return db
|
-- can then also use its handle
|
||||||
-- loser waits for winner to open
|
, liftIO $ fst <$> atomically (readTMVar dbtv)
|
||||||
-- the db and can then also use its
|
)
|
||||||
-- handle
|
|
||||||
, liftIO $ fst <$> atomically (readTMVar dbtv)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
let getknowncids db loc = do
|
||||||
exportdb <- getexportdb exportdbv
|
exportdb <- getexportdb exportdbv
|
||||||
ks <- liftIO $ Export.getExportedKey exportdb loc
|
ks <- liftIO $ Export.getExportedKey exportdb loc
|
||||||
oldcids <- liftIO $ concat
|
liftIO $ concat
|
||||||
<$> mapM (ContentIdentifier.getContentIdentifiers db (uuid r')) ks
|
<$> mapM (ContentIdentifier.getContentIdentifiers db (uuid r')) ks
|
||||||
storeExportWithContentIdentifier (importActions r') f k loc oldcids p >>= \case
|
|
||||||
Nothing -> return False
|
|
||||||
Just newcid -> do
|
|
||||||
liftIO $ ContentIdentifier.recordContentIdentifier db (uuid r') newcid k
|
|
||||||
recordContentIdentifier (uuid r') newcid k
|
|
||||||
return True
|
|
||||||
|
|
||||||
return $ r'
|
return $ r'
|
||||||
{ exportActions = (exportActions r')
|
{ exportActions = (exportActions r')
|
||||||
{ storeExport = store
|
{ storeExport = \f k loc p -> do
|
||||||
|
db <- getdb
|
||||||
|
knowncids <- getknowncids db loc
|
||||||
|
storeExportWithContentIdentifier (importActions r') f k loc knowncids p >>= \case
|
||||||
|
Nothing -> return False
|
||||||
|
Just newcid -> do
|
||||||
|
liftIO $ ContentIdentifier.recordContentIdentifier db (uuid r') newcid k
|
||||||
|
recordContentIdentifier (uuid r') newcid k
|
||||||
|
return True
|
||||||
|
, removeExport = \k loc -> do
|
||||||
|
db <- getdb
|
||||||
|
removeExportWithContentIdentifier (importActions r') k loc
|
||||||
|
=<< getknowncids db loc
|
||||||
|
, removeExportDirectory = removeExportDirectoryWhenEmpty (importActions r')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -272,7 +272,7 @@ data ImportActions a = ImportActions
|
||||||
-- Exports content to an ExportLocation, and returns the
|
-- Exports content to an ExportLocation, and returns the
|
||||||
-- ContentIdentifier corresponding to the content it stored.
|
-- ContentIdentifier corresponding to the content it stored.
|
||||||
--
|
--
|
||||||
-- This has to be used rather than storeExport when a special remote
|
-- This is used rather than storeExport when a special remote
|
||||||
-- supports imports, since files on such a special remote can be
|
-- supports imports, since files on such a special remote can be
|
||||||
-- changed at any time.
|
-- changed at any time.
|
||||||
--
|
--
|
||||||
|
@ -293,4 +293,23 @@ data ImportActions a = ImportActions
|
||||||
-- ^ old content that it's safe to overwrite
|
-- ^ old content that it's safe to overwrite
|
||||||
-> MeterUpdate
|
-> MeterUpdate
|
||||||
-> a (Maybe ContentIdentifier)
|
-> a (Maybe ContentIdentifier)
|
||||||
|
-- This is used rather than removeExport when a special remote
|
||||||
|
-- supports imports.
|
||||||
|
--
|
||||||
|
-- It should only remove a file from the remote when it has one
|
||||||
|
-- of the ContentIdentifiers passed to it, unless listContents
|
||||||
|
-- can recover an overwritten file.
|
||||||
|
--
|
||||||
|
-- It needs to handle races similar to storeExportWithContentIdentifier.
|
||||||
|
, removeExportWithContentIdentifier
|
||||||
|
:: Key
|
||||||
|
-> ExportLocation
|
||||||
|
-> [ContentIdentifier]
|
||||||
|
-> a Bool
|
||||||
|
-- Removes a directory from the export, but only when it's empty.
|
||||||
|
-- Used instead of removeExportDirectory when a special remote
|
||||||
|
-- supports imports.
|
||||||
|
--
|
||||||
|
-- If the directory is not empty, it should succeed.
|
||||||
|
, removeExportDirectoryWhenEmpty :: Maybe (ExportDirectory -> a Bool)
|
||||||
}
|
}
|
||||||
|
|
|
@ -202,7 +202,7 @@ replying with `UNSUPPORTED-REQUEST` is acceptable.
|
||||||
empty directories, this does not need to be implemented.
|
empty directories, this does not need to be implemented.
|
||||||
The directory will be in the form of a relative path, and may contain path
|
The directory will be in the form of a relative path, and may contain path
|
||||||
separators, whitespace, and other special characters.
|
separators, whitespace, and other special characters.
|
||||||
Typically the directory will be empty, but it could possbly contain
|
Typically the directory will be empty, but it could possibly contain
|
||||||
files or other directories, and it's ok to remove those.
|
files or other directories, and it's ok to remove those.
|
||||||
The remote responds with either `REMOVEEXPORTDIRECTORY-SUCCESS`
|
The remote responds with either `REMOVEEXPORTDIRECTORY-SUCCESS`
|
||||||
or `REMOVEEXPORTDIRECTORY-FAILURE`.
|
or `REMOVEEXPORTDIRECTORY-FAILURE`.
|
||||||
|
|
|
@ -215,6 +215,10 @@ This is an extension to the ExportActions api.
|
||||||
|
|
||||||
storeExportWithContentIdentifier :: FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex (Maybe ContentIdentifier)
|
storeExportWithContentIdentifier :: FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex (Maybe ContentIdentifier)
|
||||||
|
|
||||||
|
removeExportWithContentIdentifier :: Key -> ExportLocation -> [ContentIdentifier] -> Annex Bool
|
||||||
|
|
||||||
|
removeExportDirectoryWhenEmpty :: Maybe (ExportDirectory -> Annex Bool)
|
||||||
|
|
||||||
listContents finds the current set of files that are stored in the remote,
|
listContents finds the current set of files that are stored in the remote,
|
||||||
some of which may have been written by other programs than git-annex,
|
some of which may have been written by other programs than git-annex,
|
||||||
along with their content identifiers. It returns a list of those, often in
|
along with their content identifiers. It returns a list of those, often in
|
||||||
|
@ -236,6 +240,11 @@ downloaded may not match the requested content identifier (eg when
|
||||||
something else wrote to it while it was being retrieved), and fail
|
something else wrote to it while it was being retrieved), and fail
|
||||||
in that case.
|
in that case.
|
||||||
|
|
||||||
|
When a remote supports imports and exports, storeExport and removeExport
|
||||||
|
should not be used when exporting to it, and instead
|
||||||
|
storeExportWithContentIdentifier and removeExportWithContentIdentifier
|
||||||
|
be used.
|
||||||
|
|
||||||
storeExportWithContentIdentifier stores content and returns the
|
storeExportWithContentIdentifier stores content and returns the
|
||||||
content identifier corresponding to what it stored. It can either get
|
content identifier corresponding to what it stored. It can either get
|
||||||
the content identifier in reply to the store (as S3 does with versioning),
|
the content identifier in reply to the store (as S3 does with versioning),
|
||||||
|
@ -248,11 +257,21 @@ to it, to avoid overwriting a file that was modified by something else.
|
||||||
But alternatively, if listContents can later recover the modified file, it can
|
But alternatively, if listContents can later recover the modified file, it can
|
||||||
overwrite the modified file.
|
overwrite the modified file.
|
||||||
|
|
||||||
storeExportWithContentIdentifier needs to handle the case when there's a
|
Similarly, removeExportWithContentIdentifier must only remove a file
|
||||||
race with a concurrent writer. It needs to avoid getting the wrong
|
on the remote if it has the same content identifier that's passed to it,
|
||||||
ContentIdentifier for data written by the other writer. It may detect such
|
or if listContent can later recover the modified file.
|
||||||
races and fail, or it could succeed and overwrite the other file, so long
|
Otherwise it should fail. (Like removeExport, removeExportWithContentIdentifier
|
||||||
as it can later be recovered by listContents.
|
also succeeds if the file is not present.)
|
||||||
|
|
||||||
|
Both storeExportWithContentIdentifier and removeExportWithContentIdentifier
|
||||||
|
need to handle the case when there's a race with a concurrent writer.
|
||||||
|
They can detect such races and fail. Or, if overwritten/deleted modified
|
||||||
|
files can later be recovered by listContents, it's acceptable to not detect
|
||||||
|
the race.
|
||||||
|
|
||||||
|
removeExportDirectoryWhenEmpty is used instead of removeExportDirectory.
|
||||||
|
It should only remove empty directories, and succeeds if there are files
|
||||||
|
in the directory.
|
||||||
|
|
||||||
## multiple git-annex repos accessing a special remote
|
## multiple git-annex repos accessing a special remote
|
||||||
|
|
||||||
|
|
|
@ -18,9 +18,6 @@ this.
|
||||||
from the contentidentifier database would see values that were earlier
|
from the contentidentifier database would see values that were earlier
|
||||||
written by the same process.
|
written by the same process.
|
||||||
|
|
||||||
* Test export to importtree=yes remote and make sure it uses
|
|
||||||
storeExportWithContentIdentifier correctly.
|
|
||||||
|
|
||||||
* Need to support annex-tracking-branch configuration, which documentation
|
* Need to support annex-tracking-branch configuration, which documentation
|
||||||
says makes git-annex sync and assistant do imports.
|
says makes git-annex sync and assistant do imports.
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue