Merge branch 'export'

This commit is contained in:
Joey Hess 2017-09-07 15:53:34 -04:00
commit 2823c6bd06
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
52 changed files with 1357 additions and 250 deletions

View file

@ -21,6 +21,7 @@ module Annex.Branch (
maybeChange,
commit,
forceCommit,
getBranch,
files,
withIndex,
performTransitions,

View file

@ -354,8 +354,12 @@ shouldVerify :: VerifyConfig -> Annex Bool
shouldVerify AlwaysVerify = return True
shouldVerify NoVerify = return False
shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig
shouldVerify (RemoteVerify r) = shouldVerify DefaultVerify
<&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r))
shouldVerify (RemoteVerify r) =
(shouldVerify DefaultVerify
<&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r)))
-- Export remotes are not key/value stores, so always verify
-- content from them even when verification is disabled.
<||> Types.Remote.isExportSupported r
{- Checks if there is enough free disk space to download a key
- to its temp file.

View file

@ -36,6 +36,7 @@ module Annex.Locations (
gitAnnexFsckDbDir,
gitAnnexFsckDbLock,
gitAnnexFsckResultsLog,
gitAnnexExportDbDir,
gitAnnexScheduleState,
gitAnnexTransferDir,
gitAnnexCredsDir,
@ -290,6 +291,15 @@ gitAnnexFsckDbLock u r = gitAnnexFsckDir u r </> "fsck.lck"
gitAnnexFsckResultsLog :: UUID -> Git.Repo -> FilePath
gitAnnexFsckResultsLog u r = gitAnnexDir r </> "fsckresults" </> fromUUID u
{- .git/annex/export/uuid/ is used to store information about
- exports to special remotes. -}
gitAnnexExportDir :: UUID -> Git.Repo -> FilePath
gitAnnexExportDir u r = gitAnnexDir r </> "export" </> fromUUID u
{- Directory containing database used to record export info. -}
gitAnnexExportDbDir :: UUID -> Git.Repo -> FilePath
gitAnnexExportDbDir u r = gitAnnexExportDir u r </> "db"
{- .git/annex/schedulestate is used to store information about when
- scheduled jobs were last run. -}
gitAnnexScheduleState :: Git.Repo -> FilePath

View file

@ -81,7 +81,7 @@ autoEnable = do
(Just name, Right t) -> whenM (canenable u) $ do
showSideAction $ "Auto enabling special remote " ++ name
dummycfg <- liftIO dummyRemoteGitConfig
res <- tryNonAsync $ setup t Enable (Just u) Nothing c dummycfg
res <- tryNonAsync $ setup t (Enable c) (Just u) Nothing c dummycfg
case res of
Left e -> warning (show e)
Right _ -> return ()

View file

@ -52,7 +52,7 @@ makeRsyncRemote name location = makeRemote name location $ const $ void $
go Nothing = setupSpecialRemote name Rsync.remote config Nothing
(Nothing, R.Init, Annex.SpecialRemote.newConfig name)
go (Just (u, c)) = setupSpecialRemote name Rsync.remote config Nothing
(Just u, R.Enable, c)
(Just u, R.Enable c, c)
config = M.fromList
[ ("encryption", "shared")
, ("rsyncurl", location)
@ -91,7 +91,7 @@ enableSpecialRemote name remotetype mcreds config = do
r <- Annex.SpecialRemote.findExisting name
case r of
Nothing -> error $ "Cannot find a special remote named " ++ name
Just (u, c) -> setupSpecialRemote' False name remotetype config mcreds (Just u, R.Enable, c)
Just (u, c) -> setupSpecialRemote' False name remotetype config mcreds (Just u, R.Enable c, c)
setupSpecialRemote :: RemoteName -> RemoteType -> R.RemoteConfig -> Maybe CredPair -> (Maybe UUID, R.SetupStage, R.RemoteConfig) -> Annex RemoteName
setupSpecialRemote = setupSpecialRemote' True

View file

@ -1,5 +1,10 @@
git-annex (6.20170819) UNRELEASED; urgency=medium
* git-annex export: New command, can create and efficiently update
exports of trees to special remotes.
* Use git-annex initremote with exporttree=yes to set up a special remote
for use by git-annex export.
* Implemented export to directory special remotes.
* Support building with feed-1.0, while still supporting older versions.
* init: Display an additional message when it detects a filesystem that
allows writing to files whose write bit is not set.

View file

@ -95,6 +95,7 @@ import qualified Command.AddUrl
import qualified Command.ImportFeed
import qualified Command.RmUrl
import qualified Command.Import
import qualified Command.Export
import qualified Command.Map
import qualified Command.Direct
import qualified Command.Indirect
@ -141,6 +142,7 @@ cmds testoptparser testrunner =
, Command.ImportFeed.cmd
, Command.RmUrl.cmd
, Command.Import.cmd
, Command.Export.cmd
, Command.Init.cmd
, Command.Describe.cmd
, Command.InitRemote.cmd

View file

@ -77,12 +77,12 @@ withFilesNotInGit skipdotfiles a params
go l = seekActions $ prepFiltered a $
return $ concat $ segmentPaths params l
withFilesInRefs :: (FilePath -> Key -> CommandStart) -> CmdParams -> CommandSeek
withFilesInRefs :: (FilePath -> Key -> CommandStart) -> [Git.Ref] -> CommandSeek
withFilesInRefs a = mapM_ go
where
go r = do
matcher <- Limit.getMatcher
(l, cleanup) <- inRepo $ LsTree.lsTree (Git.Ref r)
(l, cleanup) <- inRepo $ LsTree.lsTree r
forM_ l $ \i -> do
let f = getTopFilePath $ LsTree.file i
v <- catKey (LsTree.sha i)

View file

@ -94,6 +94,8 @@ paramAddress :: String
paramAddress = "ADDRESS"
paramItem :: String
paramItem = "ITEM"
paramTreeish :: String
paramTreeish = "TREEISH"
paramKeyValue :: String
paramKeyValue = "K=V"
paramNothing :: String

View file

@ -81,11 +81,11 @@ startSpecialRemote name config (Just (u, c)) = do
gc <- maybe (liftIO dummyRemoteGitConfig)
(return . Remote.gitconfig)
=<< Remote.byUUID u
next $ performSpecialRemote t u fullconfig gc
next $ performSpecialRemote t u c fullconfig gc
performSpecialRemote :: RemoteType -> UUID -> R.RemoteConfig -> RemoteGitConfig -> CommandPerform
performSpecialRemote t u c gc = do
(c', u') <- R.setup t R.Enable (Just u) Nothing c gc
performSpecialRemote :: RemoteType -> UUID -> R.RemoteConfig -> R.RemoteConfig -> RemoteGitConfig -> CommandPerform
performSpecialRemote t u oldc c gc = do
(c', u') <- R.setup t (R.Enable oldc) (Just u) Nothing c gc
next $ cleanupSpecialRemote u' c'
cleanupSpecialRemote :: UUID -> R.RemoteConfig -> CommandCleanup

317
Command/Export.hs Normal file
View file

@ -0,0 +1,317 @@
{- git-annex command
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE TupleSections #-}
module Command.Export where
import Command
import qualified Git
import qualified Git.DiffTree
import qualified Git.LsTree
import qualified Git.Ref
import Git.Types
import Git.FilePath
import Git.Sha
import Types.Key
import Types.Remote
import Annex.Content
import Annex.CatFile
import Logs.Location
import Logs.Export
import Database.Export
import Messages.Progress
import Utility.Tmp
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
cmd :: Command
cmd = command "export" SectionCommon
"export content to a remote"
paramTreeish (seek <$$> optParser)
data ExportOptions = ExportOptions
{ exportTreeish :: Git.Ref
, exportRemote :: DeferredParse Remote
}
optParser :: CmdParamsDesc -> Parser ExportOptions
optParser _ = ExportOptions
<$> (Git.Ref <$> parsetreeish)
<*> (parseRemoteOption <$> parseToOption)
where
parsetreeish = argument str
( metavar paramTreeish
)
-- An export includes both annexed files and files stored in git.
-- For the latter, a SHA1 key is synthesized.
data ExportKey = AnnexKey Key | GitKey Key
deriving (Show, Eq, Ord)
asKey :: ExportKey -> Key
asKey (AnnexKey k) = k
asKey (GitKey k) = k
exportKey :: Git.Sha -> Annex ExportKey
exportKey sha = mk <$> catKey sha
where
mk (Just k) = AnnexKey k
mk Nothing = GitKey $ Key
{ keyName = show sha
, keyVariety = SHA1Key (HasExt False)
, keySize = Nothing
, keyMtime = Nothing
, keyChunkSize = Nothing
, keyChunkNum = Nothing
}
-- To handle renames which swap files, the exported file is first renamed
-- to a stable temporary name based on the key.
exportTempName :: ExportKey -> ExportLocation
exportTempName ek = ExportLocation $
".git-annex-tmp-content-" ++ key2file (asKey (ek))
seek :: ExportOptions -> CommandSeek
seek o = do
r <- getParsed (exportRemote o)
unlessM (isExportSupported r) $
giveup "That remote does not support exports."
new <- fromMaybe (giveup "unknown tree") <$>
-- Dereference the tree pointed to by the branch, commit,
-- or tag.
inRepo (Git.Ref.tree (exportTreeish o))
old <- getExport (uuid r)
recordExportBeginning (uuid r) new
db <- openDb (uuid r)
-- Clean up after incomplete export of a tree, in which
-- the next block of code below may have renamed some files to
-- temp files. Diff from the incomplete tree to the new tree,
-- and delete any temp files that the new tree can't use.
forM_ (concatMap incompleteExportedTreeish old) $ \incomplete ->
mapdiff (\diff -> startRecoverIncomplete r db (Git.DiffTree.srcsha diff) (Git.DiffTree.file diff))
incomplete
new
-- Diff the old and new trees, and delete or rename to new name all
-- changed files in the export. After this, every file that remains
-- in the export will have the content from the new treeish.
--
-- (Also, when there was an export conflict, this resolves it.)
case map exportedTreeish old of
[] -> return ()
[oldtreesha] -> do
diffmap <- mkDiffMap oldtreesha new
let seekdiffmap a = seekActions $ pure $ map a (M.toList diffmap)
-- Rename old files to temp, or delete.
seekdiffmap $ \(ek, (moldf, mnewf)) ->
case (moldf, mnewf) of
(Just oldf, Just _newf) ->
startMoveToTempName r db oldf ek
(Just oldf, Nothing) ->
startUnexport' r db oldf ek
_ -> stop
-- Rename from temp to new files.
seekdiffmap $ \(ek, (moldf, mnewf)) ->
case (moldf, mnewf) of
(Just _oldf, Just newf) ->
startMoveFromTempName r db ek newf
_ -> stop
ts -> do
warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.."
forM_ ts $ \oldtreesha -> do
-- Unexport both the srcsha and the dstsha,
-- because the wrong content may have
-- been renamed to the dstsha due to the
-- export conflict.
let unexportboth d =
[ Git.DiffTree.srcsha d
, Git.DiffTree.dstsha d
]
-- Don't rename to temp, because the
-- content is unknown; delete instead.
mapdiff
(\diff -> startUnexport r db (Git.DiffTree.file diff) (unexportboth diff))
oldtreesha new
-- Waiting until now to record the export guarantees that,
-- if this export is interrupted, there are no files left over
-- from a previous export, that are not part of this export.
recordExport (uuid r) $ ExportChange
{ oldTreeish = map exportedTreeish old
, newTreeish = new
}
-- Export everything that is not yet exported.
(l, cleanup') <- inRepo $ Git.LsTree.lsTree new
seekActions $ pure $ map (startExport r db) l
void $ liftIO cleanup'
closeDb db
where
mapdiff a oldtreesha newtreesha = do
(diff, cleanup) <- inRepo $
Git.DiffTree.diffTreeRecursive oldtreesha newtreesha
seekActions $ pure $ map a diff
void $ liftIO cleanup
-- Map of old and new filenames for each changed ExportKey in a diff.
type DiffMap = M.Map ExportKey (Maybe TopFilePath, Maybe TopFilePath)
mkDiffMap :: Git.Ref -> Git.Ref -> Annex DiffMap
mkDiffMap old new = do
(diff, cleanup) <- inRepo $ Git.DiffTree.diffTreeRecursive old new
diffmap <- M.fromListWith combinedm . concat <$> forM diff mkdm
void $ liftIO cleanup
return diffmap
where
combinedm (srca, dsta) (srcb, dstb) = (srca <|> srcb, dsta <|> dstb)
mkdm i = do
srcek <- getk (Git.DiffTree.srcsha i)
dstek <- getk (Git.DiffTree.dstsha i)
return $ catMaybes
[ (, (Just (Git.DiffTree.file i), Nothing)) <$> srcek
, (, (Nothing, Just (Git.DiffTree.file i))) <$> dstek
]
getk sha
| sha == nullSha = return Nothing
| otherwise = Just <$> exportKey sha
startExport :: Remote -> ExportHandle -> Git.LsTree.TreeItem -> CommandStart
startExport r db ti = do
ek <- exportKey (Git.LsTree.sha ti)
stopUnless (liftIO $ notElem loc <$> getExportLocation db (asKey ek)) $ do
showStart "export" f
next $ performExport r db ek (Git.LsTree.sha ti) loc
where
loc = ExportLocation $ toInternalGitPath f
f = getTopFilePath $ Git.LsTree.file ti
performExport :: Remote -> ExportHandle -> ExportKey -> Sha -> ExportLocation -> CommandPerform
performExport r db ek contentsha loc = do
let storer = storeExport $ exportActions r
sent <- case ek of
AnnexKey k -> ifM (inAnnex k)
( metered Nothing k $ \m -> do
let rollback = void $ performUnexport r db [ek] loc
sendAnnex k rollback
(\f -> storer f k loc m)
, do
showNote "not available"
return False
)
-- Sending a non-annexed file.
GitKey sha1k -> metered Nothing sha1k $ \m ->
withTmpFile "export" $ \tmp h -> do
b <- catObject contentsha
liftIO $ L.hPut h b
liftIO $ hClose h
storer tmp sha1k loc m
if sent
then next $ cleanupExport r db ek loc
else stop
cleanupExport :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> CommandCleanup
cleanupExport r db ek loc = do
liftIO $ addExportLocation db (asKey ek) loc
logChange (asKey ek) (uuid r) InfoPresent
return True
startUnexport :: Remote -> ExportHandle -> TopFilePath -> [Git.Sha] -> CommandStart
startUnexport r db f shas = do
eks <- forM (filter (/= nullSha) shas) exportKey
if null eks
then stop
else do
showStart "unexport" f'
next $ performUnexport r db eks loc
where
loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f
startUnexport' :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart
startUnexport' r db f ek = do
showStart "unexport" f'
next $ performUnexport r db [ek] loc
where
loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f
performUnexport :: Remote -> ExportHandle -> [ExportKey] -> ExportLocation -> CommandPerform
performUnexport r db eks loc = do
ifM (allM (\ek -> removeExport (exportActions r) (asKey ek) loc) eks)
( next $ cleanupUnexport r db eks loc
, stop
)
cleanupUnexport :: Remote -> ExportHandle -> [ExportKey] -> ExportLocation -> CommandCleanup
cleanupUnexport r db eks loc = do
liftIO $ do
forM_ eks $ \ek ->
removeExportLocation db (asKey ek) loc
-- Flush so that getExportLocation sees this and any
-- other removals of the key.
flushDbQueue db
remaininglocs <- liftIO $
concat <$> forM eks (\ek -> getExportLocation db (asKey ek))
when (null remaininglocs) $
forM_ eks $ \ek ->
logChange (asKey ek) (uuid r) InfoMissing
return True
startRecoverIncomplete :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart
startRecoverIncomplete r db sha oldf
| sha == nullSha = stop
| otherwise = do
ek <- exportKey sha
let loc@(ExportLocation f) = exportTempName ek
showStart "unexport" f
liftIO $ removeExportLocation db (asKey ek) oldloc
next $ performUnexport r db [ek] loc
where
oldloc = ExportLocation $ toInternalGitPath oldf'
oldf' = getTopFilePath oldf
startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart
startMoveToTempName r db f ek = do
let tmploc@(ExportLocation tmpf) = exportTempName ek
showStart "rename" (f' ++ " -> " ++ tmpf)
next $ performRename r db ek loc tmploc
where
loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f
startMoveFromTempName :: Remote -> ExportHandle -> ExportKey -> TopFilePath -> CommandStart
startMoveFromTempName r db ek f = do
let tmploc@(ExportLocation tmpf) = exportTempName ek
stopUnless (liftIO $ elem tmploc <$> getExportLocation db (asKey ek)) $ do
showStart "rename" (tmpf ++ " -> " ++ f')
next $ performRename r db ek tmploc loc
where
loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f
performRename :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> ExportLocation -> CommandPerform
performRename r db ek src dest = do
ifM (renameExport (exportActions r) (asKey ek) src dest)
( next $ cleanupRename db ek src dest
-- In case the special remote does not support renaming,
-- unexport the src instead.
, performUnexport r db [ek] src
)
cleanupRename :: ExportHandle -> ExportKey -> ExportLocation -> ExportLocation -> CommandCleanup
cleanupRename db ek src dest = do
liftIO $ do
removeExportLocation db (asKey ek) src
addExportLocation db (asKey ek) dest
-- Flush so that getExportLocation sees this.
flushDbQueue db
return True

View file

@ -9,6 +9,7 @@ module Command.FindRef where
import Command
import qualified Command.Find as Find
import qualified Git
cmd :: Command
cmd = withGlobalOptions nonWorkTreeMatchingOptions $ Find.mkCommand $
@ -17,4 +18,4 @@ cmd = withGlobalOptions nonWorkTreeMatchingOptions $ Find.mkCommand $
paramRef (seek <$$> Find.optParser)
seek :: Find.FindOptions -> CommandSeek
seek o = Find.start o `withFilesInRefs` Find.findThese o
seek o = Find.start o `withFilesInRefs` (map Git.Ref $ Find.findThese o)

88
Database/Export.hs Normal file
View file

@ -0,0 +1,88 @@
{- Sqlite database used for exports to special remotes.
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-:
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes #-}
module Database.Export (
ExportHandle,
openDb,
closeDb,
addExportLocation,
removeExportLocation,
flushDbQueue,
getExportLocation,
ExportedId,
) where
import Database.Types
import qualified Database.Queue as H
import Database.Init
import Annex.Locations
import Annex.Common hiding (delete)
import Types.Remote (ExportLocation(..))
import Database.Persist.TH
import Database.Esqueleto hiding (Key)
newtype ExportHandle = ExportHandle H.DbQueue
share [mkPersist sqlSettings, mkMigrate "migrateExport"] [persistLowerCase|
Exported
key IKey
file SFilePath
KeyFileIndex key file
|]
{- Opens the database, creating it if it doesn't exist yet. -}
openDb :: UUID -> Annex ExportHandle
openDb u = do
dbdir <- fromRepo (gitAnnexExportDbDir u)
let db = dbdir </> "db"
unlessM (liftIO $ doesFileExist db) $ do
initDb db $ void $
runMigrationSilent migrateExport
h <- liftIO $ H.openDbQueue H.SingleWriter db "exported"
return $ ExportHandle h
closeDb :: ExportHandle -> Annex ()
closeDb (ExportHandle h) = liftIO $ H.closeDbQueue h
queueDb :: ExportHandle -> SqlPersistM () -> IO ()
queueDb (ExportHandle h) = H.queueDb h checkcommit
where
-- commit queue after 1000 changes
checkcommit sz _lastcommittime
| sz > 1000 = return True
| otherwise = return False
addExportLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
addExportLocation h k (ExportLocation f) = queueDb h $
void $ insertUnique $ Exported (toIKey k) (toSFilePath f)
removeExportLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
removeExportLocation h k (ExportLocation f) = queueDb h $
delete $ from $ \r -> do
where_ (r ^. ExportedKey ==. val ik &&. r ^. ExportedFile ==. val ef)
where
ik = toIKey k
ef = toSFilePath f
flushDbQueue :: ExportHandle -> IO ()
flushDbQueue (ExportHandle h) = H.flushDbQueue h
{- Note that this does not see recently queued changes. -}
getExportLocation :: ExportHandle -> Key -> IO [ExportLocation]
getExportLocation (ExportHandle h) k = H.queryDbQueue h $ do
l <- select $ from $ \r -> do
where_ (r ^. ExportedKey ==. val ik)
return (r ^. ExportedFile)
return $ map (ExportLocation . fromSFilePath . unValue) l
where
ik = toIKey k

View file

@ -63,7 +63,7 @@ openDb u = do
initDb db $ void $
runMigrationSilent migrateFsck
lockFileCached =<< fromRepo (gitAnnexFsckDbLock u)
h <- liftIO $ H.openDbQueue db "fscked"
h <- liftIO $ H.openDbQueue H.MultiWriter db "fscked"
return $ FsckHandle h u
closeDb :: FsckHandle -> Annex ()

View file

@ -9,6 +9,7 @@
module Database.Handle (
DbHandle,
DbConcurrency(..),
openDb,
TableName,
queryDb,
@ -35,27 +36,49 @@ import System.IO
{- A DbHandle is a reference to a worker thread that communicates with
- the database. It has a MVar which Jobs are submitted to. -}
data DbHandle = DbHandle (Async ()) (MVar Job)
data DbHandle = DbHandle DbConcurrency (Async ()) (MVar Job)
{- Name of a table that should exist once the database is initialized. -}
type TableName = String
{- Sqlite only allows a single write to a database at a time; a concurrent
- write will crash.
-
- While a DbHandle serializes concurrent writes from
- multiple threads. But, when a database can be written to by
- multiple processes concurrently, use MultiWriter to make writes
- to the database be done robustly.
-
- The downside of using MultiWriter is that after writing a change to the
- database, the a query using the same DbHandle will not immediately see
- the change! This is because the change is actually written using a
- separate database connection, and caching can prevent seeing the change.
- Also, consider that if multiple processes are writing to a database,
- you can't rely on seeing values you've just written anyway, as another
- process may change them.
-
- When a database can only be written to by a single process, use
- SingleWriter. Changes written to the database will always be immediately
- visible then.
-}
data DbConcurrency = SingleWriter | MultiWriter
{- Opens the database, but does not perform any migrations. Only use
- if the database is known to exist and have the right tables. -}
openDb :: FilePath -> TableName -> IO DbHandle
openDb db tablename = do
- once the database is known to exist and have the right tables. -}
openDb :: DbConcurrency -> FilePath -> TableName -> IO DbHandle
openDb dbconcurrency db tablename = do
jobs <- newEmptyMVar
worker <- async (workerThread (T.pack db) tablename jobs)
-- work around https://github.com/yesodweb/persistent/issues/474
liftIO $ fileEncoding stderr
return $ DbHandle worker jobs
return $ DbHandle dbconcurrency worker jobs
{- This is optional; when the DbHandle gets garbage collected it will
- auto-close. -}
closeDb :: DbHandle -> IO ()
closeDb (DbHandle worker jobs) = do
closeDb (DbHandle _ worker jobs) = do
putMVar jobs CloseJob
wait worker
@ -68,9 +91,12 @@ closeDb (DbHandle worker jobs) = do
- Only one action can be run at a time against a given DbHandle.
- If called concurrently in the same process, this will block until
- it is able to run.
-
- Note that when the DbHandle was opened in MultiWriter mode, recent
- writes may not be seen by queryDb.
-}
queryDb :: DbHandle -> SqlPersistM a -> IO a
queryDb (DbHandle _ jobs) a = do
queryDb (DbHandle _ _ jobs) a = do
res <- newEmptyMVar
putMVar jobs $ QueryJob $
liftIO . putMVar res =<< tryNonAsync a
@ -79,9 +105,9 @@ queryDb (DbHandle _ jobs) a = do
{- Writes a change to the database.
-
- If a database is opened multiple times and there's a concurrent writer,
- the write could fail. Retries repeatedly for up to 10 seconds,
- which should avoid all but the most exceptional problems.
- In MultiWriter mode, catches failure to write to the database,
- and retries repeatedly for up to 10 seconds, which should avoid
- all but the most exceptional problems.
-}
commitDb :: DbHandle -> SqlPersistM () -> IO ()
commitDb h wa = robustly Nothing 100 (commitDb' h wa)
@ -97,15 +123,22 @@ commitDb h wa = robustly Nothing 100 (commitDb' h wa)
robustly (Just e) (n-1) a
commitDb' :: DbHandle -> SqlPersistM () -> IO (Either SomeException ())
commitDb' (DbHandle _ jobs) a = do
commitDb' (DbHandle MultiWriter _ jobs) a = do
res <- newEmptyMVar
putMVar jobs $ ChangeJob $ \runner ->
putMVar jobs $ RobustChangeJob $ \runner ->
liftIO $ putMVar res =<< tryNonAsync (runner a)
takeMVar res
commitDb' (DbHandle SingleWriter _ jobs) a = do
res <- newEmptyMVar
putMVar jobs $ ChangeJob $
liftIO . putMVar res =<< tryNonAsync a
takeMVar res
`catchNonAsync` (const $ error "sqlite commit crashed")
data Job
= QueryJob (SqlPersistM ())
| ChangeJob ((SqlPersistM () -> IO ()) -> IO ())
| ChangeJob (SqlPersistM ())
| RobustChangeJob ((SqlPersistM () -> IO ()) -> IO ())
| CloseJob
workerThread :: T.Text -> TableName -> MVar Job -> IO ()
@ -127,10 +160,12 @@ workerThread db tablename jobs =
Left BlockedIndefinitelyOnMVar -> return ()
Right CloseJob -> return ()
Right (QueryJob a) -> a >> loop
-- change is run in a separate database connection
Right (ChangeJob a) -> a >> loop
-- Change is run in a separate database connection
-- since sqlite only supports a single writer at a
-- time, and it may crash the database connection
Right (ChangeJob a) -> liftIO (a (runSqliteRobustly tablename db)) >> loop
-- that the write is made to.
Right (RobustChangeJob a) -> liftIO (a (runSqliteRobustly tablename db)) >> loop
-- like runSqlite, but calls settle on the raw sql Connection.
runSqliteRobustly :: TableName -> T.Text -> (SqlPersistM a) -> IO a

View file

@ -124,7 +124,7 @@ openDb createdb _ = catchPermissionDenied permerr $ withExclusiveLock gitAnnexKe
open db
(False, False) -> return DbUnavailable
where
open db = liftIO $ DbOpen <$> H.openDbQueue db SQL.containedTable
open db = liftIO $ DbOpen <$> H.openDbQueue H.MultiWriter db SQL.containedTable
-- If permissions don't allow opening the database, treat it as if
-- it does not exist.
permerr e = case createdb of

View file

@ -9,6 +9,7 @@
module Database.Queue (
DbQueue,
DbConcurrency(..),
openDbQueue,
queryDbQueue,
closeDbQueue,
@ -35,9 +36,9 @@ data DbQueue = DQ DbHandle (MVar Queue)
{- Opens the database queue, but does not perform any migrations. Only use
- if the database is known to exist and have the right tables; ie after
- running initDb. -}
openDbQueue :: FilePath -> TableName -> IO DbQueue
openDbQueue db tablename = DQ
<$> openDb db tablename
openDbQueue :: DbConcurrency -> FilePath -> TableName -> IO DbQueue
openDbQueue dbconcurrency db tablename = DQ
<$> openDb dbconcurrency db tablename
<*> (newMVar =<< emptyQueue)
{- This or flushDbQueue must be called, eg at program exit to ensure
@ -60,8 +61,11 @@ flushDbQueue (DQ hdl qvar) = do
{- Makes a query using the DbQueue's database connection.
- This should not be used to make changes to the database!
-
- Queries will not return changes that have been recently queued,
- Queries will not see changes that have been recently queued,
- so use with care.
-
- Also, when the database was opened in MultiWriter mode,
- queries may not see changes even after flushDbQueue.
-}
queryDbQueue :: DbQueue -> SqlPersistM a -> IO a
queryDbQueue (DQ hdl _) = queryDb hdl

View file

@ -14,6 +14,7 @@ module Git.Tree (
recordTree,
TreeItem(..),
adjustTree,
treeMode,
) where
import Common
@ -94,12 +95,15 @@ mkTree (MkTreeHandle cp) l = CoProcess.query cp send receive
send h = do
forM_ l $ \i -> hPutStr h $ case i of
TreeBlob f fm s -> mkTreeOutput fm BlobObject s f
RecordedSubTree f s _ -> mkTreeOutput 0o040000 TreeObject s f
RecordedSubTree f s _ -> mkTreeOutput treeMode TreeObject s f
NewSubTree _ _ -> error "recordSubTree internal error; unexpected NewSubTree"
TreeCommit f fm s -> mkTreeOutput fm CommitObject s f
hPutStr h "\NUL" -- signal end of tree to --batch
receive h = getSha "mktree" (hGetLine h)
treeMode :: FileMode
treeMode = 0o040000
mkTreeOutput :: FileMode -> ObjectType -> Sha -> TopFilePath -> String
mkTreeOutput fm ot s f = concat
[ showOct fm ""

View file

@ -42,6 +42,7 @@ topLevelUUIDBasedLogs =
, activityLog
, differenceLog
, multicastLog
, exportLog
]
{- All the ways to get a key from a presence log file -}
@ -97,6 +98,9 @@ differenceLog = "difference.log"
multicastLog :: FilePath
multicastLog = "multicast.log"
exportLog :: FilePath
exportLog = "export.log"
{- The pathname of the location log file for a given key. -}
locationLogFile :: GitConfig -> Key -> String
locationLogFile config key = branchHashDir config key </> keyFile key ++ ".log"

123
Logs/Export.hs Normal file
View file

@ -0,0 +1,123 @@
{- git-annex export log
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.Export where
import qualified Data.Map as M
import Annex.Common
import qualified Annex.Branch
import qualified Git
import qualified Git.Branch
import Git.Tree
import Git.Sha
import Git.FilePath
import Logs
import Logs.UUIDBased
import Annex.UUID
data Exported = Exported
{ exportedTreeish :: Git.Ref
, incompleteExportedTreeish :: [Git.Ref]
}
deriving (Eq)
-- | Get what's been exported to a special remote.
--
-- If the list contains multiple items, there was an export conflict,
-- and different trees were exported to the same special remote.
getExport :: UUID -> Annex [Exported]
getExport remoteuuid = nub . mapMaybe get . M.elems . simpleMap
. parseLogNew parseExportLog
<$> Annex.Branch.get exportLog
where
get (ExportLog exported u)
| u == remoteuuid = Just exported
| otherwise = Nothing
data ExportChange = ExportChange
{ oldTreeish :: [Git.Ref]
, newTreeish :: Git.Ref
}
-- | Record a change in what's exported to a special remote.
--
-- This is called before an export begins uploading new files to the
-- remote, but after it's cleaned up any files that need to be deleted
-- from the old treeish.
--
-- Any entries in the log for the oldTreeish will be updated to the
-- newTreeish. This way, when multiple repositories are exporting to
-- the same special remote, there's no conflict as long as they move
-- forward in lock-step.
--
-- Also, the newTreeish is grafted into the git-annex branch. This is done
-- to ensure that it's available later.
recordExport :: UUID -> ExportChange -> Annex ()
recordExport remoteuuid ec = do
c <- liftIO currentVectorClock
u <- getUUID
let val = ExportLog (Exported (newTreeish ec) []) remoteuuid
Annex.Branch.change exportLog $
showLogNew formatExportLog
. changeLog c u val
. M.mapWithKey (updateothers c u)
. parseLogNew parseExportLog
where
updateothers c u theiru le@(LogEntry _ (ExportLog exported@(Exported { exportedTreeish = t }) remoteuuid'))
| u == theiru || remoteuuid' /= remoteuuid || t `notElem` oldTreeish ec = le
| otherwise = LogEntry c (ExportLog (exported { exportedTreeish = newTreeish ec }) theiru)
-- | Record the beginning of an export, to allow cleaning up from
-- interrupted exports.
--
-- This is called before any changes are made to the remote.
recordExportBeginning :: UUID -> Git.Ref -> Annex ()
recordExportBeginning remoteuuid newtree = do
c <- liftIO currentVectorClock
u <- getUUID
ExportLog old _ <- fromMaybe (ExportLog (Exported emptyTree []) remoteuuid)
. M.lookup u . simpleMap
. parseLogNew parseExportLog
<$> Annex.Branch.get exportLog
let new = old { incompleteExportedTreeish = newtree:incompleteExportedTreeish old }
Annex.Branch.change exportLog $
showLogNew formatExportLog
. changeLog c u (ExportLog new remoteuuid)
. parseLogNew parseExportLog
graftTreeish newtree
data ExportLog = ExportLog Exported UUID
formatExportLog :: ExportLog -> String
formatExportLog (ExportLog exported remoteuuid) = unwords $
[ Git.fromRef (exportedTreeish exported)
, fromUUID remoteuuid
] ++ map Git.fromRef (incompleteExportedTreeish exported)
parseExportLog :: String -> Maybe ExportLog
parseExportLog s = case words s of
(et:u:it) -> Just $
ExportLog (Exported (Git.Ref et) (map Git.Ref it)) (toUUID u)
_ -> Nothing
-- To prevent git-annex branch merge conflicts, the treeish is
-- first grafted in and then removed in a subsequent commit.
graftTreeish :: Git.Ref -> Annex ()
graftTreeish treeish = do
branchref <- Annex.Branch.getBranch
Tree t <- inRepo $ getTree branchref
t' <- inRepo $ recordTree $ Tree $
RecordedSubTree (asTopFilePath graftpoint) treeish [] : t
commit <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit
"export tree" [branchref] t'
origtree <- inRepo $ recordTree (Tree t)
commit' <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit
"export tree cleanup" [commit] origtree
inRepo $ Git.Branch.update' Annex.Branch.fullname commit'
where
graftpoint = "export.tree"

View file

@ -65,10 +65,16 @@ trustMap = maybe trustMapLoad return =<< Annex.getState Annex.trustmap
trustMapLoad :: Annex TrustMap
trustMapLoad = do
overrides <- Annex.getState Annex.forcetrust
l <- remoteList
-- Exports are never trusted, since they are not key/value stores.
exports <- filterM Types.Remote.isExportSupported l
let exportoverrides = M.fromList $
map (\r -> (Types.Remote.uuid r, UnTrusted)) exports
logged <- trustMapRaw
configured <- M.fromList . catMaybes
<$> (map configuredtrust <$> remoteList)
let m = M.union overrides $ M.union configured logged
let configured = M.fromList $ mapMaybe configuredtrust l
let m = M.union exportoverrides $
M.union overrides $
M.union configured logged
Annex.changeState $ \s -> s { Annex.trustmap = Just m }
return m
where

View file

@ -53,6 +53,7 @@ module Remote (
checkAvailable,
isXMPPRemote,
claimingUrl,
isExportSupported,
) where
import Data.Ord

View file

@ -26,6 +26,7 @@ import Backend.URL
import Annex.Perms
import Annex.UUID
import qualified Annex.Url as Url
import Remote.Helper.Export
import Network.URI
@ -35,12 +36,13 @@ import qualified Data.ByteString.Lazy as B
#endif
remote :: RemoteType
remote = RemoteType {
typename = "bittorrent",
enumerate = list,
generate = gen,
setup = error "not supported"
}
remote = RemoteType
{ typename = "bittorrent"
, enumerate = list
, generate = gen
, setup = error "not supported"
, exportSupported = exportUnsupported
}
-- There is only one bittorrent remote, and it always exists.
list :: Bool -> Annex [Git.Repo]
@ -61,6 +63,7 @@ gen r _ c gc =
, lockContent = Nothing
, checkPresent = checkKey
, checkPresentCheap = False
, exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -25,6 +25,7 @@ import Config.Cost
import qualified Remote.Helper.Ssh as Ssh
import Remote.Helper.Special
import Remote.Helper.Messages
import Remote.Helper.Export
import Utility.Hash
import Utility.UserInfo
import Annex.UUID
@ -34,12 +35,13 @@ import Utility.Metered
type BupRepo = String
remote :: RemoteType
remote = RemoteType {
typename = "bup",
enumerate = const (findSpecialRemotes "buprepo"),
generate = gen,
setup = bupSetup
}
remote = RemoteType
{ typename = "bup"
, enumerate = const (findSpecialRemotes "buprepo")
, generate = gen
, setup = bupSetup
, exportSupported = exportUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do
@ -61,6 +63,7 @@ gen r u c gc = do
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = bupLocal buprepo
, exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -19,6 +19,7 @@ import qualified Git
import Config
import Config.Cost
import Remote.Helper.Special
import Remote.Helper.Export
import Annex.Ssh
import Annex.UUID
import Utility.SshHost
@ -29,12 +30,13 @@ data DdarRepo = DdarRepo
}
remote :: RemoteType
remote = RemoteType {
typename = "ddar",
enumerate = const (findSpecialRemotes "ddarrepo"),
generate = gen,
setup = ddarSetup
}
remote = RemoteType
{ typename = "ddar"
, enumerate = const (findSpecialRemotes "ddarrepo")
, generate = gen
, setup = ddarSetup
, exportSupported = exportUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do
@ -60,6 +62,7 @@ gen r u c gc = do
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = ddarLocal ddarrepo
, exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -1,6 +1,6 @@
{- A "remote" that is just a filesystem directory.
-
- Copyright 2011-2014 Joey Hess <id@joeyh.name>
- Copyright 2011-2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -25,18 +25,21 @@ import Config.Cost
import Config
import Utility.FileMode
import Remote.Helper.Special
import Remote.Helper.Export
import qualified Remote.Directory.LegacyChunked as Legacy
import Annex.Content
import Annex.UUID
import Utility.Metered
import Utility.Tmp
remote :: RemoteType
remote = RemoteType {
typename = "directory",
enumerate = const (findSpecialRemotes "directory"),
generate = gen,
setup = directorySetup
}
remote = RemoteType
{ typename = "directory"
, enumerate = const (findSpecialRemotes "directory")
, generate = gen
, setup = directorySetup
, exportSupported = exportIsSupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do
@ -58,6 +61,13 @@ gen r u c gc = do
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = True
, exportActions = ExportActions
{ storeExport = storeExportDirectory dir
, retrieveExport = retrieveExportDirectory dir
, removeExport = removeExportDirectory dir
, checkPresentExport = checkPresentExportDirectory dir
, renameExport = renameExportDirectory dir
}
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
@ -111,24 +121,21 @@ getLocation d k = do
storeDir :: FilePath -> Key -> FilePath
storeDir d k = addTrailingPathSeparator $ d </> hashDirLower def k </> keyFile k
{- Where we store temporary data for a key, in the directory, as it's being
- written. -}
tmpDir :: FilePath -> Key -> FilePath
tmpDir d k = addTrailingPathSeparator $ d </> "tmp" </> keyFile k
{- Check if there is enough free disk space in the remote's directory to
- store the key. Note that the unencrypted key size is checked. -}
prepareStore :: FilePath -> ChunkConfig -> Preparer Storer
prepareStore d chunkconfig = checkPrepare checker
prepareStore d chunkconfig = checkPrepare (checkDiskSpaceDirectory d)
(byteStorer $ store d chunkconfig)
where
checker k = do
annexdir <- fromRepo gitAnnexObjectDir
samefilesystem <- liftIO $ catchDefaultIO False $
(\a b -> deviceID a == deviceID b)
<$> getFileStatus d
<*> getFileStatus annexdir
checkDiskSpace (Just d) k 0 samefilesystem
checkDiskSpaceDirectory :: FilePath -> Key -> Annex Bool
checkDiskSpaceDirectory d k = do
annexdir <- fromRepo gitAnnexObjectDir
samefilesystem <- liftIO $ catchDefaultIO False $
(\a b -> deviceID a == deviceID b)
<$> getFileStatus d
<*> getFileStatus annexdir
checkDiskSpace (Just d) k 0 samefilesystem
store :: FilePath -> ChunkConfig -> Key -> L.ByteString -> MeterUpdate -> Annex Bool
store d chunkconfig k b p = liftIO $ do
@ -141,7 +148,7 @@ store d chunkconfig k b p = liftIO $ do
finalizeStoreGeneric tmpdir destdir
return True
where
tmpdir = tmpDir d k
tmpdir = addTrailingPathSeparator $ d </> "tmp" </> keyFile k
destdir = storeDir d k
{- Passed a temp directory that contains the files that should be placed
@ -211,11 +218,66 @@ removeDirGeneric topdir dir = do
checkKey :: FilePath -> ChunkConfig -> CheckPresent
checkKey d (LegacyChunks _) k = Legacy.checkKey d locations k
checkKey d _ k = liftIO $
ifM (anyM doesFileExist (locations d k))
checkKey d _ k = checkPresentGeneric d (locations d k)
checkPresentGeneric :: FilePath -> [FilePath] -> Annex Bool
checkPresentGeneric d ps = liftIO $
ifM (anyM doesFileExist ps)
( return True
, ifM (doesDirectoryExist d)
( return False
, giveup $ "directory " ++ d ++ " is not accessible"
)
)
storeExportDirectory :: FilePath -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool
storeExportDirectory d src _k loc p = liftIO $ catchBoolIO $ do
createDirectoryIfMissing True (takeDirectory dest)
-- Write via temp file so that checkPresentGeneric will not
-- see it until it's fully stored.
viaTmp (\tmp () -> withMeteredFile src p (L.writeFile tmp)) dest ()
return True
where
dest = exportPath d loc
retrieveExportDirectory :: FilePath -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex (Bool, Verification)
retrieveExportDirectory d _k loc dest p = unVerified $ liftIO $ catchBoolIO $ do
withMeteredFile src p (L.writeFile dest)
return True
where
src = exportPath d loc
removeExportDirectory :: FilePath -> Key -> ExportLocation -> Annex Bool
removeExportDirectory d _k loc = liftIO $ do
nukeFile src
removeExportLocation d loc
return True
where
src = exportPath d loc
checkPresentExportDirectory :: FilePath -> Key -> ExportLocation -> Annex Bool
checkPresentExportDirectory d _k loc =
checkPresentGeneric d [exportPath d loc]
renameExportDirectory :: FilePath -> Key -> ExportLocation -> ExportLocation -> Annex Bool
renameExportDirectory d _k oldloc newloc = liftIO $ catchBoolIO $ do
createDirectoryIfMissing True (takeDirectory dest)
renameFile src dest
removeExportLocation d oldloc
return True
where
src = exportPath d oldloc
dest = exportPath d newloc
exportPath :: FilePath -> ExportLocation -> FilePath
exportPath d (ExportLocation loc) = d </> loc
{- Removes the ExportLocation directory and its parents, so long as
- they're empty, up to but not including the topdir. -}
removeExportLocation :: FilePath -> ExportLocation -> IO ()
removeExportLocation topdir (ExportLocation loc) = go (Just loc) (Right ())
where
go _ (Left _e) = return ()
go Nothing _ = return ()
go (Just loc') _ = go (upFrom loc')
=<< tryIO (removeDirectory $ exportPath topdir (ExportLocation loc'))

View file

@ -18,6 +18,7 @@ import Config
import Git.Config (isTrue, boolConfig)
import Git.Env
import Remote.Helper.Special
import Remote.Helper.Export
import Remote.Helper.ReadOnly
import Remote.Helper.Messages
import Utility.Metered
@ -39,12 +40,13 @@ import System.Log.Logger (debugM)
import qualified Data.Map as M
remote :: RemoteType
remote = RemoteType {
typename = "external",
enumerate = const (findSpecialRemotes "externaltype"),
generate = gen,
setup = externalSetup
}
remote = RemoteType
{ typename = "external"
, enumerate = const (findSpecialRemotes "externaltype")
, generate = gen
, setup = externalSetup
, exportSupported = exportUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc
@ -85,6 +87,7 @@ gen r u c gc
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = False
, exportActions = exportUnsupported
, whereisKey = towhereis
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -38,6 +38,7 @@ import Remote.Helper.Git
import Remote.Helper.Encryptable
import Remote.Helper.Special
import Remote.Helper.Messages
import Remote.Helper.Export
import qualified Remote.Helper.Ssh as Ssh
import Utility.Metered
import Annex.UUID
@ -51,14 +52,15 @@ import Utility.Gpg
import Utility.SshHost
remote :: RemoteType
remote = RemoteType {
typename = "gcrypt",
remote = RemoteType
{ typename = "gcrypt"
-- Remote.Git takes care of enumerating gcrypt remotes too,
-- and will call our gen on them.
enumerate = const (return []),
generate = gen,
setup = gCryptSetup
}
, enumerate = const (return [])
, generate = gen
, setup = gCryptSetup
, exportSupported = exportUnsupported
}
chainGen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
chainGen gcryptr u c gc = do
@ -114,6 +116,7 @@ gen' r u c gc = do
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = repoCheap r
, exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -50,6 +50,7 @@ import Utility.Batch
import Utility.SimpleProtocol
import Remote.Helper.Git
import Remote.Helper.Messages
import Remote.Helper.Export
import qualified Remote.Helper.Ssh as Ssh
import qualified Remote.GCrypt
import qualified Remote.P2P
@ -66,12 +67,13 @@ import qualified Data.Map as M
import Network.URI
remote :: RemoteType
remote = RemoteType {
typename = "git",
enumerate = list,
generate = gen,
setup = gitSetup
}
remote = RemoteType
{ typename = "git"
, enumerate = list
, generate = gen
, setup = gitSetup
, exportSupported = exportUnsupported
}
list :: Bool -> Annex [Git.Repo]
list autoinit = do
@ -110,7 +112,7 @@ gitSetup Init mu _ c _ = do
if isNothing mu || mu == Just u
then return (c, u)
else error "git remote did not have specified uuid"
gitSetup Enable (Just u) _ c _ = do
gitSetup (Enable _) (Just u) _ c _ = do
inRepo $ Git.Command.run
[ Param "remote"
, Param "add"
@ -118,7 +120,7 @@ gitSetup Enable (Just u) _ c _ = do
, Param $ fromMaybe (giveup "no location") (M.lookup "location" c)
]
return (c, u)
gitSetup Enable Nothing _ _ _ = error "unable to enable git remote with no specified uuid"
gitSetup (Enable _) Nothing _ _ _ = error "unable to enable git remote with no specified uuid"
{- It's assumed to be cheap to read the config of non-URL remotes, so this is
- done each time git-annex is run in a way that uses remotes.
@ -157,6 +159,7 @@ gen r u c gc
, lockContent = Just (lockKey new)
, checkPresent = inAnnex new
, checkPresentCheap = repoCheap r
, exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = if Git.repoIsUrl r
then Nothing

View file

@ -18,6 +18,7 @@ import Config
import Config.Cost
import Remote.Helper.Special
import Remote.Helper.Messages
import Remote.Helper.Export
import qualified Remote.Helper.AWS as AWS
import Creds
import Utility.Metered
@ -29,12 +30,13 @@ type Vault = String
type Archive = FilePath
remote :: RemoteType
remote = RemoteType {
typename = "glacier",
enumerate = const (findSpecialRemotes "glacier"),
generate = gen,
setup = glacierSetup
}
remote = RemoteType
{ typename = "glacier"
, enumerate = const (findSpecialRemotes "glacier")
, generate = gen
, setup = glacierSetup
, exportSupported = exportUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost
@ -57,6 +59,7 @@ gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = False
, exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
@ -87,8 +90,9 @@ glacierSetup' ss u mcreds c gc = do
(c', encsetup) <- encryptionSetup c gc
c'' <- setRemoteCredPair encsetup c' gc (AWS.creds u) mcreds
let fullconfig = c'' `M.union` defaults
when (ss == Init) $
genVault fullconfig gc u
case ss of
Init -> genVault fullconfig gc u
_ -> return ()
gitConfigSpecialRemote u fullconfig "glacier" "true"
return (fullconfig, u)
where

View file

@ -15,6 +15,7 @@ module Remote.Helper.Encryptable (
embedCreds,
cipherKey,
extractCipher,
isEncrypted,
describeEncryption,
) where
@ -57,7 +58,7 @@ encryptionSetup c gc = do
encryption = M.lookup "encryption" c
-- Generate a new cipher, depending on the chosen encryption scheme
genCipher cmd = case encryption of
_ | M.member "cipher" c || M.member "cipherkeys" c || M.member "pubkeys" c -> cannotchange
_ | hasEncryptionConfig c -> cannotchange
Just "none" -> return (c, NoEncryption)
Just "shared" -> encsetup $ genSharedCipher cmd
-- hybrid encryption is the default when a keyid is
@ -167,6 +168,15 @@ extractCipher c = case (M.lookup "cipher" c,
where
readkeys = KeyIds . splitc ','
isEncrypted :: RemoteConfig -> Bool
isEncrypted c = case M.lookup "encryption" c of
Just "none" -> False
Just _ -> True
Nothing -> hasEncryptionConfig c
hasEncryptionConfig :: RemoteConfig -> Bool
hasEncryptionConfig c = M.member "cipher" c || M.member "cipherkeys" c || M.member "pubkeys" c
describeEncryption :: RemoteConfig -> String
describeEncryption c = case extractCipher c of
Nothing -> "none"

126
Remote/Helper/Export.hs Normal file
View file

@ -0,0 +1,126 @@
{- exports to remotes
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE FlexibleInstances #-}
module Remote.Helper.Export where
import Annex.Common
import Types.Remote
import Types.Backend
import Types.Key
import Backend
import Remote.Helper.Encryptable (isEncrypted)
import Database.Export
import qualified Data.Map as M
-- | Use for remotes that do not support exports.
class HasExportUnsupported a where
exportUnsupported :: a
instance HasExportUnsupported (RemoteConfig -> RemoteGitConfig -> Annex Bool) where
exportUnsupported = \_ _ -> return False
instance HasExportUnsupported (ExportActions Annex) where
exportUnsupported = ExportActions
{ storeExport = \_ _ _ _ -> return False
, retrieveExport = \_ _ _ _ -> return (False, UnVerified)
, removeExport = \_ _ -> return False
, checkPresentExport = \_ _ -> return False
, renameExport = \_ _ _ -> return False
}
exportIsSupported :: RemoteConfig -> RemoteGitConfig -> Annex Bool
exportIsSupported = \_ _ -> return True
-- | Prevent or allow exporttree=yes when setting up a new remote,
-- depending on exportSupported and other configuration.
adjustExportableRemoteType :: RemoteType -> RemoteType
adjustExportableRemoteType rt = rt { setup = setup' }
where
setup' st mu cp c gc = do
let cont = setup rt st mu cp c gc
ifM (exportSupported rt c gc)
( case st of
Init -> case M.lookup "exporttree" c of
Just "yes" | isEncrypted c ->
giveup "cannot enable both encryption and exporttree"
_ -> cont
Enable oldc
| M.lookup "exporttree" c /= M.lookup "exporttree" oldc ->
giveup "cannot change exporttree of existing special remote"
| otherwise -> cont
, case M.lookup "exporttree" c of
Just "yes" -> giveup "exporttree=yes is not supported by this special remote"
_ -> cont
)
-- | If the remote is exportSupported, and exporttree=yes, adjust the
-- remote to be an export.
adjustExportable :: Remote -> Annex Remote
adjustExportable r = case M.lookup "exporttree" (config r) of
Just "yes" -> ifM (isExportSupported r)
( isexport
, notexport
)
_ -> notexport
where
notexport = return $ r { exportActions = exportUnsupported }
isexport = do
db <- openDb (uuid r)
return $ r
-- Storing a key on an export would need a way to
-- look up the file(s) that the currently exported
-- tree uses for a key; there's not currently an
-- inexpensive way to do that (getExportLocation
-- only finds files that have been stored on the
-- export already).
{ storeKey = \_ _ _ -> do
warning "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
return False
-- Keys can be retrieved, but since an export
-- is not a true key/value store, the content of
-- the key has to be able to be strongly verified.
, retrieveKeyFile = \k _af dest p ->
if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (keyVariety k))
then do
locs <- liftIO $ getExportLocation db k
case locs of
[] -> do
warning "unknown export location"
return (False, UnVerified)
(l:_) -> retrieveExport (exportActions r) k l dest p
else do
warning $ "exported content cannot be verified due to using the " ++ formatKeyVariety (keyVariety k) ++ " backend"
return (False, UnVerified)
, retrieveKeyFileCheap = \_ _ _ -> return False
-- Remove all files a key was exported to.
, removeKey = \k -> do
locs <- liftIO $ getExportLocation db k
oks <- forM locs $ \loc -> do
ok <- removeExport (exportActions r) k loc
when ok $
liftIO $ removeExportLocation db k loc
return ok
liftIO $ flushDbQueue db
return (and oks)
-- Can't lock content on exports, since they're
-- not key/value stores, and someone else could
-- change what's exported to a file at any time.
, lockContent = Nothing
-- Check if any of the files a key was exported
-- to are present. This doesn't guarantee the
-- export contains the right content.
, checkPresent = \k ->
anyM (checkPresentExport (exportActions r) k)
=<< liftIO (getExportLocation db k)
, mkUnavailable = return Nothing
, getInfo = do
is <- getInfo r
return (is++[("export", "yes")])
}

View file

@ -16,6 +16,7 @@ import Config.Cost
import Annex.UUID
import Remote.Helper.Special
import Remote.Helper.Messages
import Remote.Helper.Export
import Utility.Env
import Messages.Progress
@ -25,12 +26,13 @@ type Action = String
type HookName = String
remote :: RemoteType
remote = RemoteType {
typename = "hook",
enumerate = const (findSpecialRemotes "hooktype"),
generate = gen,
setup = hookSetup
}
remote = RemoteType
{ typename = "hook"
, enumerate = const (findSpecialRemotes "hooktype")
, generate = gen
, setup = hookSetup
, exportSupported = exportUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do
@ -51,6 +53,7 @@ gen r u c gc = do
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = False
, exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -18,6 +18,7 @@ import Types.Remote
import Annex.UUID
import Remote.Helper.Hooks
import Remote.Helper.ReadOnly
import Remote.Helper.Export
import qualified Git
import qualified Git.Config
@ -42,7 +43,7 @@ import qualified Remote.Hook
import qualified Remote.External
remoteTypes :: [RemoteType]
remoteTypes =
remoteTypes = map adjustExportableRemoteType
[ Remote.Git.remote
, Remote.GCrypt.remote
, Remote.P2P.remote
@ -100,8 +101,9 @@ remoteGen m t r = do
u <- getRepoUUID r
gc <- Annex.getRemoteGitConfig r
let c = fromMaybe M.empty $ M.lookup u m
mrmt <- generate t r u c gc
return $ adjustReadOnly . addHooks <$> mrmt
generate t r u c gc >>= maybe
(return Nothing)
(Just <$$> adjustExportable . adjustReadOnly . addHooks)
{- Updates a local git Remote, re-reading its git config. -}
updateRemote :: Remote -> Annex (Maybe Remote)

View file

@ -24,6 +24,7 @@ import Annex.UUID
import Config
import Config.Cost
import Remote.Helper.Git
import Remote.Helper.Export
import Messages.Progress
import Utility.Metered
import Utility.AuthToken
@ -33,14 +34,15 @@ import Control.Concurrent
import Control.Concurrent.STM
remote :: RemoteType
remote = RemoteType {
typename = "p2p",
remote = RemoteType
{ typename = "p2p"
-- Remote.Git takes care of enumerating P2P remotes,
-- and will call chainGen on them.
enumerate = const (return []),
generate = \_ _ _ _ -> return Nothing,
setup = error "P2P remotes are set up using git-annex p2p"
}
, enumerate = const (return [])
, generate = \_ _ _ _ -> return Nothing
, setup = error "P2P remotes are set up using git-annex p2p"
, exportSupported = exportUnsupported
}
chainGen :: P2PAddress -> Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
chainGen addr r u c gc = do
@ -57,6 +59,7 @@ chainGen addr r u c gc = do
, lockContent = Just (lock u addr connpool)
, checkPresent = checkpresent u addr connpool
, checkPresentCheap = False
, exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -28,6 +28,7 @@ import Annex.UUID
import Annex.Ssh
import Remote.Helper.Special
import Remote.Helper.Messages
import Remote.Helper.Export
import Remote.Rsync.RsyncUrl
import Crypto
import Utility.Rsync
@ -43,12 +44,13 @@ import Utility.SshHost
import qualified Data.Map as M
remote :: RemoteType
remote = RemoteType {
typename = "rsync",
enumerate = const (findSpecialRemotes "rsyncurl"),
generate = gen,
setup = rsyncSetup
}
remote = RemoteType
{ typename = "rsync"
, enumerate = const (findSpecialRemotes "rsyncurl")
, generate = gen
, setup = rsyncSetup
, exportSupported = exportUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do
@ -73,6 +75,7 @@ gen r u c gc = do
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = False
, exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -39,6 +39,7 @@ import Config.Cost
import Remote.Helper.Special
import Remote.Helper.Http
import Remote.Helper.Messages
import Remote.Helper.Export
import qualified Remote.Helper.AWS as AWS
import Creds
import Annex.UUID
@ -53,12 +54,13 @@ import Utility.Url (checkBoth, managerSettings, closeManager)
type BucketName = String
remote :: RemoteType
remote = RemoteType {
typename = "S3",
enumerate = const (findSpecialRemotes "s3"),
generate = gen,
setup = s3Setup
}
remote = RemoteType
{ typename = "S3"
, enumerate = const (findSpecialRemotes "s3")
, generate = gen
, setup = s3Setup
, exportSupported = exportUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do
@ -84,6 +86,7 @@ gen r u c gc = do
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = False
, exportActions = exportUnsupported
, whereisKey = Just (getWebUrls info)
, remoteFsck = Nothing
, repairRepo = Nothing
@ -127,8 +130,9 @@ s3Setup' ss u mcreds c gc
(c', encsetup) <- encryptionSetup c gc
c'' <- setRemoteCredPair encsetup c' gc (AWS.creds u) mcreds
let fullconfig = c'' `M.union` defaults
when (ss == Init) $
genBucket fullconfig gc u
case ss of
Init -> genBucket fullconfig gc u
_ -> return ()
use fullconfig
archiveorg = do

View file

@ -34,6 +34,7 @@ import qualified Git
import Config
import Config.Cost
import Remote.Helper.Special
import Remote.Helper.Export
import Annex.UUID
import Annex.Content
import Logs.RemoteState
@ -51,12 +52,13 @@ type IntroducerFurl = String
type Capability = String
remote :: RemoteType
remote = RemoteType {
typename = "tahoe",
enumerate = const (findSpecialRemotes "tahoe"),
generate = gen,
setup = tahoeSetup
}
remote = RemoteType
{ typename = "tahoe"
, enumerate = const (findSpecialRemotes "tahoe")
, generate = gen
, setup = tahoeSetup
, exportSupported = exportUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do
@ -75,6 +77,7 @@ gen r u c gc = do
, lockContent = Nothing
, checkPresent = checkKey u hdl
, checkPresentCheap = False
, exportActions = exportUnsupported
, whereisKey = Just (getWhereisKey u)
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -10,6 +10,7 @@ module Remote.Web (remote, getWebUrls) where
import Annex.Common
import Types.Remote
import Remote.Helper.Messages
import Remote.Helper.Export
import qualified Git
import qualified Git.Construct
import Annex.Content
@ -22,12 +23,13 @@ import Annex.Quvi
import qualified Utility.Quvi as Quvi
remote :: RemoteType
remote = RemoteType {
typename = "web",
enumerate = list,
generate = gen,
setup = error "not supported"
}
remote = RemoteType
{ typename = "web"
, enumerate = list
, generate = gen
, setup = error "not supported"
, exportSupported = exportUnsupported
}
-- There is only one web remote, and it always exists.
-- (If the web should cease to exist, remove this module and redistribute
@ -50,6 +52,7 @@ gen r _ c gc =
, lockContent = Nothing
, checkPresent = checkKey
, checkPresentCheap = False
, exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -28,6 +28,7 @@ import Config.Cost
import Remote.Helper.Special
import Remote.Helper.Messages
import Remote.Helper.Http
import Remote.Helper.Export
import qualified Remote.Helper.Chunked.Legacy as Legacy
import Creds
import Utility.Metered
@ -40,12 +41,13 @@ import Network.HTTP.Client (HttpExceptionContent(..), responseStatus)
#endif
remote :: RemoteType
remote = RemoteType {
typename = "webdav",
enumerate = const (findSpecialRemotes "webdav"),
generate = gen,
setup = webdavSetup
}
remote = RemoteType
{ typename = "webdav"
, enumerate = const (findSpecialRemotes "webdav")
, generate = gen
, setup = webdavSetup
, exportSupported = exportUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = new <$> remoteCost gc expensiveRemoteCost
@ -68,6 +70,7 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = False
, exportActions = exportUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -2,7 +2,7 @@
-
- Most things should not need this, using Types instead
-
- Copyright 2011-2014 Joey Hess <id@joeyh.name>
- Copyright 2011-2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -18,6 +18,9 @@ module Types.Remote
, Availability(..)
, Verification(..)
, unVerified
, ExportLocation(..)
, isExportSupported
, ExportActions(..)
)
where
@ -34,7 +37,7 @@ import Types.UrlContents
import Types.NumCopies
import Config.Cost
import Utility.Metered
import Git.Types
import Git.Types (RemoteName)
import Utility.SafeCommand
import Utility.Url
@ -42,92 +45,96 @@ type RemoteConfigKey = String
type RemoteConfig = M.Map RemoteConfigKey String
data SetupStage = Init | Enable
deriving (Eq)
data SetupStage = Init | Enable RemoteConfig
{- There are different types of remotes. -}
data RemoteTypeA a = RemoteType {
data RemoteTypeA a = RemoteType
-- human visible type name
typename :: String,
{ typename :: String
-- enumerates remotes of this type
-- The Bool is True if automatic initialization of remotes is desired
enumerate :: Bool -> a [Git.Repo],
-- generates a remote of this type
generate :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> a (Maybe (RemoteA a)),
, enumerate :: Bool -> a [Git.Repo]
-- generates a remote of this type from the current git config
, generate :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> a (Maybe (RemoteA a))
-- initializes or enables a remote
setup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> a (RemoteConfig, UUID)
}
, setup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> a (RemoteConfig, UUID)
-- check if a remote of this type is able to support export
, exportSupported :: RemoteConfig -> RemoteGitConfig -> a Bool
}
instance Eq (RemoteTypeA a) where
x == y = typename x == typename y
{- An individual remote. -}
data RemoteA a = Remote {
data RemoteA a = Remote
-- each Remote has a unique uuid
uuid :: UUID,
{ uuid :: UUID
-- each Remote has a human visible name
name :: RemoteName,
, name :: RemoteName
-- Remotes have a use cost; higher is more expensive
cost :: Cost,
, cost :: Cost
-- Transfers a key's contents from disk to the remote.
-- The key should not appear to be present on the remote until
-- all of its contents have been transferred.
storeKey :: Key -> AssociatedFile -> MeterUpdate -> a Bool,
, storeKey :: Key -> AssociatedFile -> MeterUpdate -> a Bool
-- Retrieves a key's contents to a file.
-- (The MeterUpdate does not need to be used if it writes
-- sequentially to the file.)
retrieveKeyFile :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> a (Bool, Verification),
, retrieveKeyFile :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> a (Bool, Verification)
-- Retrieves a key's contents to a tmp file, if it can be done cheaply.
-- It's ok to create a symlink or hardlink.
retrieveKeyFileCheap :: Key -> AssociatedFile -> FilePath -> a Bool,
, retrieveKeyFileCheap :: Key -> AssociatedFile -> FilePath -> a Bool
-- Removes a key's contents (succeeds if the contents are not present)
removeKey :: Key -> a Bool,
, removeKey :: Key -> a Bool
-- Uses locking to prevent removal of a key's contents,
-- thus producing a VerifiedCopy, which is passed to the callback.
-- If unable to lock, does not run the callback, and throws an
-- error.
-- This is optional; remotes do not have to support locking.
lockContent :: forall r. Maybe (Key -> (VerifiedCopy -> a r) -> a r),
, lockContent :: forall r. Maybe (Key -> (VerifiedCopy -> a r) -> a r)
-- Checks if a key is present in the remote.
-- Throws an exception if the remote cannot be accessed.
checkPresent :: Key -> a Bool,
, checkPresent :: Key -> a Bool
-- Some remotes can checkPresent without an expensive network
-- operation.
checkPresentCheap :: Bool,
, checkPresentCheap :: Bool
-- Some remotes support exports of trees.
, exportActions :: ExportActions a
-- Some remotes can provide additional details for whereis.
whereisKey :: Maybe (Key -> a [String]),
, whereisKey :: Maybe (Key -> a [String])
-- Some remotes can run a fsck operation on the remote,
-- without transferring all the data to the local repo
-- The parameters are passed to the fsck command on the remote.
remoteFsck :: Maybe ([CommandParam] -> a (IO Bool)),
, remoteFsck :: Maybe ([CommandParam] -> a (IO Bool))
-- Runs an action to repair the remote's git repository.
repairRepo :: Maybe (a Bool -> a (IO Bool)),
, repairRepo :: Maybe (a Bool -> a (IO Bool))
-- a Remote has a persistent configuration store
config :: RemoteConfig,
, config :: RemoteConfig
-- git repo for the Remote
repo :: Git.Repo,
, repo :: Git.Repo
-- a Remote's configuration from git
gitconfig :: RemoteGitConfig,
, gitconfig :: RemoteGitConfig
-- a Remote can be assocated with a specific local filesystem path
localpath :: Maybe FilePath,
, localpath :: Maybe FilePath
-- a Remote can be known to be readonly
readonly :: Bool,
, readonly :: Bool
-- a Remote can be globally available. (Ie, "in the cloud".)
availability :: Availability,
, availability :: Availability
-- the type of the remote
remotetype :: RemoteTypeA a,
, remotetype :: RemoteTypeA a
-- For testing, makes a version of this remote that is not
-- available for use. All its actions should fail.
mkUnavailable :: a (Maybe (RemoteA a)),
, mkUnavailable :: a (Maybe (RemoteA a))
-- Information about the remote, for git annex info to display.
getInfo :: a [(String, String)],
, getInfo :: a [(String, String)]
-- Some remotes can download from an url (or uri).
claimUrl :: Maybe (URLString -> a Bool),
, claimUrl :: Maybe (URLString -> a Bool)
-- Checks that the url is accessible, and gets information about
-- its contents, without downloading the full content.
-- Throws an exception if the url is inaccessible.
checkUrl :: Maybe (URLString -> a UrlContents)
}
, checkUrl :: Maybe (URLString -> a UrlContents)
}
instance Show (RemoteA a) where
show remote = "Remote { name =\"" ++ name remote ++ "\" }"
@ -150,3 +157,33 @@ unVerified :: Monad m => m Bool -> m (Bool, Verification)
unVerified a = do
ok <- a
return (ok, UnVerified)
-- A location on a remote that a key can be exported to.
-- The FilePath will be relative, and may contain unix-style path
-- separators.
newtype ExportLocation = ExportLocation FilePath
deriving (Show, Eq)
isExportSupported :: RemoteA a -> a Bool
isExportSupported r = exportSupported (remotetype r) (config r) (gitconfig r)
data ExportActions a = ExportActions
-- Exports content to an ExportLocation.
-- The exported file should not appear to be present on the remote
-- until all of its contents have been transferred.
{ storeExport :: FilePath -> Key -> ExportLocation -> MeterUpdate -> a Bool
-- Retrieves exported content to a file.
-- (The MeterUpdate does not need to be used if it writes
-- sequentially to the file.)
, retrieveExport :: Key -> ExportLocation -> FilePath -> MeterUpdate -> a (Bool, Verification)
-- Removes an exported file (succeeds if the contents are not present)
, removeExport :: Key -> ExportLocation -> a Bool
-- Checks if anything is exported to the remote at the specified
-- ExportLocation.
-- Throws an exception if the remote cannot be accessed.
, checkPresentExport :: Key -> ExportLocation -> a Bool
-- Renames an already exported file.
-- This may fail, if the file doesn't exist, or the remote does not
-- support renames.
, renameExport :: Key -> ExportLocation -> ExportLocation -> a Bool
}

View file

@ -21,7 +21,7 @@ import Types.UUID
-- This order may seem backwards, but we generally want to list dead
-- remotes last and trusted ones first.
data TrustLevel = Trusted | SemiTrusted | UnTrusted | DeadTrusted
deriving (Eq, Enum, Ord, Bounded)
deriving (Eq, Enum, Ord, Bounded, Show)
instance Default TrustLevel where
def = SemiTrusted

View file

@ -28,7 +28,7 @@ type Template = String
{- Runs an action like writeFile, writing to a temp file first and
- then moving it into place. The temp file is stored in the same
- directory as the final file to avoid cross-device renames. -}
viaTmp :: (MonadMask m, MonadIO m) => (FilePath -> String -> m ()) -> FilePath -> String -> m ()
viaTmp :: (MonadMask m, MonadIO m) => (FilePath -> v -> m ()) -> FilePath -> v -> m ()
viaTmp a file content = bracketIO setup cleanup use
where
(dir, base) = splitFileName file

View file

@ -15,13 +15,13 @@ when they want to export a tree. (It would also be possible to drop all content
from an existing special remote and reuse it, but there does not seem much
benefit in doing so.)
Add a new `initremote` configuration `exporttree=true`, that cannot be
Add a new `initremote` configuration `exporttree=yes`, that cannot be
changed by `enableremote`:
git annex initremote myexport type=... exporttree=true
git annex initremote myexport type=... exporttree=yes
It does not make sense to encrypt an export, so exporttree=true requires
(and can even imply) encryption=false.
It does not make sense to encrypt an export, so exporttree=yes requires
encryption=none.
Note that the particular tree to export is not specified yet. This is
because the tree that is exported to a special remote may change.
@ -69,11 +69,6 @@ To efficiently update an export, git-annex can diff the tree
that was exported with the new tree. The naive approach is to upload
new and modified files and remove deleted files.
Note that a file may have been partially uploaded to an export, and then
the export updated to a tree without that file. So, need to try to delete
all removed files, even if location tracking does not say that the special
remote contains them.
With rename detection, if the special remote supports moving files,
more efficient updates can be done. It gets complicated; consider two files
that swap names.
@ -81,33 +76,6 @@ that swap names.
If the special remote supports copying files, that would also make some
updates more efficient.
## resuming exports
Resuming an interrupted export needs to work well.
There are two cases here:
1. Some of the files in the tree have been uploaded; others have not.
2. A file has been partially uploaded.
These two cases need to be disentangled somehow in order to handle
them. One way is to use the location log as follows:
* Before a file is uploaded, look up what key is currently exported
using that filename. If there is one, update the location log,
saying it's not present in the special remote.
* Upload the file.
* Update the location log for the newly exported key.
Note that this method does not allow resuming a partial upload by appending to
a file, because we don't know if the file actually started to be uploaded, or
if the file instead still has the old key's content. Instead, the whole
file needs to be re-uploaded.
Alternative: Keep an index file that's the current state of the export.
See comment #4 of [[todo/export]]. Not sure if that works? Perhaps it
would be overkill if it's only used to support resuming partial uploads.
## changes to special remote interface
This needs some additional methods added to special remotes, and to
@ -115,6 +83,10 @@ the [[external_special_remote_protocol]].
Here's the changes to the latter:
* `EXPORTSUPPORTED`
Used to check if a special remote supports exports. The remote
responds with either `EXPORTSUPPORTED-SUCCESS` or
`EXPORTSUPPORTED-FAILURE`
* `EXPORT Name`
Comes immediately before each of the following requests,
specifying the name of the exported file. It will be in the form
@ -123,6 +95,9 @@ Here's the changes to the latter:
* `TRANSFEREXPORT STORE|RETRIEVE Key File`
Requests the transfer of a File on local disk to or from the previously
provided Name on the special remote.
Note that it's important that, while a file is being stored,
CHECKPRESENTEXPORT not indicate it's present until all the data has
been transferred.
The remote responds with either `TRANSFER-SUCCESS` or
`TRANSFER-FAILURE`, and a remote where exports do not make sense
may always fail.
@ -139,9 +114,8 @@ Here's the changes to the latter:
* `RENAMEEXPORT Key NewName`
Requests the remote rename a file stored on it from the previously
provided Name to the NewName.
The remote responds with `RENAMEEXPORT-SUCCESS`,
`RENAMEEXPORT-FAILURE`, or with `RENAMEEXPORT-UNSUPPORTED` if an efficient
rename cannot be done.
The remote responds with `RENAMEEXPORT-SUCCESS` or with
`RENAMEEXPORT-FAILURE` if an efficient rename cannot be done.
To support old external special remote programs that have not been updated
to support exports, git-annex will need to handle an `ERROR` response
@ -162,19 +136,19 @@ key/value stores. The content of a file can change, and if multiple
repositories can export a special remote, they can be out of sync about
what files are exported to it.
To avoid such problems, when updating an exported file on a special remote,
the key could be recorded there too. But, this would have to be done
atomically, and checked atomically when downloading the file. Special
remotes lack atomicity guarantees for file storage, let alone for file
retrieval.
Possible solution: Make exporttree=true cause the special remote to
Possible solution: Make exporttree=yes cause the special remote to
be untrusted, and rely on annex.verify to catch cases where the content
of a file on a special remote has changed. This would work well enough
except for when the WORM or URL backend is used. So, prevent the user
from exporting such keys. Also, force verification on for such special
remotes, don't let it be turned off.
The same file contents may be in a treeish multiple times under different
filenames. That complicates using location tracking. One file may have been
exported and the other not, and location tracking says that the content
is present in the export. A sqlite database is needed to keep track of
this.
## recording exported filenames in git-annex branch
In order to download the content of a key from a file exported
@ -229,10 +203,101 @@ In this case, git-annex knows both exported trees. Have the user provide
a tree that resolves the conflict as they desire (it could be the same as
one of the exported trees, or some merge of them or an entirely new tree).
The UI to do this can just be another `git annex export $tree --to remote`.
To resolve, diff each exported tree in turn against the resolving tree. If a
file differs, re-export that file. In some cases this will do unncessary
re-uploads, but it's reasonably efficient.
To resolve, diff each exported tree in turn against the resolving tree
and delete all files that differ. Then, upload all missing files.
The documentation should suggest strongly only exporting to a given special
remote from a single repository, or having some other rule that avoids
export conflicts.
## when to update export.log for efficient resuming of exports
When should `export.log` be updated? Possibilities:
* Before performing any work, to set the goal.
* After the export is fully successful, to record the current state.
* After some mid-point.
Lots of things could go wrong during an export. A file might fail to be
transferred or only part of it be transferred; a file's content might not
be present to transfer at all. The export could be interrupted part way.
Updating the export.log at the right point in time is important to handle
these cases efficiently.
If the export.log is updated first, then it's only a goal and does not tell
us what's been done already.
If the export.log is updated only after complete success, then the common
case of some files not having content locally present will prevent it from
being updated. When we resume, we again don't know what's been done
already.
If the export.log is updated after deleting any files from the
remote that are not the same in the new treeish as in the old treeish,
and as long as TRANSFEREXPORT STORE is atomic, then when resuming we can
trust CHECKPRESENTEXPORT to only find files that have the correct content
for the current treeish. (Unless a conflicting export was made from
elsewhere, but in that case, the conflict resolution will have to fix up
later.)
## handling renames efficiently
To handle two files that swap names, a temp name is required.
Difficulty with a temp name is picking a name that won't ever be used by
any exported file.
Interrupted exports also complicate this. While a name could be picked that
is in neither the old nor the new tree, an export could be interrupted,
leaving the file at the temp name. There needs to be something to clean
that up when the export is resumed, even if it's resumed with a different
tree.
Could use something like ".git-annex-tmp-content-$key" as the temp name.
This hides it from casual view, which is good, and it's not depedent on the
tree, so no state needs to be maintained to clean it up. Also, using the
key in the name simplifies calculation of complicated renames (eg, renaming
A to B, B to C, C to A)
Export can first try to rename all files that are deleted/modified
to their key's temp name (falling back to deleting since not all
special remotes support rename), and then, in a second pass, rename
from the temp name to the new name. Followed by deleting the temp name
of all keys whose files are deleted in the diff. That is more renames and
deletes than strictly necessary, but it will statelessly clean up
an interruped export as long as it's run again with the same new tree.
But, an export of tree B should clean up after
an interrupted export of tree A. Some state is needed to handle this.
Before starting the export of tree A, record it somewhere. Then when
resuming, diff A..B, and delete the temp names of the keys in the
diff. (Can't rename here, because we don't know what was the content
of a file when an export was interrupted.)
So, before an export does anything, need to record the tree that's about
to be exported to export.log, not as an exported tree, but as a goal.
Then on resume, the temp files for that can be cleaned up.
## renames and export conflicts
What is there's an export conflict going on at the same time that a file
in the export gets renamed?
Suppose that there are two git repos A and B, each exporting to the same
remote. A and B are not currently communicating. A exports T1 which
contains F. B exports T2, which has a different content for F.
Then A exports T3, which renames F to G. If that rename is done
on the remote, then A will think it's successfully exported T3,
but G will have F's content from T2, not from T1.
When A and B reconnect, the export conflict will be detected.
To resolve the export conflict, it says above to:
> To resolve, diff each exported tree in turn against the resolving tree
> and delete all files that differ. Then, upload all missing files.
Assume that the resolving tree is T3. So B's export of T2 is diffed against
T3. F differs and is deleted (no change). G differs and is deleted,
which fixes up the problem that the wrong content was renamed to G.
G is missing so gets uploaded.
So, this works, as long as "delete all files that differ" means it
deletes both old and new files. And as long as conflict resolution does not
itself stash away files in the temp name for later renaming.

View file

@ -43,7 +43,8 @@ the version of the protocol it is using.
Once it knows the version, git-annex will generally
send a message telling the special remote to start up.
(Or it might send a INITREMOTE, so don't hardcode this order.)
(Or it might send an INITREMOTE or EXPORTSUPPORTED,
so don't hardcode this order.)
PREPARE
@ -102,7 +103,7 @@ The following requests *must* all be supported by the special remote.
So any one-time setup tasks should be done idempotently.
* `PREPARE`
Tells the remote that it's time to prepare itself to be used.
Only INITREMOTE can come before this.
Only INITREMOTE or EXPORTSUPPORTED can come before this.
* `TRANSFER STORE|RETRIEVE Key File`
Requests the transfer of a key. For STORE, the File is the file to upload;
for RETRIEVE the File is where to store the download.
@ -143,6 +144,46 @@ replying with `UNSUPPORTED-REQUEST` is acceptable.
network access.
This is not needed when `SETURIPRESENT` is used, since such uris are
automatically displayed by `git annex whereis`.
* `EXPORTSUPPORTED`
Used to check if a special remote supports exports. The remote
responds with either `EXPORTSUPPORTED-SUCCESS` or
`EXPORTSUPPORTED-FAILURE`. Note that this request may be made before
or after `PREPARE`.
* `EXPORT Name`
Comes immediately before each of the following export-related requests,
specifying the name of the exported file. It will be in the form
of a relative path, and may contain path separators, whitespace,
and other special characters.
* `TRANSFEREXPORT STORE|RETRIEVE Key File`
Requests the transfer of a File on local disk to or from the previously
provided Name on the special remote.
Note that it's important that, while a file is being stored,
CHECKPRESENTEXPORT not indicate it's present until all the data has
been transferred.
The remote responds with either `TRANSFER-SUCCESS` or
`TRANSFER-FAILURE`, and a remote where exports do not make sense
may always fail.
* `CHECKPRESENTEXPORT Key`
Requests the remote to check if the previously provided Name is present
in it.
The remote responds with `CHECKPRESENT-SUCCESS`, `CHECKPRESENT-FAILURE`,
or `CHECKPRESENT-UNKNOWN`.
* `REMOVEEXPORT Key`
Requests the remote to remove content stored by `TRANSFEREXPORT`
with the previously provided Name.
The remote responds with either `REMOVE-SUCCESS` or
`REMOVE-FAILURE`.
If the content was already not present in the remote, it should
respond with `REMOVE-SUCCESS`.
* `RENAMEEXPORT Key NewName`
Requests the remote rename a file stored on it from the previously
provided Name to the NewName.
The remote responds with `RENAMEEXPORT-SUCCESS` or
`RENAMEEXPORT-FAILURE`.
To support old external special remote programs that have not been updated
to support exports, git-annex will need to handle an `ERROR` response
when using any of the above.
More optional requests may be added, without changing the protocol version,
so if an unknown request is seen, reply with `UNSUPPORTED-REQUEST`.
@ -210,6 +251,15 @@ while it's handling a request.
stored in the special remote.
* `WHEREIS-FAILURE`
Indicates that no location is known for a key.
* `EXPORTSUPPORTED-SUCCESS`
Indicates that it makes sense to use this special remote as an export.
* `EXPORTSUPPORTED`
Indicates that it does not make sense to use this special remote as an
export.
* `RENAMEEXPORT-SUCCESS`
Indicates that a `RENAMEEXPORT` was done successfully.
* `RENAMEEXPORT-FAILURE`
Indicates that a `RENAMEEXPORT` failed for whatever reason.
* `UNSUPPORTED-REQUEST`
Indicates that the special remote does not know how to handle a request.

64
doc/git-annex-export.mdwn Normal file
View file

@ -0,0 +1,64 @@
# NAME
git-annex export - export content to a remote
# SYNOPSIS
git annex export `treeish --to remote`
# DESCRIPTION
Use this command to export a tree of files from a git-annex repository.
Normally files are stored on a git-annex special remote named by their
keys. That is great for reliable data storage, but your filenames are
obscured. Exporting replicates the tree to the special remote as-is.
Mixing key/value storage and exports in the same remote would be a mess and
so is not allowed. You have to configure a special remote with
`exporttree=yes` when initially setting it up with
[[git-annex-initremote]](1).
Repeated exports are done efficiently, by diffing the old and new tree,
and transferring only the changed files.
Exports can be interrupted and resumed. However, partially uploaded files
will be re-started from the beginning.
Once content has been exported to a remote, commands like `git annex get`
can download content from there the same as from other remotes. However,
since an export is not a key/value store, git-annex has to do more
verification of content downloaded from an export. Some types of keys,
that are not based on checksums, cannot be downloaded from an export.
And, git-annex will never trust an export to retain the content of a key.
# EXPORT CONFLICTS
If two different git-annex repositories are both exporting different trees
to the same special remote, it's possible for an export conflict to occur.
This leaves the special remote with some files from one tree, and some
files from the other. Files in the special remote may have entirely the
wrong content as well.
It's not possible for git-annex to detect when making an export will result
in an export conflict. The best way to avoid export conflicts is to either
only ever export to a special remote from a single repository, or to have a
rule about the tree that you export to the special remote. For example, if
you always export origin/master after pushing to origin, then an export
conflict can't happen.
An export conflict can only be detected after the two git repositories
that produced it get back in sync. Then the next time you run `git annex
export`, it will detect the export conflict, and resolve it.
# SEE ALSO
[[git-annex]](1)
[[git-annex-initremote]](1)
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -96,6 +96,8 @@ instead of to the annex.
[[git-annex-add]](1)
[[git-annex-export]](1)
# AUTHOR
Joey Hess <id@joeyh.name>

View file

@ -158,6 +158,12 @@ subdirectories).
See [[git-annex-importfeed]](1) for details.
* `export treeish --to remote`
Export content to a remote.
See [[git-annex-export]](1) for details.
* `undo [filename|directory] ...`
Undo last change to a file or directory.

View file

@ -185,10 +185,23 @@ content expression.
Tracks what trees have been exported to special remotes by
[[git-annex-export]](1).
Each line starts with a timestamp, then the uuid of the special remote,
followed by the sha1 of the tree that was exported to that special remote.
Each line starts with a timestamp, then the uuid of the repository
that exported to the special remote, followed by the sha1 of the tree
that was exported, and then by the uuid of the special remote.
(The exported tree is also grafted into the git-annex branch, at
There can also be subsequent sha1s, of trees that have started to be
exported but whose export is not yet complete. The sha1 of the exported
tree can be the empty tree (4b825dc642cb6eb9a060e54bf8d69288fbee4904)
in order to record the beginning of the first export.
For example:
1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 4b825dc642cb6eb9a060e54bf8d69288fbee4904 26339d22-446b-11e0-9101-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b
1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55
1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55 7c7af825782b7c8706039b855c72709993542be4
1317923000.251111s e605dca6-446a-11e0-8b2a-002170d25c55 7c7af825782b7c8706039b855c72709993542be4 26339d22-446b-11e0-9101-002170d25c55
(The trees are also grafted into the git-annex branch, at
`export.tree`, to prevent git from garbage collecting it. However, the head
of the git-annex branch should never contain such a grafted in tree;
the grafted tree is removed in the same commit that updates `export.log`.)

View file

@ -31,6 +31,10 @@ remote:
Do not use for new remotes. It is not safe to change the chunksize
setting of an existing remote.
* `exporttree` - Set to "yes" to make this special remote usable
by [[git-annex-export]]. It will not be usable as a general-purpose
special remote.
Setup example:
# git annex initremote usbdrive type=directory directory=/media/usbdrive/ encryption=none

View file

@ -14,3 +14,15 @@ Would this be able to reuse the existing `storeKey` interface, or would
there need to be a new interface in supported remotes?
--[[Joey]]
Work is in progress. Todo list:
* `git annex get --from export` works in the repo that exported to it,
but in another repo, the export db won't be populated, so it won't work.
Maybe just show a useful error message in this case?
However, exporting from one repository and then trying to update the
export from another repository also doesn't work right, because the
export database is not populated. So, seems that the export database needs
to get populated based on the export log in these cases.
* Support export to aditional special remotes (S3 etc)
* Support export to external special remotes.

View file

@ -787,6 +787,7 @@ Executable git-annex
Config.GitConfig
Creds
Crypto
Database.Export
Database.Fsck
Database.Handle
Database.Init
@ -849,6 +850,7 @@ Executable git-annex
Logs.Config
Logs.Difference
Logs.Difference.Pure
Logs.Export
Logs.FsckResults
Logs.Group
Logs.Line
@ -901,6 +903,7 @@ Executable git-annex
Remote.Helper.Chunked
Remote.Helper.Chunked.Legacy
Remote.Helper.Encryptable
Remote.Helper.Export
Remote.Helper.Git
Remote.Helper.Hooks
Remote.Helper.Http