Merge branch 'importtree'

Joey Hess 2019-03-11 14:17:59 -04:00
commit 4d610ee98a
GPG key ID: DB12DB0FF05F8F38 (no known key found for this signature in database)
85 changed files with 2455 additions and 606 deletions

Annex.hs

@@ -96,7 +96,8 @@ newtype Annex a = Annex { runAnnex :: ReaderT (MVar AnnexState) IO a }
MonadMask,
Fail.MonadFail,
Functor,
Applicative
Applicative,
Alternative
)
-- internal state storage

Annex/Branch.hs

@@ -24,7 +24,7 @@ module Annex.Branch (
forceCommit,
getBranch,
files,
graftTreeish,
rememberTreeish,
performTransitions,
withIndex,
) where
@@ -51,6 +51,7 @@ import qualified Git.Branch
import qualified Git.UnionMerge
import qualified Git.UpdateIndex
import qualified Git.Tree
import qualified Git.LsTree
import Git.LsTree (lsTreeParams)
import qualified Git.HashObject
import Annex.HashObject
@@ -366,7 +367,7 @@ branchFiles = withIndex $ inRepo branchFiles'
branchFiles' :: Git.Repo -> IO [FilePath]
branchFiles' = Git.Command.pipeNullSplitZombie
(lsTreeParams fullname [Param "--name-only"])
(lsTreeParams Git.LsTree.LsTreeRecursive fullname [Param "--name-only"])
{- Populates the branch's index file with the current branch contents.
-
@@ -645,16 +646,15 @@ getMergedRefs' = do
- and then removes it. This ensures that the treeish won't get garbage
- collected, and will always be available as long as the git-annex branch
- is available. -}
graftTreeish :: Git.Ref -> TopFilePath -> Annex ()
graftTreeish treeish graftpoint = lockJournal $ \jl -> do
rememberTreeish :: Git.Ref -> TopFilePath -> Annex ()
rememberTreeish treeish graftpoint = lockJournal $ \jl -> do
branchref <- getBranch
updateIndex jl branchref
Git.Tree.Tree t <- inRepo $ Git.Tree.getTree branchref
t' <- inRepo $ Git.Tree.recordTree $ Git.Tree.Tree $
Git.Tree.RecordedSubTree graftpoint treeish [] : t
origtree <- fromMaybe (giveup "unable to determine git-annex branch tree") <$>
inRepo (Git.Ref.tree branchref)
addedt <- inRepo $ Git.Tree.graftTree treeish graftpoint origtree
c <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit
"graft" [branchref] t'
origtree <- inRepo $ Git.Tree.recordTree (Git.Tree.Tree t)
"graft" [branchref] addedt
c' <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit
"graft cleanup" [c] origtree
inRepo $ Git.Branch.update' fullname c'
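
A minimal sketch of how a caller might use the renamed rememberTreeish; the ref and graft point here are hypothetical, assuming Ref and asTopFilePath are in scope:

-- Keep a hypothetical imported tree reachable from the git-annex
-- branch so git gc cannot reap it.
rememberExample :: Annex ()
rememberExample = rememberTreeish
	(Ref "refs/basis/myimport") -- hypothetical treeish to preserve
	(asTopFilePath "grafted")   -- hypothetical graft point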

Annex/Branch/Transitions.hs

@@ -40,15 +40,15 @@ getTransitionCalculator ForgetDeadRemotes = Just dropDead
dropDead :: FilePath -> L.ByteString -> TrustMap -> FileTransition
dropDead f content trustmap = case getLogVariety f of
Just UUIDBasedLog
Just OldUUIDBasedLog
-- Don't remove the dead repo from the trust log,
-- because git remotes may still exist, and they need
-- to still know it's dead.
| f == trustLog -> PreserveFile
| otherwise -> ChangeFile $
UUIDBased.buildLog byteString $
UUIDBased.buildLogOld byteString $
dropDeadFromMapLog trustmap id $
UUIDBased.parseLog A.takeByteString content
UUIDBased.parseLogOld A.takeByteString content
Just NewUUIDBasedLog -> ChangeFile $
UUIDBased.buildLogNew byteString $
dropDeadFromMapLog trustmap id $


@@ -16,7 +16,6 @@ import qualified Remote
import qualified Command.Drop
import Command
import Annex.Wanted
import Annex.Export
import Config
import Annex.Content.Direct
import qualified Database.Keys

Annex/Export.hs

@@ -13,11 +13,9 @@ import Types
import Types.Key
import qualified Git
import qualified Types.Remote as Remote
import Config
import Messages
import Utility.FileSystemEncoding
import qualified Data.Map as M
import Control.Applicative
import Data.Maybe
import Prelude
@@ -44,9 +42,6 @@ exportKey sha = mk <$> catKey sha
, keyChunkNum = Nothing
}
exportTree :: Remote.RemoteConfig -> Bool
exportTree c = fromMaybe False $ yesNo =<< M.lookup "exporttree" c
warnExportConflict :: Remote -> Annex ()
warnExportConflict r = toplevelWarning True $
"Export conflict detected. Different trees have been exported to " ++

Annex/Import.hs (new file)

@@ -0,0 +1,339 @@
{- git-annex import from remotes
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Import (
importTree,
ImportTreeConfig(..),
ImportCommitConfig(..),
buildImportCommit,
buildImportTrees,
downloadImport
) where
import Annex.Common
import Types.Import
import qualified Types.Remote as Remote
import Git.Types
import Git.Tree
import Git.Sha
import Git.FilePath
import qualified Git.Ref
import qualified Git.Branch
import qualified Annex
import Annex.Link
import Annex.LockFile
import Annex.Content
import Annex.Export
import Command
import Backend
import Config
import Types.Key
import Types.KeySource
import Messages.Progress
import Utility.DataUnits
import Logs.Export
import Logs.Location
import qualified Database.Export as Export
import qualified Database.ContentIdentifier as CIDDb
import qualified Logs.ContentIdentifier as CIDLog
import Control.Concurrent.STM
import qualified Data.Map.Strict as M
import qualified Data.Set as S
{- Configures how to build an import tree. -}
data ImportTreeConfig
= ImportTree
-- ^ Import the tree as-is from the remote.
| ImportSubTree TopFilePath Sha
-- ^ Import a tree from the remote and graft it into a subdirectory
-- of the existing tree whose Sha is provided, replacing anything
-- that was there before.
deriving (Show)
{- Configures how to build an import commit. -}
data ImportCommitConfig = ImportCommitConfig
{ importCommitParent :: Maybe Sha
-- ^ Commit to use as a parent of the import commit.
, importCommitMode :: Git.Branch.CommitMode
, importCommitMessage :: String
}
{- Builds a commit for an import from a special remote.
-
- When a remote provides a history of versions of files,
- builds a corresponding tree of git commits.
-
- When there are no changes to commit (ie, the imported tree is the same
- as the tree in the importCommitParent), returns Nothing.
-
- After importing from a remote, exporting the same thing back to the
- remote should be a no-op. So, the export log and database are
- updated to reflect the imported tree.
-
- This does not download any content from a remote. But since it needs the
- Key of imported files to be known, its caller will have to first download
- new files in order to generate keys for them.
-}
buildImportCommit
:: Remote
-> ImportTreeConfig
-> ImportCommitConfig
-> ImportableContents Key
-> Annex (Maybe Ref)
buildImportCommit remote importtreeconfig importcommitconfig importable =
case importCommitParent importcommitconfig of
Nothing -> go emptyTree Nothing
Just basecommit -> inRepo (Git.Ref.tree basecommit) >>= \case
Nothing -> go emptyTree Nothing
Just origtree -> go origtree (Just basecommit)
where
basetree = case importtreeconfig of
ImportTree -> emptyTree
ImportSubTree _ sha -> sha
subdir = case importtreeconfig of
ImportTree -> Nothing
ImportSubTree dir _ -> Just dir
go origtree basecommit = do
imported@(History finaltree _) <-
buildImportTrees basetree subdir importable
mkcommits origtree basecommit imported >>= \case
Nothing -> return Nothing
Just finalcommit -> do
updatestate finaltree
return (Just finalcommit)
mkcommits origtree basecommit (History importedtree hs) = do
parents <- catMaybes <$> mapM (mkcommits origtree basecommit) hs
if importedtree == origtree && null parents
then return Nothing -- no changes to commit
else do
let commitparents = if null parents
then catMaybes [basecommit]
else parents
commit <- inRepo $ Git.Branch.commitTree
(importCommitMode importcommitconfig)
(importCommitMessage importcommitconfig)
commitparents
importedtree
return (Just commit)
updatestate committedtree = do
importedtree <- case subdir of
Nothing -> pure committedtree
Just dir ->
let subtreeref = Ref $
fromRef committedtree ++ ":" ++ getTopFilePath dir
in fromMaybe emptyTree
<$> inRepo (Git.Ref.tree subtreeref)
updateexportdb importedtree
oldexport <- updateexportlog importedtree
updatelocationlog oldexport importedtree
updateexportdb importedtree = do
db <- Export.openDb (Remote.uuid remote)
Export.writeLockDbWhile db $ do
prevtree <- liftIO $ fromMaybe emptyTree
<$> Export.getExportTreeCurrent db
when (importedtree /= prevtree) $ do
Export.updateExportDb db prevtree importedtree
liftIO $ Export.recordExportTreeCurrent db importedtree
Export.closeDb db
updateexportlog importedtree = do
oldexport <- getExport (Remote.uuid remote)
recordExport (Remote.uuid remote) $ ExportChange
{ oldTreeish = exportedTreeishes oldexport
, newTreeish = importedtree
}
return oldexport
-- downloadImport takes care of updating the location log
-- for the local repo when keys are downloaded, and also updates
-- the location log for the remote for keys that are present in it.
-- That leaves updating the location log for the remote for keys
-- that have had the last copy of their content removed from it.
--
-- This must run after the export database has been updated
-- and flushed to disk, so it can query it.
updatelocationlog oldexport finaltree = do
let stillpresent db k = liftIO $ not . null
<$> Export.getExportedLocation db k
let updater db oldkey _newkey _ = case oldkey of
Just (AnnexKey k) -> unlessM (stillpresent db k) $
logChange k (Remote.uuid remote) InfoMissing
Just (GitKey _) -> noop
Nothing -> noop
db <- Export.openDb (Remote.uuid remote)
forM_ (exportedTreeishes oldexport) $ \oldtree ->
Export.runExportDiffUpdater updater db oldtree finaltree
Export.closeDb db
data History t = History t [History t]
deriving (Show)
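
For orientation, a History value nests parent generations the way commits do; a hypothetical two-generation history of trees (the shas are placeholders):

-- Newest tree first; each nested History is a parent generation.
exampleHistory :: History Sha
exampleHistory = History (Ref "aaaa...") [History (Ref "bbbb...") []]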
{- Builds a history of git trees reflecting the ImportableContents.
-
- When a subdir is provided, the imported tree is grafted into the basetree at
- that location, replacing any object that was there.
-}
buildImportTrees
:: Ref
-> Maybe TopFilePath
-> ImportableContents Key
-> Annex (History Sha)
buildImportTrees basetree msubdir importable = History
<$> (go (importableContents importable) =<< Annex.gitRepo)
<*> mapM (buildImportTrees basetree msubdir) (importableHistory importable)
where
go ls repo = withMkTreeHandle repo $ \hdl -> do
importtree <- liftIO . recordTree' hdl
. treeItemsToTree
=<< mapM mktreeitem ls
case msubdir of
Nothing -> return importtree
Just subdir -> liftIO $
graftTree' importtree subdir basetree repo hdl
mktreeitem (loc, k) = do
let lf = fromImportLocation loc
let treepath = asTopFilePath lf
let topf = asTopFilePath $
maybe lf (\sd -> getTopFilePath sd </> lf) msubdir
relf <- fromRepo $ fromTopFilePath topf
symlink <- calcRepo $ gitAnnexLink relf k
linksha <- hashSymlink symlink
return $ TreeItem treepath (fromTreeItemType TreeSymlink) linksha
{- Downloads all new ContentIdentifiers as needed to generate Keys.
- Supports concurrency when enabled.
-
- If any download fails, the whole thing fails, but it will resume where
- it left off.
-}
downloadImport :: Remote -> ImportTreeConfig -> ImportableContents (ContentIdentifier, ByteSize) -> Annex (Maybe (ImportableContents Key))
downloadImport remote importtreeconfig importablecontents = do
-- This map is used to remember content identifiers that
-- were just downloaded, before they have necessarily been
-- stored in the database. This way, if the same content
-- identifier appears multiple times in the
-- importablecontents (eg when it has a history),
-- they will only be downloaded once.
cidmap <- liftIO $ newTVarIO M.empty
-- When concurrency is enabled, this set is needed to
-- avoid two threads both downloading the same content identifier.
downloading <- liftIO $ newTVarIO S.empty
withExclusiveLock gitAnnexContentIdentifierLock $
bracket CIDDb.openDb CIDDb.closeDb $ \db -> do
CIDDb.needsUpdateFromLog db
>>= maybe noop (CIDDb.updateFromLog db)
go cidmap downloading importablecontents db
where
go cidmap downloading (ImportableContents l h) db = do
jobs <- forM l $ \i ->
startdownload cidmap downloading db i
l' <- liftIO $ forM jobs $
either pure (atomically . takeTMVar)
if any isNothing l'
then return Nothing
else do
h' <- mapM (\ic -> go cidmap downloading ic db) h
if any isNothing h'
then return Nothing
else return $ Just $
ImportableContents
(catMaybes l')
(catMaybes h')
waitstart downloading cid = liftIO $ atomically $ do
s <- readTVar downloading
if S.member cid s
then retry
else writeTVar downloading $ S.insert cid s
signaldone downloading cid = liftIO $ atomically $ do
s <- readTVar downloading
writeTVar downloading $ S.delete cid s
startdownload cidmap downloading db i@(loc, (cid, _sz)) = getcidkey cidmap db cid >>= \case
(k:_) -> return $ Left $ Just (loc, k)
[] -> do
job <- liftIO $ newEmptyTMVarIO
let downloadaction = do
showStart "import" (fromImportLocation loc)
next $ tryNonAsync (download cidmap db i) >>= \case
Left e -> next $ do
warning (show e)
liftIO $ atomically $
putTMVar job Nothing
return False
Right r -> next $ do
liftIO $ atomically $
putTMVar job r
return True
commandAction $ bracket_
(waitstart downloading cid)
(signaldone downloading cid)
downloadaction
return (Right job)
download cidmap db (loc, (cid, sz)) = do
let rundownload tmpfile p =
Remote.retrieveExportWithContentIdentifier ia loc cid tmpfile (mkkey loc tmpfile) p >>= \case
Just k -> tryNonAsync (moveAnnex k tmpfile) >>= \case
Right True -> do
recordcidkey cidmap db cid k
logStatus k InfoPresent
logChange k (Remote.uuid remote) InfoPresent
return $ Just (loc, k)
_ -> return Nothing
Nothing -> return Nothing
checkDiskSpaceToGet tmpkey Nothing $
withTmp tmpkey $ \tmpfile ->
metered Nothing tmpkey (return Nothing) $
const (rundownload tmpfile)
where
ia = Remote.importActions remote
tmpkey = importKey cid sz
mkkey loc tmpfile = do
f <- fromRepo $ fromTopFilePath $ locworktreefilename loc
backend <- chooseBackend f
let ks = KeySource
{ keyFilename = f
, contentLocation = tmpfile
, inodeCache = Nothing
}
fmap fst <$> genKey ks backend
locworktreefilename loc = asTopFilePath $ case importtreeconfig of
ImportTree -> fromImportLocation loc
ImportSubTree subdir _ ->
getTopFilePath subdir </> fromImportLocation loc
getcidkey cidmap db cid = liftIO $
CIDDb.getContentIdentifierKeys db (Remote.uuid remote) cid >>= \case
[] -> atomically $
maybeToList . M.lookup cid <$> readTVar cidmap
l -> return l
recordcidkey cidmap db cid k = do
liftIO $ atomically $ modifyTVar' cidmap $
M.insert cid k
liftIO $ CIDDb.recordContentIdentifier db (Remote.uuid remote) cid k
CIDLog.recordContentIdentifier (Remote.uuid remote) cid k
{- Temporary key used for import of a ContentIdentifier while downloading
- content, before generating its real key. -}
importKey :: ContentIdentifier -> Integer -> Key
importKey (ContentIdentifier cid) size = stubKey
{ keyName = cid
, keyVariety = OtherKey "CID"
, keySize = Just size
}
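
Concretely, for a hypothetical content identifier, the temporary key comes out as below (field values per the stubKey update above):

-- Illustrative only; "etag-1" stands in for whatever opaque
-- identifier the special remote reports for the content.
exampleTmpKey :: Key
exampleTmpKey = importKey (ContentIdentifier "etag-1") 42
-- keyName = "etag-1", keyVariety = OtherKey "CID", keySize = Just 42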

Annex/Locations.hs

@@ -48,6 +48,9 @@ module Annex.Locations (
gitAnnexSmudgeLock,
gitAnnexExportDbDir,
gitAnnexExportLock,
gitAnnexExportUpdateLock,
gitAnnexContentIdentifierDbDir,
gitAnnexContentIdentifierLock,
gitAnnexScheduleState,
gitAnnexTransferDir,
gitAnnexCredsDir,
@@ -348,6 +351,18 @@ gitAnnexExportDbDir u r = gitAnnexExportDir u r </> "db"
gitAnnexExportLock :: UUID -> Git.Repo -> FilePath
gitAnnexExportLock u r = gitAnnexExportDbDir u r ++ ".lck"
{- Lock file for updating the export state for a special remote. -}
gitAnnexExportUpdateLock :: UUID -> Git.Repo -> FilePath
gitAnnexExportUpdateLock u r = gitAnnexExportDbDir u r ++ ".upl"
{- Directory containing database used to record remote content ids. -}
gitAnnexContentIdentifierDbDir :: Git.Repo -> FilePath
gitAnnexContentIdentifierDbDir r = gitAnnexDir r </> "cid"
{- Lock file for writing to the content id database. -}
gitAnnexContentIdentifierLock :: Git.Repo -> FilePath
gitAnnexContentIdentifierLock r = gitAnnexContentIdentifierDbDir r ++ ".lck"
{- .git/annex/schedulestate is used to store information about when
- scheduled jobs were last run. -}
gitAnnexScheduleState :: Git.Repo -> FilePath

Annex/LockFile.hs

@@ -14,6 +14,7 @@ module Annex.LockFile (
fromLockCache,
withSharedLock,
withExclusiveLock,
takeExclusiveLock,
tryExclusiveLock,
) where
@@ -77,11 +78,18 @@ withSharedLock getlockfile a = debugLocks $ do
{- Runs an action with an exclusive lock held. If the lock is already
- held, blocks until it becomes free. -}
withExclusiveLock :: (Git.Repo -> FilePath) -> Annex a -> Annex a
withExclusiveLock getlockfile a = debugLocks $ do
withExclusiveLock getlockfile a = bracket
(takeExclusiveLock getlockfile)
(liftIO . dropLock)
(const a)
{- Takes an exclusive lock, blocking until it's free. -}
takeExclusiveLock :: (Git.Repo -> FilePath) -> Annex LockHandle
takeExclusiveLock getlockfile = debugLocks $ do
lockfile <- fromRepo getlockfile
createAnnexDirectory $ takeDirectory lockfile
mode <- annexFileMode
bracket (lock mode lockfile) (liftIO . dropLock) (const a)
lock mode lockfile
where
#ifndef mingw32_HOST_OS
lock mode = noUmask mode . lockExclusive (Just mode)
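
The split into takeExclusiveLock and withExclusiveLock lets a caller hold a lock across a scope of its own choosing; a minimal sketch, assuming gitAnnexContentIdentifierLock from Annex/Locations.hs above:

-- Take the lock early, drop it as soon as the serialized work is
-- done, while unrelated work continues afterwards.
lockExample :: Annex ()
lockExample = do
	lck <- takeExclusiveLock gitAnnexContentIdentifierLock
	-- ... work that must not run concurrently ...
	liftIO $ dropLock lck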

Annex/RemoteTrackingBranch.hs (new file)

@@ -0,0 +1,34 @@
{- git-annex remote tracking branches
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.RemoteTrackingBranch
( RemoteTrackingBranch
, mkRemoteTrackingBranch
, fromRemoteTrackingBranch
, setRemoteTrackingBranch
) where
import Annex.Common
import Git.Types
import qualified Git.Ref
import qualified Git.Branch
import qualified Types.Remote as Remote
newtype RemoteTrackingBranch = RemoteTrackingBranch
{ fromRemoteTrackingBranch :: Ref }
deriving (Show, Eq)
{- Makes a remote tracking branch corresponding to a local branch.
- Note that the local branch does not need to exist yet. -}
mkRemoteTrackingBranch :: Remote -> Branch -> RemoteTrackingBranch
mkRemoteTrackingBranch remote ref = RemoteTrackingBranch $
Git.Ref.underBase ("refs/remotes/" ++ Remote.name remote) ref
{- Set remote tracking branch to point to a commit. -}
setRemoteTrackingBranch :: RemoteTrackingBranch -> Sha -> Annex ()
setRemoteTrackingBranch tb commit =
inRepo $ Git.Branch.update' (fromRemoteTrackingBranch tb) commit
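
Given underBase's behavior, the mapping matches the one git fetch uses; for a remote assumed to be named "origin":

-- mkRemoteTrackingBranch origin (Ref "refs/heads/master")
--   == RemoteTrackingBranch (Ref "refs/remotes/origin/master")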


@@ -83,7 +83,7 @@ scanUnlockedFiles = whenM (isJust <$> inRepo Git.Branch.current) $ do
showSideAction "scanning for unlocked files"
Database.Keys.runWriter $
liftIO . Database.Keys.SQL.dropAllAssociatedFiles
(l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.Ref.headRef
(l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive Git.Ref.headRef
forM_ l $ \i ->
when (isregfile i) $
maybe noop (add i)


@@ -19,8 +19,8 @@ import Logs.Trust
import Utility.TimeStamp
import qualified Remote
import qualified Types.Remote as Remote
import Config
import Config.DynamicConfig
import Annex.Export
import Control.Concurrent.STM
import System.Posix.Types


@@ -64,7 +64,7 @@ exportToRemotes rs = do
forM rs $ \r -> do
Annex.changeState $ \st -> st { Annex.errcounter = 0 }
start <- liftIO getCurrentTime
void $ Command.Sync.seekExportContent rs
void $ Command.Sync.seekExportContent Nothing rs
=<< getCurrentBranch
-- Look at command error counter to see if the export
-- didn't work.

CHANGELOG

@@ -1,9 +1,26 @@
git-annex (7.20190220) UNRELEASED; urgency=medium
* New feature allows importing from special remotes, using
git annex import branch:subdir --from remote
* Directory special remote supports being configured with importree=yes,
to allow git-annex import of files from the directory. This can be
combined with exporttree=yes and git-annex export used to send changes
back to the same directory.
* Remote tracking branches are updated when importing and exporting to
special remotes, in ways analogous to how git fetch and git push do.
* export: Deprecated the --tracking option.
Instead, users can configure remote.<name>.annex-tracking-branch
themselves.
* sync --content: When remote.<name>.annex-tracking-branch is configured,
import from special remotes.
* sync, assistant: --no-push and remote.<name>.annex-push prevent exporting
trees to special remotes.
* Fix storage of metadata values containing newlines.
(Reversion introduced in version 7.20190122.)
* Sped up git-annex export in repositories with lots of keys.
* S3: Support enabling bucket versioning when built with aws-0.21.1.
* stack.yaml: Build with aws-0.21.1
* Fix cleanup of git-annex:export.log after git-annex forget --drop-dead.
-- Joey Hess <id@joeyh.name> Wed, 20 Feb 2019 14:20:59 -0400

COPYRIGHT

@@ -10,7 +10,7 @@ Copyright: © 2012-2017 Joey Hess <id@joeyh.name>
© 2014 Sören Brunk
License: AGPL-3+
Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Version.hs Benchmark.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/Extermal/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs
Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Import.hs Annex/RemoteTrackingBranch.hs Benchmark.hs Database/ContentIdentifier.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Logs/ContentIdentifier.hs Logs/ContentIdentifier/Pure.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/Extermal/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs
Copyright: © 2011-2019 Joey Hess <id@joeyh.name>
License: AGPL-3+


@@ -228,7 +228,7 @@ withKeyOptions' ko auto mkkeyaction fallbackaction params = do
runbranchkeys bs = do
keyaction <- mkkeyaction
forM_ bs $ \b -> do
(l, cleanup) <- inRepo $ LsTree.lsTree b
(l, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive b
forM_ l $ \i -> do
let bfp = mkActionItem $ BranchFilePath b (LsTree.file i)
maybe noop (\k -> keyaction (k, bfp))

Command/Export.hs

@@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2017 Joey Hess <id@joeyh.name>
- Copyright 2017-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -24,7 +24,7 @@ import Annex.Export
import Annex.Content
import Annex.Transfer
import Annex.CatFile
import Annex.LockFile
import Annex.RemoteTrackingBranch
import Logs.Location
import Logs.Export
import Database.Export
@@ -43,6 +43,7 @@ cmd = command "export" SectionCommon
data ExportOptions = ExportOptions
{ exportTreeish :: Git.Ref
-- ^ can be a tree, a branch, a commit, or a tag
, exportRemote :: DeferredParse Remote
, exportTracking :: Bool
}
@@ -58,7 +59,7 @@ optParser _ = ExportOptions
)
parsetracking = switch
( long "tracking"
<> help ("track changes to the " ++ paramTreeish)
<> help ("track changes to the " ++ paramTreeish ++ " (deprecated)")
)
-- To handle renames which swap files, the exported file is first renamed
@@ -72,19 +73,42 @@ seek o = do
r <- getParsed (exportRemote o)
unlessM (isExportSupported r) $
giveup "That remote does not support exports."
-- handle deprecated option
when (exportTracking o) $
setConfig (remoteConfig r "export-tracking")
setConfig (remoteConfig r "annex-tracking-branch")
(fromRef $ exportTreeish o)
new <- fromMaybe (giveup "unknown tree") <$>
-- Dereference the tree pointed to by the branch, commit,
-- or tag.
tree <- fromMaybe (giveup "unknown tree") <$>
inRepo (Git.Ref.tree (exportTreeish o))
withExclusiveLock (gitAnnexExportLock (uuid r)) $ do
db <- openDb (uuid r)
changeExport r db new
unlessM (Annex.getState Annex.fast) $
void $ fillExport r db new
closeDb db
mtbcommitsha <- getExportCommit r (exportTreeish o)
db <- openDb (uuid r)
writeLockDbWhile db $ do
changeExport r db tree
unlessM (Annex.getState Annex.fast) $ do
void $ fillExport r db tree mtbcommitsha
closeDb db
-- | When the treeish is a branch like master or refs/heads/master
-- (but not refs/remotes/...), find the commit it points to
-- and the corresponding remote tracking branch.
--
-- The treeish may also be a subdir within a branch, like master:subdir,
-- that results in this returning the same thing it does for the master
-- branch.
getExportCommit :: Remote -> Git.Ref -> Annex (Maybe (RemoteTrackingBranch, Sha))
getExportCommit r treeish
| '/' `notElem` fromRef baseref = do
let tb = mkRemoteTrackingBranch r baseref
commitsha <- inRepo $ Git.Ref.sha $ Git.Ref.underBase refsheads baseref
return (fmap (tb, ) commitsha)
| otherwise = return Nothing
where
baseref = Ref $ takeWhile (/= ':') $ fromRef $
Git.Ref.removeBase refsheads treeish
refsheads = "refs/heads"
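
Illustrative behavior, per the comment above (the values are hypothetical):

-- getExportCommit r (Ref "master")        ~> Just (tb, commitsha)
-- getExportCommit r (Ref "master:subdir") ~> Just (tb, commitsha), same tb
-- getExportCommit r (Ref "refs/remotes/origin/master") ~> Nothing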
-- | Changes what's exported to the remote. Does not upload any new
-- files, but does delete and rename files already exported to the remote.
@@ -189,26 +213,42 @@ mkDiffMap old new db = do
| sha == nullSha = return Nothing
| otherwise = Just <$> exportKey sha
-- | Upload all exported files that are not yet in the remote,
-- Returns True when files were uploaded.
fillExport :: Remote -> ExportHandle -> Git.Ref -> Annex Bool
fillExport r db new = do
(l, cleanup) <- inRepo $ Git.LsTree.lsTree new
cvar <- liftIO $ newMVar False
commandActions $ map (startExport r db cvar) l
void $ liftIO $ cleanup
liftIO $ takeMVar cvar
newtype FileUploaded = FileUploaded { fromFileUploaded :: Bool }
startExport :: Remote -> ExportHandle -> MVar Bool -> Git.LsTree.TreeItem -> CommandStart
startExport r db cvar ti = do
newtype AllFilled = AllFilled { fromAllFilled :: Bool }
-- | Upload all exported files that are not yet in the remote.
--
-- Returns True when some files were uploaded (perhaps not all of them).
--
-- Once all exported files have reached the remote, updates the
-- remote tracking branch.
fillExport :: Remote -> ExportHandle -> Git.Ref -> Maybe (RemoteTrackingBranch, Sha) -> Annex Bool
fillExport r db newtree mtbcommitsha = do
(l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive newtree
cvar <- liftIO $ newMVar (FileUploaded False)
allfilledvar <- liftIO $ newMVar (AllFilled True)
commandActions $ map (startExport r db cvar allfilledvar) l
void $ liftIO $ cleanup
case mtbcommitsha of
Nothing -> noop
Just (tb, commitsha) ->
whenM (liftIO $ fromAllFilled <$> takeMVar allfilledvar) $
setRemoteTrackingBranch tb commitsha
liftIO $ fromFileUploaded <$> takeMVar cvar
startExport :: Remote -> ExportHandle -> MVar FileUploaded -> MVar AllFilled -> Git.LsTree.TreeItem -> CommandStart
startExport r db cvar allfilledvar ti = do
ek <- exportKey (Git.LsTree.sha ti)
stopUnless (notrecordedpresent ek) $ do
showStart ("export " ++ name r) f
ifM (either (const False) id <$> tryNonAsync (checkPresentExport (exportActions r) (asKey ek) loc))
( next $ next $ cleanupExport r db ek loc False
, do
liftIO $ modifyMVar_ cvar (pure . const True)
next $ performExport r db ek af (Git.LsTree.sha ti) loc
liftIO $ modifyMVar_ cvar (pure . const (FileUploaded True))
next $ performExport r db ek af (Git.LsTree.sha ti) loc allfilledvar
)
where
loc = mkExportLocation f
@@ -220,10 +260,10 @@ startExport r db cvar ti = do
-- will still list it, so also check location tracking.
<*> (notElem (uuid r) <$> loggedLocations (asKey ek))
performExport :: Remote -> ExportHandle -> ExportKey -> AssociatedFile -> Sha -> ExportLocation -> CommandPerform
performExport r db ek af contentsha loc = do
performExport :: Remote -> ExportHandle -> ExportKey -> AssociatedFile -> Sha -> ExportLocation -> MVar AllFilled -> CommandPerform
performExport r db ek af contentsha loc allfilledvar = do
let storer = storeExport (exportActions r)
sent <- case ek of
sent <- tryNonAsync $ case ek of
AnnexKey k -> ifM (inAnnex k)
( notifyTransfer Upload af $
-- Using noRetry here because interrupted
@@ -244,9 +284,15 @@ performExport r db ek af contentsha loc = do
liftIO $ L.hPut h b
liftIO $ hClose h
storer tmp sha1k loc nullMeterUpdate
if sent
then next $ cleanupExport r db ek loc True
else stop
let failedsend = liftIO $ modifyMVar_ allfilledvar (pure . const (AllFilled False))
case sent of
Right True -> next $ cleanupExport r db ek loc True
Right False -> do
failedsend
stop
Left err -> do
failedsend
throwM err
cleanupExport :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> Bool -> CommandCleanup
cleanupExport r db ek loc sent = do
@@ -339,15 +385,16 @@ startMoveFromTempName r db ek f = do
f' = getTopFilePath f
performRename :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> ExportLocation -> CommandPerform
performRename r db ek src dest = do
ifM (renameExport (exportActions r) (asKey ek) src dest)
( next $ cleanupRename r db ek src dest
-- In case the special remote does not support renaming,
-- unexport the src instead.
, do
performRename r db ek src dest =
renameExport (exportActions r) (asKey ek) src dest >>= \case
Just True -> next $ cleanupRename r db ek src dest
Just False -> do
warning "rename failed; deleting instead"
performUnexport r db [ek] src
)
fallbackdelete
-- Remote does not support renaming, so don't warn about it.
Nothing -> fallbackdelete
where
fallbackdelete = performUnexport r db [ek] src
cleanupRename :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> ExportLocation -> CommandCleanup
cleanupRename r db ek src dest = do

Command/Import.hs

@@ -1,10 +1,12 @@
{- git-annex command
-
- Copyright 2012-2017 Joey Hess <id@joeyh.name>
- Copyright 2012-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE ApplicativeDo #-}
module Command.Import where
import Command
@@ -12,6 +14,8 @@ import qualified Git
import qualified Annex
import qualified Command.Add
import qualified Command.Reinject
import qualified Types.Remote as Remote
import qualified Git.Ref
import Utility.CopyFile
import Backend
import Types.KeySource
@@ -20,29 +24,52 @@ import Annex.NumCopies
import Annex.FileMatcher
import Annex.Ingest
import Annex.InodeSentinal
import Annex.Import
import Annex.RemoteTrackingBranch
import Utility.InodeCache
import Logs.Location
import Git.FilePath
import Git.Types
import Git.Branch
import Types.Import
cmd :: Command
cmd = notBareRepo $
withGlobalOptions [jobsOption, jsonOptions, fileMatchingOptions] $
command "import" SectionCommon
"move and add files from outside git working copy"
paramPaths (seek <$$> optParser)
"import files from elsewhere into the repository"
(paramPaths ++ "|BRANCH[:SUBDIR]")
(seek <$$> optParser)
data ImportOptions
= LocalImportOptions
{ importFiles :: CmdParams
, duplicateMode :: DuplicateMode
}
| RemoteImportOptions
{ importFromRemote :: DeferredParse Remote
, importToBranch :: Branch
, importToSubDir :: Maybe FilePath
}
optParser :: CmdParamsDesc -> Parser ImportOptions
optParser desc = do
ps <- cmdParams desc
mfromremote <- optional $ parseRemoteOption <$> parseFromOption
dupmode <- fromMaybe Default <$> optional duplicateModeParser
return $ case mfromremote of
Nothing -> LocalImportOptions ps dupmode
Just r -> case ps of
[bs] ->
let (branch, subdir) = separate (== ':') bs
in RemoteImportOptions r
(Ref branch)
(if null subdir then Nothing else Just subdir)
_ -> giveup "expected BRANCH[:SUBDIR]"
data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates | ReinjectDuplicates
deriving (Eq)
data ImportOptions = ImportOptions
{ importFiles :: CmdParams
, duplicateMode :: DuplicateMode
}
optParser :: CmdParamsDesc -> Parser ImportOptions
optParser desc = ImportOptions
<$> cmdParams desc
<*> (fromMaybe Default <$> optional duplicateModeParser)
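
The BRANCH[:SUBDIR] split uses separate from git-annex's utility prelude; hedged examples of what the new parser sees:

-- separate (== ':') splits at the first matching character:
--   separate (== ':') "master:subdir" == ("master", "subdir")
--   separate (== ':') "master"        == ("master", "")
-- An empty second component means the whole branch tree is imported.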
duplicateModeParser :: Parser DuplicateMode
duplicateModeParser =
flag' Duplicate
@@ -67,17 +94,26 @@ duplicateModeParser =
)
seek :: ImportOptions -> CommandSeek
seek o = allowConcurrentOutput $ do
seek o@(LocalImportOptions {}) = allowConcurrentOutput $ do
repopath <- liftIO . absPath =<< fromRepo Git.repoPath
inrepops <- liftIO $ filter (dirContains repopath) <$> mapM absPath (importFiles o)
unless (null inrepops) $ do
giveup $ "cannot import files from inside the working tree (use git annex add instead): " ++ unwords inrepops
largematcher <- largeFilesMatcher
(commandAction . start largematcher (duplicateMode o))
(commandAction . startLocal largematcher (duplicateMode o))
`withPathContents` importFiles o
seek o@(RemoteImportOptions {}) = allowConcurrentOutput $ do
r <- getParsed (importFromRemote o)
unlessM (Remote.isImportSupported r) $
giveup "That remote does not support imports."
subdir <- maybe
(pure Nothing)
(Just <$$> inRepo . toTopFilePath)
(importToSubDir o)
seekRemote r (importToBranch o) subdir
start :: GetFileMatcher -> DuplicateMode -> (FilePath, FilePath) -> CommandStart
start largematcher mode (srcfile, destfile) =
startLocal :: GetFileMatcher -> DuplicateMode -> (FilePath, FilePath) -> CommandStart
startLocal largematcher mode (srcfile, destfile) =
ifM (liftIO $ isRegularFile <$> getSymbolicLinkStatus srcfile)
( do
showStart "import" destfile
@@ -209,3 +245,62 @@ verifyExisting key destfile (yes, no) = do
(tocheck, preverified) <- verifiableCopies key []
verifyEnoughCopiesToDrop [] key Nothing need [] preverified tocheck
(const yes) no
seekRemote :: Remote -> Branch -> Maybe TopFilePath -> CommandSeek
seekRemote remote branch msubdir = do
importtreeconfig <- case msubdir of
Nothing -> return ImportTree
Just subdir ->
let mk tree = pure $ ImportSubTree subdir tree
in fromtrackingbranch Git.Ref.tree >>= \case
Just tree -> mk tree
Nothing -> inRepo (Git.Ref.tree branch) >>= \case
Just tree -> mk tree
Nothing -> giveup $ "Unable to find base tree for branch " ++ fromRef branch
parentcommit <- fromtrackingbranch Git.Ref.sha
let importcommitconfig = ImportCommitConfig parentcommit ManualCommit importmessage
importable <- download importtreeconfig =<< listcontents
void $ includeCommandAction $
commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable
where
importmessage = "import from " ++ Remote.name remote
tb = mkRemoteTrackingBranch remote branch
fromtrackingbranch a = inRepo $ a (fromRemoteTrackingBranch tb)
listcontents = do
showStart' "list" (Just (Remote.name remote))
Remote.listImportableContents (Remote.importActions remote) >>= \case
Nothing -> do
showEndFail
giveup $ "Unable to list contents of " ++ Remote.name remote
Just importable -> do
showEndOk
return importable
download importtreeconfig importablecontents =
downloadImport remote importtreeconfig importablecontents >>= \case
Nothing -> giveup $ "Failed to import some files from " ++ Remote.name remote ++ ". Re-run command to resume import."
Just importable -> return importable
commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents Key -> CommandStart
commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable = do
showStart' "update" (Just $ fromRef $ fromRemoteTrackingBranch tb)
next $ do
importcommit <- buildImportCommit remote importtreeconfig importcommitconfig importable
next $ updateremotetrackingbranch importcommit
where
-- Update the tracking branch. Done even when there
-- is nothing new to import, to make sure it exists.
updateremotetrackingbranch importcommit =
case importcommit <|> parentcommit of
Just c -> do
setRemoteTrackingBranch tb c
return True
Nothing -> do
warning $ "Nothing to import and " ++ fromRef branch ++ " does not exist."
return False

Command/Info.hs

@@ -597,7 +597,7 @@ getDirStatInfo o dir = do
getTreeStatInfo :: InfoOptions -> Git.Ref -> Annex (Maybe StatInfo)
getTreeStatInfo o r = do
fast <- Annex.getState Annex.fast
(ls, cleanup) <- inRepo $ LsTree.lsTree r
(ls, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive r
(presentdata, referenceddata, repodata) <- go fast ls initial
ifM (liftIO cleanup)
( return $ Just $

Command/Sync.hs

@@ -1,11 +1,13 @@
{- git-annex command
-
- Copyright 2011 Joachim Breitner <mail@joachim-breitner.de>
- Copyright 2011-2017 Joey Hess <id@joeyh.name>
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE FlexibleContexts #-}
module Command.Sync (
cmd,
CurrBranch,
@@ -37,6 +39,7 @@ import qualified Git.Merge
import qualified Git.Types as Git
import qualified Git.Ref
import qualified Git
import Git.FilePath
import qualified Remote.Git
import Config
import Config.GitConfig
@@ -47,6 +50,7 @@ import Annex.Content
import Command.Get (getKey')
import qualified Command.Move
import qualified Command.Export
import qualified Command.Import
import Annex.Drop
import Annex.UUID
import Logs.UUID
@@ -57,7 +61,6 @@ import Annex.Ssh
import Annex.BloomFilter
import Annex.UpdateInstead
import Annex.Export
import Annex.LockFile
import Annex.TaggedPush
import Annex.CurrentBranch
import qualified Database.Export as Export
@@ -168,7 +171,8 @@ seek o = allowConcurrentOutput $ do
let gitremotes = filter Remote.gitSyncableRemote remotes
dataremotes <- filter (\r -> Remote.uuid r /= NoUUID)
<$> filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . Remote.gitconfig) remotes
let exportremotes = filter (exportTree . Remote.config) dataremotes
let (exportremotes, keyvalueremotes) = partition (exportTree . Remote.config) dataremotes
let importremotes = filter (importTree . Remote.config) dataremotes
if cleanupOption o
then do
@@ -185,13 +189,17 @@ seek o = allowConcurrentOutput $ do
, map (withbranch . pullRemote o mergeConfig) gitremotes
, [ mergeAnnex ]
]
whenM shouldsynccontent $ do
-- Send content to any exports first, in
-- case that lets content be dropped from
-- other repositories.
exportedcontent <- withbranch $ seekExportContent exportremotes
syncedcontent <- withbranch $ seekSyncContent o dataremotes
mapM_ (withbranch . importRemote o mergeConfig) importremotes
-- Send content to any exports before other
-- repositories, in case that lets content
-- be dropped from other repositories.
exportedcontent <- withbranch $
seekExportContent (Just o) exportremotes
syncedcontent <- withbranch $
seekSyncContent o keyvalueremotes
-- Transferring content can take a while,
-- and other changes can be pushed to the
-- git-annex branch on the remotes in the
@@ -221,10 +229,11 @@ mergeConfig :: [Git.Merge.MergeConfig]
mergeConfig =
[ Git.Merge.MergeNonInteractive
-- In several situations, unrelated histories should be merged
-- together. This includes pairing in the assistant, and merging
-- from a remote into a newly created direct mode repo.
-- together. This includes pairing in the assistant, merging
-- from a remote into a newly created direct mode repo,
-- and an initial merge from an import from a special remote.
-- (Once direct mode is removed, this could be changed, so only
-- the assistant uses it.)
-- the assistant and import from special remotes use it.)
, Git.Merge.MergeUnrelatedHistories
]
@@ -400,6 +409,23 @@ pullRemote o mergeconfig remote branch = stopUnless (pure $ pullOption o && want
[Param "fetch", Param $ Remote.name remote]
wantpull = remoteAnnexPull (Remote.gitconfig remote)
importRemote :: SyncOptions -> [Git.Merge.MergeConfig] -> Remote -> CurrBranch -> CommandSeek
importRemote o mergeconfig remote currbranch
| not (pullOption o) || not wantpull = noop
| otherwise = case remoteAnnexTrackingBranch (Remote.gitconfig remote) of
Nothing -> noop
Just tb -> do
let (b, s) = separate (== ':') (Git.fromRef tb)
let branch = Git.Ref b
let subdir = if null s
then Nothing
else Just (asTopFilePath s)
Command.Import.seekRemote remote branch subdir
void $ mergeRemote remote currbranch mergeconfig
(resolveMergeOverride o)
where
wantpull = remoteAnnexPull (Remote.gitconfig remote)
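
A hedged configuration sketch tying this together (the remote name and branch are examples, not from the source):

-- With: git config remote.dir.annex-tracking-branch master:subdir
-- importRemote imports into the remote tracking branch for master
-- and then merges it, analogous to git fetch followed by git merge.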
{- The remote probably has both a master and a synced/master branch.
- Which to merge from? Well, the master has whatever latest changes
- were committed (or pushed changes, if this is a bare remote),
@@ -680,32 +706,39 @@ syncFile ebloom rs af k = onlyActionOn' k $ do
put dest = includeCommandAction $
Command.Move.toStart' dest Command.Move.RemoveNever af k (mkActionItem af)
{- When a remote has an export-tracking branch, change the export to
- follow the current content of the branch. Otherwise, transfer any files
{- When a remote has an annex-tracking-branch configuration, change the export
- to contain the current content of the branch. Otherwise, transfer any files
- that were part of an export but are not in the remote yet.
-
- Returns True if any file transfers were made.
-}
seekExportContent :: [Remote] -> CurrBranch -> Annex Bool
seekExportContent rs (currbranch, _) = or <$> forM rs go
seekExportContent :: Maybe SyncOptions -> [Remote] -> CurrBranch -> Annex Bool
seekExportContent o rs (currbranch, _) = or <$> forM rs go
where
go r = withExclusiveLock (gitAnnexExportLock (Remote.uuid r)) $ do
db <- Export.openDb (Remote.uuid r)
exported <- case remoteAnnexExportTracking (Remote.gitconfig r) of
go r
| not (maybe True pullOption o) = return False
| not (remoteAnnexPush (Remote.gitconfig r)) = return False
| otherwise = bracket
(Export.openDb (Remote.uuid r))
Export.closeDb
(\db -> Export.writeLockDbWhile db (go' r db))
go' r db = do
(exported, mtbcommitsha) <- case remoteAnnexTrackingBranch (Remote.gitconfig r) of
Nothing -> nontracking r
Just b -> do
mcur <- inRepo $ Git.Ref.tree b
case mcur of
Nothing -> nontracking r
Just cur -> do
Command.Export.changeExport r db cur
return [mkExported cur []]
Export.closeDb db `after` fillexport r db (exportedTreeishes exported)
mtree <- inRepo $ Git.Ref.tree b
mtbcommitsha <- Command.Export.getExportCommit r b
case (mtree, mtbcommitsha) of
(Just tree, Just _) -> do
Command.Export.changeExport r db tree
return ([mkExported tree []], mtbcommitsha)
_ -> nontracking r
fillexport r db (exportedTreeishes exported) mtbcommitsha
nontracking r = do
exported <- getExport (Remote.uuid r)
maybe noop (warnnontracking r exported) currbranch
return exported
return (exported, Nothing)
warnnontracking r exported currb = inRepo (Git.Ref.tree currb) >>= \case
Just currt | not (any (== currt) (exportedTreeishes exported)) ->
@@ -713,15 +746,15 @@ seekExportContent rs (currbranch, _) = or <$> forM rs go
[ "Not updating export to " ++ Remote.name r
, "to reflect changes to the tree, because export"
, "tracking is not enabled. "
, "(Use git-annex export's --tracking option"
, "to enable it.)"
, "(Set " ++ gitconfig ++ " to enable it.)"
]
_ -> noop
where
gitconfig = show (remoteConfig r "tracking-branch")
fillexport _ _ [] = return False
fillexport r db (t:[]) = Command.Export.fillExport r db t
fillexport r _ _ = do
fillexport _ _ [] _ = return False
fillexport r db (t:[]) mtbcommitsha = Command.Export.fillExport r db t mtbcommitsha
fillexport r _ _ _ = do
warnExportConflict r
return False


@@ -24,7 +24,7 @@ import Utility.DataUnits
import Utility.CopyFile
import Types.Messages
import Types.Export
import Remote.Helper.Export
import Remote.Helper.ExportImport
import Remote.Helper.Chunked
import Git.Types

Config.hs

@@ -94,6 +94,12 @@ setRemoteIgnore r b = setConfig (remoteConfig r "ignore") (Git.Config.boolConfig
setRemoteBare :: Git.Repo -> Bool -> Annex ()
setRemoteBare r b = setConfig (remoteConfig r "bare") (Git.Config.boolConfig b)
exportTree :: Remote.RemoteConfig -> Bool
exportTree c = fromMaybe False $ yesNo =<< M.lookup "exporttree" c
importTree :: Remote.RemoteConfig -> Bool
importTree c = fromMaybe False $ yesNo =<< M.lookup "importtree" c
isBareRepo :: Annex Bool
isBareRepo = fromRepo Git.repoIsLocalBare
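
A quick check of the semantics, per the yesNo parsing above (the maps are illustrative):

-- importTree (M.fromList [("importtree", "yes")]) == True
-- importTree (M.fromList [("importtree", "no")])  == False
-- importTree M.empty                              == False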

Database/ContentIdentifier.hs (new file)

@@ -0,0 +1,154 @@
{- Sqlite database of ContentIdentifiers imported from special remotes.
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes #-}
module Database.ContentIdentifier (
ContentIdentifierHandle,
openDb,
closeDb,
flushDbQueue,
recordContentIdentifier,
getContentIdentifiers,
getContentIdentifierKeys,
recordAnnexBranchTree,
getAnnexBranchTree,
needsUpdateFromLog,
updateFromLog,
ContentIdentifiersId,
AnnexBranchId,
) where
import Database.Types
import qualified Database.Queue as H
import Database.Init
import Annex.Locations
import Annex.Common hiding (delete)
import qualified Annex.Branch
import Types.Import
import Git.Types
import Git.Sha
import Git.FilePath
import qualified Git.Ref
import qualified Git.DiffTree as DiffTree
import Logs
import qualified Logs.ContentIdentifier as Log
import Database.Persist.Sql hiding (Key)
import Database.Persist.TH
data ContentIdentifierHandle = ContentIdentifierHandle H.DbQueue
share [mkPersist sqlSettings, mkMigrate "migrateContentIdentifier"] [persistLowerCase|
ContentIdentifiers
remote UUID
cid ContentIdentifier
key IKey
ContentIdentifiersIndexRemoteKey remote key
ContentIdentifiersIndexRemoteCID remote cid
UniqueRemoteCidKey remote cid key
-- The last git-annex branch tree sha that was used to update
-- ContentIdentifiers
AnnexBranch
tree SRef
UniqueTree tree
|]
{- Opens the database, creating it if it doesn't exist yet.
-
- Only a single process should write to the database at a time, so guard
- any writes with the gitAnnexContentIdentifierLock.
-}
openDb :: Annex ContentIdentifierHandle
openDb = do
dbdir <- fromRepo gitAnnexContentIdentifierDbDir
let db = dbdir </> "db"
unlessM (liftIO $ doesFileExist db) $ do
initDb db $ void $
runMigrationSilent migrateContentIdentifier
h <- liftIO $ H.openDbQueue H.SingleWriter db "content_identifiers"
return $ ContentIdentifierHandle h
closeDb :: ContentIdentifierHandle -> Annex ()
closeDb (ContentIdentifierHandle h) = liftIO $ H.closeDbQueue h
queueDb :: ContentIdentifierHandle -> SqlPersistM () -> IO ()
queueDb (ContentIdentifierHandle h) = H.queueDb h checkcommit
where
-- commit queue after 1000 changes
checkcommit sz _lastcommittime
| sz > 1000 = return True
| otherwise = return False
flushDbQueue :: ContentIdentifierHandle -> IO ()
flushDbQueue (ContentIdentifierHandle h) = H.flushDbQueue h
-- Be sure to also update the git-annex branch when using this.
recordContentIdentifier :: ContentIdentifierHandle -> UUID -> ContentIdentifier -> Key -> IO ()
recordContentIdentifier h u cid k = queueDb h $ do
void $ insertUnique $ ContentIdentifiers u cid (toIKey k)
getContentIdentifiers :: ContentIdentifierHandle -> UUID -> Key -> IO [ContentIdentifier]
getContentIdentifiers (ContentIdentifierHandle h) u k = H.queryDbQueue h $ do
l <- selectList
[ ContentIdentifiersKey ==. toIKey k
, ContentIdentifiersRemote ==. u
] []
return $ map (contentIdentifiersCid . entityVal) l
getContentIdentifierKeys :: ContentIdentifierHandle -> UUID -> ContentIdentifier -> IO [Key]
getContentIdentifierKeys (ContentIdentifierHandle h) u cid =
H.queryDbQueue h $ do
l <- selectList
[ ContentIdentifiersCid ==. cid
, ContentIdentifiersRemote ==. u
] []
return $ map (fromIKey . contentIdentifiersKey . entityVal) l
recordAnnexBranchTree :: ContentIdentifierHandle -> Sha -> IO ()
recordAnnexBranchTree h s = queueDb h $ do
deleteWhere ([] :: [Filter AnnexBranch])
void $ insertUnique $ AnnexBranch $ toSRef s
getAnnexBranchTree :: ContentIdentifierHandle -> IO Sha
getAnnexBranchTree (ContentIdentifierHandle h) = H.queryDbQueue h $ do
l <- selectList ([] :: [Filter AnnexBranch]) []
case l of
(s:[]) -> return $ fromSRef $ annexBranchTree $ entityVal s
_ -> return emptyTree
{- Check if the git-annex branch has been updated and the database needs
- to be updated with any new content identifiers in it. -}
needsUpdateFromLog :: ContentIdentifierHandle -> Annex (Maybe (Sha, Sha))
needsUpdateFromLog db = do
oldtree <- liftIO $ getAnnexBranchTree db
inRepo (Git.Ref.tree Annex.Branch.fullname) >>= \case
Just currtree | currtree /= oldtree ->
return $ Just (oldtree, currtree)
_ -> return Nothing
{- The database should be locked for write when calling this. -}
updateFromLog :: ContentIdentifierHandle -> (Sha, Sha) -> Annex ()
updateFromLog db (oldtree, currtree) = do
(l, cleanup) <- inRepo $
DiffTree.diffTreeRecursive oldtree currtree
mapM_ go l
void $ liftIO $ cleanup
liftIO $ do
recordAnnexBranchTree db currtree
flushDbQueue db
where
go ti = case extLogFileKey remoteContentIdentifierExt (getTopFilePath (DiffTree.file ti)) of
Nothing -> return ()
Just k -> do
l <- Log.getContentIdentifiers k
liftIO $ forM_ l $ \(u, cids) ->
forM_ cids $ \cid ->
recordContentIdentifier db u cid k
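
Callers refresh the database from the git-annex branch before querying, under the content-identifier lock; a sketch of the pattern, mirroring its use in Annex/Import.hs above:

-- Open, bring up to date from the branch log, query, close.
withCidDb :: (ContentIdentifierHandle -> Annex a) -> Annex a
withCidDb a = withExclusiveLock gitAnnexContentIdentifierLock $
	bracket openDb closeDb $ \db -> do
		needsUpdateFromLog db >>= maybe noop (updateFromLog db)
		a db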

Database/Export.hs

@@ -1,6 +1,6 @@
{- Sqlite database used for exports to special remotes.
-
- Copyright 2017 Joey Hess <id@joeyh.name>
- Copyright 2017-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -15,6 +15,7 @@ module Database.Export (
ExportHandle,
openDb,
closeDb,
writeLockDbWhile,
flushDbQueue,
addExportedLocation,
removeExportedLocation,
@@ -23,16 +24,20 @@
getExportTreeCurrent,
recordExportTreeCurrent,
getExportTree,
getExportTreeKey,
addExportTree,
removeExportTree,
updateExportTree,
updateExportTree',
updateExportTreeFromLog,
updateExportDb,
ExportedId,
ExportedDirectoryId,
ExportTreeId,
ExportTreeCurrentId,
ExportUpdateResult(..),
ExportDiffUpdater,
runExportDiffUpdater,
) where
import Database.Types
@@ -44,6 +49,7 @@ import Types.Export
import Annex.Export
import qualified Logs.Export as Log
import Annex.LockFile
import Annex.LockPool
import Git.Types
import Git.Sha
import Git.FilePath
@@ -167,6 +173,20 @@ getExportTree (ExportHandle h _) k = H.queryDbQueue h $ do
where
ik = toIKey k
{- Get keys that might be currently exported to a location.
-
- Note that the database does not currently have an index to make this
- fast.
-
- Note that this does not see recently queued changes.
-}
getExportTreeKey :: ExportHandle -> ExportLocation -> IO [Key]
getExportTreeKey (ExportHandle h _) el = H.queryDbQueue h $ do
map (fromIKey . exportTreeKey . entityVal)
<$> selectList [ExportTreeFile ==. ef] []
where
ef = toSFilePath (fromExportLocation el)
addExportTree :: ExportHandle -> Key -> ExportLocation -> IO ()
addExportTree h k loc = queueDb h $
void $ insertUnique $ ExportTree ik ef
@@ -181,48 +201,131 @@ removeExportTree h k loc = queueDb h $
ik = toIKey k
ef = toSFilePath (fromExportLocation loc)
{- Diff from the old to the new tree and update the ExportTree table. -}
updateExportTree :: ExportHandle -> Sha -> Sha -> Annex ()
updateExportTree h old new = do
-- An action that is passed the old and new values that were exported,
-- and updates state.
type ExportDiffUpdater
= ExportHandle
-> Maybe ExportKey
-- ^ old exported key
-> Maybe ExportKey
-- ^ new exported key
-> Git.DiffTree.DiffTreeItem
-> Annex ()
mkExportDiffUpdater
:: (ExportHandle -> Key -> ExportLocation -> IO ())
-> (ExportHandle -> Key -> ExportLocation -> IO ())
-> ExportDiffUpdater
mkExportDiffUpdater removeold addnew h srcek dstek i = do
case srcek of
Nothing -> return ()
Just k -> liftIO $ removeold h (asKey k) loc
case dstek of
Nothing -> return ()
Just k -> liftIO $ addnew h (asKey k) loc
where
loc = mkExportLocation $ getTopFilePath $ Git.DiffTree.file i
runExportDiffUpdater :: ExportDiffUpdater -> ExportHandle -> Sha -> Sha -> Annex ()
runExportDiffUpdater updater h old new = do
(diff, cleanup) <- inRepo $
Git.DiffTree.diffTreeRecursive old new
forM_ diff $ \i -> do
srcek <- getek (Git.DiffTree.srcsha i)
dstek <- getek (Git.DiffTree.dstsha i)
updateExportTree' h srcek dstek i
updater h srcek dstek i
void $ liftIO cleanup
where
getek sha
| sha == nullSha = return Nothing
| otherwise = Just <$> exportKey sha
updateExportTree' :: ExportHandle -> Maybe ExportKey -> Maybe ExportKey -> Git.DiffTree.DiffTreeItem -> Annex ()
updateExportTree' h srcek dstek i = do
case srcek of
Nothing -> return ()
Just k -> liftIO $ removeExportTree h (asKey k) loc
case dstek of
Nothing -> return ()
Just k -> liftIO $ addExportTree h (asKey k) loc
{- Diff from the old to the new tree and update the ExportTree table. -}
updateExportTree :: ExportHandle -> Sha -> Sha -> Annex ()
updateExportTree = runExportDiffUpdater updateExportTree'
updateExportTree' :: ExportDiffUpdater
updateExportTree' = mkExportDiffUpdater removeExportTree addExportTree
{- Diff from the old to the new tree and update all tables in the export
- database. Should only be used when all the files in the new tree have
- been verified to already be present in the export remote. -}
updateExportDb :: ExportHandle -> Sha -> Sha -> Annex ()
updateExportDb = runExportDiffUpdater $ mkExportDiffUpdater removeold addnew
where
loc = mkExportLocation $ getTopFilePath $ Git.DiffTree.file i
removeold h k loc = liftIO $ do
removeExportTree h k loc
removeExportedLocation h k loc
addnew h k loc = liftIO $ do
addExportTree h k loc
addExportedLocation h k loc
{- Runs an action with the database locked for write. Waits for any other
- writers to finish first. The queue is flushed at the end.
-
- This first updates the ExportTree table with any new information
- from the git-annex branch export log.
-}
writeLockDbWhile :: ExportHandle -> Annex a -> Annex a
writeLockDbWhile db@(ExportHandle _ u) a = do
updatelck <- takeExclusiveLock (gitAnnexExportUpdateLock u)
withExclusiveLock (gitAnnexExportLock u) $ do
bracket_ (setup updatelck) cleanup a
where
setup updatelck = do
void $ updateExportTreeFromLog' db
-- flush the update so it's available immediately to
-- anything waiting on the updatelck
liftIO $ flushDbQueue db
liftIO $ dropLock updatelck
cleanup = liftIO $ flushDbQueue db
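
A trimmed sketch of the intended call pattern, using only the Database.Export API exported above:

-- Run db-mutating actions under the write lock; writeLockDbWhile
-- flushes the queue when the action finishes.
withExportDb :: UUID -> (ExportHandle -> Annex a) -> Annex a
withExportDb u a = do
	db <- openDb u
	r <- writeLockDbWhile db (a db)
	closeDb db
	return r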
data ExportUpdateResult = ExportUpdateSuccess | ExportUpdateConflict
deriving (Eq)
{- Updates the ExportTree table with information from the
- git-annex branch export log.
-
- This can safely be called whether the database is locked for write or
- not. Either way, it will block until the update is complete.
-}
updateExportTreeFromLog :: ExportHandle -> Annex ExportUpdateResult
updateExportTreeFromLog db@(ExportHandle _ u) =
withExclusiveLock (gitAnnexExportLock u) $ do
old <- liftIO $ fromMaybe emptyTree
<$> getExportTreeCurrent db
l <- Log.getExport u
case Log.exportedTreeishes l of
[] -> return ExportUpdateSuccess
(new:[])
| new /= old -> do
updateExportTree db old new
liftIO $ recordExportTreeCurrent db new
liftIO $ flushDbQueue db
return ExportUpdateSuccess
| new == old -> return ExportUpdateSuccess
_ts -> return ExportUpdateConflict
updateExportTreeFromLog db@(ExportHandle _ u) =
-- If another process or thread is performing the update,
-- this will block until it's done.
withExclusiveLock (gitAnnexExportUpdateLock u) $ do
-- If the database is locked by something else,
-- this will not run the update. But, in that case,
-- writeLockDbWhile is running, and has already
-- completed the update, so we don't need to do anything.
mr <- tryExclusiveLock (gitAnnexExportLock u) $
updateExportTreeFromLog' db
case mr of
Just r -> return r
Nothing -> do
old <- liftIO $ fromMaybe emptyTree
<$> getExportTreeCurrent db
l <- Log.getExport u
return $ case Log.exportedTreeishes l of
[] -> ExportUpdateSuccess
(new:[])
| new /= old -> ExportUpdateSuccess
| new == old -> ExportUpdateSuccess
_ts -> ExportUpdateConflict
{- The database should be locked when calling this. -}
updateExportTreeFromLog' :: ExportHandle -> Annex ExportUpdateResult
updateExportTreeFromLog' db@(ExportHandle _ u) = do
old <- liftIO $ fromMaybe emptyTree
<$> getExportTreeCurrent db
l <- Log.getExport u
case Log.exportedTreeishes l of
[] -> return ExportUpdateSuccess
(new:[])
| new /= old -> do
updateExportTree db old new
liftIO $ recordExportTreeCurrent db new
liftIO $ flushDbQueue db
return ExportUpdateSuccess
| new == old -> return ExportUpdateSuccess
_ts -> return ExportUpdateConflict

Database/Types.hs

@@ -1,22 +1,29 @@
{- types for SQL databases
-
- Copyright 2015-2017 Joey Hess <id@joeyh.name>
- Copyright 2015-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE TemplateHaskell #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Database.Types where
import Database.Persist.TH
import Database.Persist.Class hiding (Key)
import Database.Persist.Sql hiding (Key)
import Data.Maybe
import Data.Char
import qualified Data.ByteString as S
import qualified Data.Text as T
import Utility.PartialPrelude
import Key
import Utility.InodeCache
import Git.Types (Ref(..))
import Types.UUID
import Types.Import
-- A serialized Key
newtype SKey = SKey String
@@ -112,3 +119,26 @@ toSRef = SRef
fromSRef :: SRef -> Ref
fromSRef (SRef r) = r
instance PersistField UUID where
toPersistValue u = toPersistValue b
where
b :: S.ByteString
b = fromUUID u
fromPersistValue v = toUUID <$> go
where
go :: Either T.Text S.ByteString
go = fromPersistValue v
instance PersistFieldSql UUID where
sqlType _ = SqlBlob
instance PersistField ContentIdentifier where
toPersistValue (ContentIdentifier b) = toPersistValue b
fromPersistValue v = ContentIdentifier <$> go
where
go :: Either T.Text S.ByteString
go = fromPersistValue v
instance PersistFieldSql ContentIdentifier where
sqlType _ = SqlBlob

Git/LsTree.hs

@@ -1,6 +1,6 @@
{- git ls-tree interface
-
- Copyright 2011-2016 Joey Hess <id@joeyh.name>
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -9,6 +9,7 @@
module Git.LsTree (
TreeItem(..),
LsTreeMode(..),
lsTree,
lsTree',
lsTreeParams,
@@ -34,26 +35,30 @@ data TreeItem = TreeItem
, file :: TopFilePath
} deriving Show
{- Lists the complete contents of a tree, recursing into sub-trees,
- with lazy output. -}
lsTree :: Ref -> Repo -> IO ([TreeItem], IO Bool)
data LsTreeMode = LsTreeRecursive | LsTreeNonRecursive
{- Lists the contents of a tree, with lazy output. -}
lsTree :: LsTreeMode -> Ref -> Repo -> IO ([TreeItem], IO Bool)
lsTree = lsTree' []
lsTree' :: [CommandParam] -> Ref -> Repo -> IO ([TreeItem], IO Bool)
lsTree' ps t repo = do
(l, cleanup) <- pipeNullSplit (lsTreeParams t ps) repo
lsTree' :: [CommandParam] -> LsTreeMode -> Ref -> Repo -> IO ([TreeItem], IO Bool)
lsTree' ps lsmode t repo = do
(l, cleanup) <- pipeNullSplit (lsTreeParams lsmode t ps) repo
return (map parseLsTree l, cleanup)
lsTreeParams :: Ref -> [CommandParam] -> [CommandParam]
lsTreeParams r ps =
lsTreeParams :: LsTreeMode -> Ref -> [CommandParam] -> [CommandParam]
lsTreeParams lsmode r ps =
[ Param "ls-tree"
, Param "--full-tree"
, Param "-z"
, Param "-r"
] ++ ps ++
] ++ recursiveparams ++ ps ++
[ Param "--"
, File $ fromRef r
]
where
recursiveparams = case lsmode of
LsTreeRecursive -> [ Param "-r" ]
LsTreeNonRecursive -> []
{- Lists specified files in a tree. -}
lsTreeFiles :: Ref -> [FilePath] -> Repo -> IO [TreeItem]

View file

@ -1,6 +1,6 @@
{- git ref stuff
-
- Copyright 2011-2013 Joey Hess <id@joeyh.name>
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -33,11 +33,18 @@ describe = fromRef . base
- Converts such a fully qualified ref into a base ref
- (eg: master or origin/master). -}
base :: Ref -> Ref
base = Ref . remove "refs/heads/" . remove "refs/remotes/" . fromRef
base = removeBase "refs/heads/" . removeBase "refs/remotes/"
{- Removes a directory such as "refs/heads/master" from a
- fully qualified ref. Any ref not starting with it is left as-is. -}
removeBase :: String -> Ref -> Ref
removeBase dir (Ref r)
| prefix `isPrefixOf` r = Ref (drop (length prefix) r)
| otherwise = Ref r
where
remove prefix s
| prefix `isPrefixOf` s = drop (length prefix) s
| otherwise = s
prefix = case end dir of
['/'] -> dir
_ -> dir ++ "/"
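For illustration (an editor's note, not part of the commit), the trailing-slash handling above means both spellings of the prefix behave the same:
	-- removeBase "refs/heads"  (Ref "refs/heads/master") == Ref "master"
	-- removeBase "refs/heads/" (Ref "refs/heads/master") == Ref "master"
	-- removeBase "refs/heads"  (Ref "refs/tags/v1")      == Ref "refs/tags/v1"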
{- Given a directory such as "refs/remotes/origin", and a ref such as
- refs/heads/master, yields a version of that ref under the directory,
@ -88,9 +95,11 @@ headExists repo = do
sha :: Branch -> Repo -> IO (Maybe Sha)
sha branch repo = process <$> showref repo
where
showref = pipeReadStrict [Param "show-ref",
Param "--hash", -- get the hash
Param $ fromRef branch]
showref = pipeReadStrict
[ Param "show-ref"
, Param "--hash" -- get the hash
, Param $ fromRef branch
]
process [] = Nothing
process s = Just $ Ref $ firstLine s

View file

@ -341,7 +341,7 @@ verifyTree :: MissingObjects -> Sha -> Repo -> IO Bool
verifyTree missing treesha r
| S.member treesha missing = return False
| otherwise = do
(ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams treesha []) r
(ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams LsTree.LsTreeRecursive treesha []) r
let objshas = map (LsTree.sha . LsTree.parseLsTree) ls
if any (`S.member` missing) objshas
then do

View file

@ -1,6 +1,6 @@
{- git trees
-
- Copyright 2016 Joey Hess <id@joeyh.name>
- Copyright 2016-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -12,8 +12,13 @@ module Git.Tree (
TreeContent(..),
getTree,
recordTree,
recordTree',
TreeItem(..),
treeItemsToTree,
adjustTree,
graftTree,
graftTree',
withMkTreeHandle,
treeMode,
) where
@ -47,15 +52,15 @@ data TreeContent
deriving (Show, Eq, Ord)
{- Gets the Tree for a Ref. -}
getTree :: Ref -> Repo -> IO Tree
getTree r repo = do
(l, cleanup) <- lsTreeWithObjects r repo
getTree :: LsTree.LsTreeMode -> Ref -> Repo -> IO Tree
getTree lstreemode r repo = do
(l, cleanup) <- lsTreeWithObjects lstreemode r repo
let !t = either (\e -> error ("ls-tree parse error:" ++ e)) id
(extractTree l)
void cleanup
return t
lsTreeWithObjects :: Ref -> Repo -> IO ([LsTree.TreeItem], IO Bool)
lsTreeWithObjects :: LsTree.LsTreeMode -> Ref -> Repo -> IO ([LsTree.TreeItem], IO Bool)
lsTreeWithObjects = LsTree.lsTree' [Param "-t"]
newtype MkTreeHandle = MkTreeHandle CoProcess.CoProcessHandle
@ -181,7 +186,7 @@ adjustTree
-> m Sha
adjustTree adjusttreeitem addtreeitems removefiles r repo =
withMkTreeHandle repo $ \h -> do
(l, cleanup) <- liftIO $ lsTreeWithObjects r repo
(l, cleanup) <- liftIO $ lsTreeWithObjects LsTree.LsTreeRecursive r repo
(l', _, _) <- go h False [] 1 inTopTree l
l'' <- adjustlist h 0 inTopTree (const True) l'
sha <- liftIO $ mkTree h l''
@ -229,6 +234,69 @@ adjustTree adjusttreeitem addtreeitems removefiles r repo =
removed (TreeBlob f _ _) = S.member (normalise (gitPath f)) removeset
removed _ = False
{- Grafts subtree into the basetree at the specified location, replacing
- anything that the basetree already had at that location.
-
- This is generally much more efficient than using getTree and recordTree,
- or adjustTree, since it only needs to traverse from the top of the tree
- down to the graft location. It does not buffer the whole tree in memory.
-}
graftTree
:: Sha
-> TopFilePath
-> Sha
-> Repo
-> IO Sha
graftTree subtree graftloc basetree repo =
withMkTreeHandle repo $ graftTree' subtree graftloc basetree repo
graftTree'
:: Sha
-> TopFilePath
-> Sha
-> Repo
-> MkTreeHandle
-> IO Sha
graftTree' subtree graftloc basetree repo hdl = go basetree graftdirs
where
go tsha (topmostgraphdir:restgraphdirs) = do
Tree t <- getTree LsTree.LsTreeNonRecursive tsha repo
t' <- case partition isabovegraft t of
([], _) -> do
graft <- graftin (topmostgraphdir:restgraphdirs)
return (graft:t)
-- normally there can only be one matching item
-- in the tree, but it's theoretically possible
-- for a git tree to have multiple items with the
-- same name, so process them all
(matching, rest) -> do
newshas <- forM matching $ \case
RecordedSubTree tloc tsha' _
| null restgraphdirs -> return $
RecordedSubTree tloc subtree []
| otherwise -> do
tsha'' <- go tsha' restgraphdirs
return $ RecordedSubTree tloc tsha'' []
_ -> graftin (topmostgraphdir:restgraphdirs)
return (newshas ++ rest)
mkTree hdl t'
go _ [] = return subtree
isabovegraft i = beneathSubTree i graftloc || gitPath i == gitPath graftloc
graftin t = recordSubTree hdl $ graftin' t
graftin' [] = RecordedSubTree graftloc subtree []
graftin' (d:rest)
| d == graftloc = graftin' []
| otherwise = NewSubTree d [graftin' rest]
-- For a graftloc of "foo/bar/baz", this generates
-- ["foo", "foo/bar", "foo/bar/baz"]
graftdirs = map (asTopFilePath . toInternalGitPath) $
mkpaths [] $ splitDirectories $ gitPath graftloc
mkpaths _ [] = []
mkpaths base (d:rest) = (joinPath base </> d) : mkpaths (base ++ [d]) rest
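A minimal usage sketch (editor's illustration; the shas and the graft location are hypothetical):
	graftExample :: Sha -> Sha -> Repo -> IO Sha
	graftExample exporttree branchtree repo =
		-- record exporttree under export.tree within branchtree,
		-- touching only the trees along that path
		graftTree exporttree (asTopFilePath "export.tree") branchtree repo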
{- Assumes the list is ordered, with tree objects coming right before their
- contents. -}
extractTree :: [LsTree.TreeItem] -> Either String Tree

Logs.hs
View file

@ -1,6 +1,6 @@
{- git-annex log file names
-
- Copyright 2013-2018 Joey Hess <id@joeyh.name>
- Copyright 2013-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -12,7 +12,7 @@ import Annex.DirHashes
{- There are several varieties of log file formats. -}
data LogVariety
= UUIDBasedLog
= OldUUIDBasedLog
| NewUUIDBasedLog
| ChunkLog Key
| PresenceLog Key
@ -24,16 +24,18 @@ data LogVariety
- of logs used by git-annex, if it's a known path. -}
getLogVariety :: FilePath -> Maybe LogVariety
getLogVariety f
| f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog
| f `elem` topLevelOldUUIDBasedLogs = Just OldUUIDBasedLog
| f `elem` topLevelNewUUIDBasedLogs = Just NewUUIDBasedLog
| isRemoteStateLog f = Just NewUUIDBasedLog
| isChunkLog f = ChunkLog <$> chunkLogFileKey f
| isRemoteContentIdentifierLog f = Just NewUUIDBasedLog
| isChunkLog f = ChunkLog <$> extLogFileKey chunkLogExt f
| isRemoteMetaDataLog f = Just RemoteMetaDataLog
| isMetaDataLog f || f `elem` otherLogs = Just OtherLog
| otherwise = PresenceLog <$> firstJust (presenceLogs f)
{- All the uuid-based logs stored in the top of the git-annex branch. -}
topLevelUUIDBasedLogs :: [FilePath]
topLevelUUIDBasedLogs =
{- All the old-format uuid-based logs stored in the top of the git-annex branch. -}
topLevelOldUUIDBasedLogs :: [FilePath]
topLevelOldUUIDBasedLogs =
[ uuidLog
, remoteLog
, trustLog
@ -44,9 +46,15 @@ topLevelUUIDBasedLogs =
, activityLog
, differenceLog
, multicastLog
, exportLog
]
{- All the new-format uuid-based logs stored in the top of the git-annex branch. -}
topLevelNewUUIDBasedLogs :: [FilePath]
topLevelNewUUIDBasedLogs =
[ exportLog
]
{- All the ways to get a key from a presence log file -}
presenceLogs :: FilePath -> [Maybe Key]
presenceLogs f =
@ -54,7 +62,7 @@ presenceLogs f =
, locationLogFileKey f
]
{- Logs that are neither UUID based nor presence logs. -}
{- Top-level logs that are neither UUID based nor presence logs. -}
otherLogs :: [FilePath]
otherLogs =
[ numcopiesLog
@ -107,16 +115,6 @@ exportLog = "export.log"
locationLogFile :: GitConfig -> Key -> String
locationLogFile config key = branchHashDir config key </> keyFile key ++ ".log"
{- Converts a pathname into a key if it's a location log. -}
locationLogFileKey :: FilePath -> Maybe Key
locationLogFileKey path
| ["remote", "web"] `isPrefixOf` splitDirectories dir = Nothing
| ext == ".log" = fileKey base
| otherwise = Nothing
where
(dir, file) = splitFileName path
(base, ext) = splitAt (length file - 4) file
{- The filename of the url log for a given key. -}
urlLogFile :: GitConfig -> Key -> FilePath
urlLogFile config key = branchHashDir config key </> keyFile key ++ urlLogExt
@ -133,17 +131,6 @@ oldurlLogs config key =
urlLogExt :: String
urlLogExt = ".log.web"
{- Converts a url log file into a key.
- (Does not work on oldurlLogs.) -}
urlLogFileKey :: FilePath -> Maybe Key
urlLogFileKey path
| ext == urlLogExt = fileKey base
| otherwise = Nothing
where
file = takeFileName path
(base, ext) = splitAt (length file - extlen) file
extlen = length urlLogExt
{- Does not work on oldurllogs. -}
isUrlLog :: FilePath -> Bool
isUrlLog file = urlLogExt `isSuffixOf` file
@ -163,15 +150,6 @@ isRemoteStateLog path = remoteStateLogExt `isSuffixOf` path
chunkLogFile :: GitConfig -> Key -> FilePath
chunkLogFile config key = branchHashDir config key </> keyFile key ++ chunkLogExt
chunkLogFileKey :: FilePath -> Maybe Key
chunkLogFileKey path
| ext == chunkLogExt = fileKey base
| otherwise = Nothing
where
file = takeFileName path
(base, ext) = splitAt (length file - extlen) file
extlen = length chunkLogExt
chunkLogExt :: String
chunkLogExt = ".log.cnk"
@ -197,3 +175,35 @@ remoteMetaDataLogExt = ".log.rmet"
isRemoteMetaDataLog :: FilePath -> Bool
isRemoteMetaDataLog path = remoteMetaDataLogExt `isSuffixOf` path
{- The filename of the remote content identifier log for a given key. -}
remoteContentIdentifierLogFile :: GitConfig -> Key -> FilePath
remoteContentIdentifierLogFile config key = branchHashDir config key </> keyFile key ++ remoteContentIdentifierExt
remoteContentIdentifierExt :: String
remoteContentIdentifierExt = ".log.cid"
isRemoteContentIdentifierLog :: FilePath -> Bool
isRemoteContentIdentifierLog path = remoteContentIdentifierExt `isSuffixOf` path
{- From an extension and a log filename, get the key that it's a log for. -}
extLogFileKey :: String -> FilePath -> Maybe Key
extLogFileKey expectedext path
| ext == expectedext = fileKey base
| otherwise = Nothing
where
file = takeFileName path
(base, ext) = splitAt (length file - extlen) file
extlen = length expectedext
{- Converts a url log file into a key.
- (Does not work on oldurlLogs.) -}
urlLogFileKey :: FilePath -> Maybe Key
urlLogFileKey = extLogFileKey urlLogExt
{- Converts a pathname into a key if it's a location log. -}
locationLogFileKey :: FilePath -> Maybe Key
locationLogFileKey path
-- Want only xx/yy/foo.log, not .log files in other places.
| length (splitDirectories path) /= 3 = Nothing
| otherwise = extLogFileKey ".log" path
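To illustrate the helpers above (editor's note; <keyname> stands for an escaped key filename):
	-- extLogFileKey ".log.web" "aaa/bbb/<keyname>.log.web" ~> fileKey "<keyname>"
	-- locationLogFileKey "aaa/bbb/<keyname>.log"           ~> fileKey "<keyname>"
	-- locationLogFileKey "aaa/<keyname>.log"               ~> Nothing (not xx/yy/file.log)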

View file

@ -29,12 +29,12 @@ recordActivity :: Activity -> UUID -> Annex ()
recordActivity act uuid = do
c <- liftIO currentVectorClock
Annex.Branch.change activityLog $
buildLog buildActivity
buildLogOld buildActivity
. changeLog c uuid (Right act)
. parseLog parseActivity
. parseLogOld parseActivity
lastActivities :: Maybe Activity -> Annex (Log Activity)
lastActivities wantact = parseLog (onlywanted =<< parseActivity)
lastActivities wantact = parseLogOld (onlywanted =<< parseActivity)
<$> Annex.Branch.get activityLog
where
onlywanted (Right a) | wanted a = pure a

Logs/ContentIdentifier.hs
View file

@ -0,0 +1,47 @@
{- Remote content identifier logs.
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Logs.ContentIdentifier (
module X,
recordContentIdentifier,
getContentIdentifiers,
) where
import Annex.Common
import Logs
import Logs.MapLog
import Types.Import
import qualified Annex.Branch
import Logs.ContentIdentifier.Pure as X
import qualified Annex
import qualified Data.Map as M
import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.List.NonEmpty as NonEmpty
-- | Records a remote's content identifier and the key that it corresponds to.
--
-- A remote may use multiple content identifiers for the same key over time,
-- so ones that were recorded before are preserved.
recordContentIdentifier :: UUID -> ContentIdentifier -> Key -> Annex ()
recordContentIdentifier u cid k = do
c <- liftIO currentVectorClock
config <- Annex.getGitConfig
Annex.Branch.change (remoteContentIdentifierLogFile config k) $
buildLog . addcid c . parseLog
where
addcid c l = changeMapLog c u (cid :| contentIdentifierList (M.lookup u m)) l
where
m = simpleMap l
-- | Get all known content identifiers for a key.
getContentIdentifiers :: Key -> Annex [(UUID, [ContentIdentifier])]
getContentIdentifiers k = do
config <- Annex.getGitConfig
map (\(u, l) -> (u, NonEmpty.toList l) )
. M.toList . simpleMap . parseLog
<$> Annex.Branch.get (remoteContentIdentifierLogFile config k)

View file

@ -0,0 +1,75 @@
{- Remote content identifier logs, pure operations.
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Logs.ContentIdentifier.Pure where
import Annex.Common
import Logs.UUIDBased
import Types.Import
import Utility.Base64
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import Data.ByteString.Builder
import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.List.NonEmpty
-- A ContentIdentifier can contain "", so to avoid ambiguity
-- in parsing, the list of them in the log must be non-empty.
type ContentIdentifierLog = Log (NonEmpty ContentIdentifier)
contentIdentifierList :: Maybe (NonEmpty ContentIdentifier) -> [ContentIdentifier]
contentIdentifierList (Just l) = Data.List.NonEmpty.toList l
contentIdentifierList Nothing = []
buildLog :: ContentIdentifierLog -> Builder
buildLog = buildLogNew buildContentIdentifierList
buildContentIdentifierList :: (NonEmpty ContentIdentifier) -> Builder
buildContentIdentifierList l = case l of
c :| [] -> buildcid c
(c :| cs) -> go (c:cs)
where
buildcid (ContentIdentifier c)
| S8.any (`elem` [':', '\r', '\n']) c || "!" `S8.isPrefixOf` c =
charUtf8 '!' <> byteString (toB64' c)
| otherwise = byteString c
go [] = mempty
go (c:[]) = buildcid c
go (c:cs) = buildcid c <> charUtf8 ':' <> go cs
parseLog :: L.ByteString -> ContentIdentifierLog
parseLog = parseLogNew parseContentIdentifierList
parseContentIdentifierList :: A.Parser (NonEmpty ContentIdentifier)
parseContentIdentifierList = do
first <- cidparser
listparser first []
where
cidparser = do
b <- A8.takeWhile (/= ':')
return $ if "!" `S8.isPrefixOf` b
then ContentIdentifier $ fromMaybe b (fromB64Maybe' (S.drop 1 b))
else ContentIdentifier b
listparser first rest = ifM A8.atEnd
( return (first :| reverse rest)
, do
_ <- A8.char ':'
cid <- cidparser
listparser first (cid:rest)
)
prop_parse_build_contentidentifier_log :: NonEmpty ContentIdentifier -> Bool
prop_parse_build_contentidentifier_log l =
let v = A.parseOnly parseContentIdentifierList $ L.toStrict $
toLazyByteString $ buildContentIdentifierList l
in v == Right l
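As an editor's illustration of the value format (assuming toB64' is plain base64 of the ByteString):
	-- buildContentIdentifierList (ContentIdentifier "foo" :| [ContentIdentifier "bar"])
	--   renders as: foo:bar
	-- buildContentIdentifierList (ContentIdentifier "a:b" :| [])
	--   renders as: !YTpi   (the ':' forces escaping, and base64 "a:b" is "YTpi")
	-- so parseContentIdentifierList can split on ':' unambiguously.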

View file

@ -27,9 +27,9 @@ recordDifferences :: Differences -> UUID -> Annex ()
recordDifferences ds@(Differences {}) uuid = do
c <- liftIO currentVectorClock
Annex.Branch.change differenceLog $
buildLog byteString
buildLogOld byteString
. changeLog c uuid (encodeBS $ showDifferences ds)
. parseLog A.takeByteString
. parseLogOld A.takeByteString
recordDifferences UnknownDifferences _ = return ()
-- Map of UUIDs that have Differences recorded.

View file

@ -20,7 +20,7 @@ import Logs.UUIDBased
parseDifferencesLog :: L.ByteString -> (M.Map UUID Differences)
parseDifferencesLog = simpleMap
. parseLog (readDifferences . decodeBS <$> A.takeByteString)
. parseLogOld (readDifferences . decodeBS <$> A.takeByteString)
-- The sum of all recorded differences, across all UUIDs.
allDifferences :: M.Map UUID Differences -> Differences

View file

@ -124,7 +124,7 @@ recordExportBeginning remoteuuid newtree = do
buildExportLog
. changeMapLog c ep new
. parseExportLog
Annex.Branch.graftTreeish newtree (asTopFilePath "export.tree")
Annex.Branch.rememberTreeish newtree (asTopFilePath "export.tree")
parseExportLog :: L.ByteString -> MapLog ExportParticipants Exported
parseExportLog = parseMapLog exportParticipantsParser exportedParser

View file

@ -40,7 +40,7 @@ groupChange uuid@(UUID _) modifier = do
curr <- lookupGroups uuid
c <- liftIO currentVectorClock
Annex.Branch.change groupLog $
buildLog buildGroup . changeLog c uuid (modifier curr) . parseLog parseGroup
buildLogOld buildGroup . changeLog c uuid (modifier curr) . parseLogOld parseGroup
-- The changed group invalidates the preferred content cache.
Annex.changeState $ \s -> s
@ -76,7 +76,8 @@ groupMap = maybe groupMapLoad return =<< Annex.getState Annex.groupmap
{- Loads the map, updating the cache. -}
groupMapLoad :: Annex GroupMap
groupMapLoad = do
m <- makeGroupMap . simpleMap . parseLog parseGroup <$> Annex.Branch.get groupLog
m <- makeGroupMap . simpleMap . parseLogOld parseGroup
<$> Annex.Branch.get groupLog
Annex.changeState $ \s -> s { Annex.groupmap = Just m }
return m

View file

@ -20,6 +20,7 @@ module Logs.MapLog (
import Common
import Annex.VectorClock
import Logs.Line
import Utility.QuickCheck
import qualified Data.ByteString.Lazy as L
import qualified Data.Map.Strict as M
@ -32,6 +33,9 @@ data LogEntry v = LogEntry
, value :: v
} deriving (Eq, Show)
instance Arbitrary v => Arbitrary (LogEntry v) where
arbitrary = LogEntry <$> arbitrary <*> arbitrary
type MapLog f v = M.Map f (LogEntry v)
buildMapLog :: (f -> Builder) -> (v -> Builder) -> MapLog f v -> Builder

View file

@ -27,12 +27,12 @@ recordFingerprint :: Fingerprint -> UUID -> Annex ()
recordFingerprint fp uuid = do
c <- liftIO currentVectorClock
Annex.Branch.change multicastLog $
buildLog buildFindgerPrint
buildLogOld buildFindgerPrint
. changeLog c uuid fp
. parseLog fingerprintParser
. parseLogOld fingerprintParser
knownFingerPrints :: Annex (M.Map UUID Fingerprint)
knownFingerPrints = simpleMap . parseLog fingerprintParser
knownFingerPrints = simpleMap . parseLogOld fingerprintParser
<$> Annex.Branch.get activityLog
fingerprintParser :: A.Parser Fingerprint

View file

@ -74,7 +74,7 @@ preferredRequiredMapsLoad = do
groupmap <- groupMap
configmap <- readRemoteLog
let genmap l gm = simpleMap
. parseLogWithUUID (\u -> makeMatcher groupmap configmap gm u . decodeBS <$> A.takeByteString)
. parseLogOldWithUUID (\u -> makeMatcher groupmap configmap gm u . decodeBS <$> A.takeByteString)
<$> Annex.Branch.get l
pc <- genmap preferredContentLog =<< groupPreferredContentMapRaw
rc <- genmap requiredContentLog M.empty

View file

@ -32,9 +32,9 @@ setLog :: FilePath -> UUID -> PreferredContentExpression -> Annex ()
setLog logfile uuid@(UUID _) val = do
c <- liftIO currentVectorClock
Annex.Branch.change logfile $
buildLog buildPreferredContentExpression
buildLogOld buildPreferredContentExpression
. changeLog c uuid val
. parseLog parsePreferredContentExpression
. parseLogOld parsePreferredContentExpression
Annex.changeState $ \s -> s
{ Annex.preferredcontentmap = Nothing
, Annex.requiredcontentmap = Nothing
@ -70,11 +70,11 @@ buildPreferredContentExpression :: PreferredContentExpression -> Builder
buildPreferredContentExpression = byteString . encodeBS
preferredContentMapRaw :: Annex (M.Map UUID PreferredContentExpression)
preferredContentMapRaw = simpleMap . parseLog parsePreferredContentExpression
preferredContentMapRaw = simpleMap . parseLogOld parsePreferredContentExpression
<$> Annex.Branch.get preferredContentLog
requiredContentMapRaw :: Annex (M.Map UUID PreferredContentExpression)
requiredContentMapRaw = simpleMap . parseLog parsePreferredContentExpression
requiredContentMapRaw = simpleMap . parseLogOld parsePreferredContentExpression
<$> Annex.Branch.get requiredContentLog
groupPreferredContentMapRaw :: Annex (M.Map Group PreferredContentExpression)

View file

@ -122,6 +122,5 @@ instance Arbitrary LogLine where
arbinfo = (encodeBS <$> arbitrary) `suchThat`
(\b -> C8.notElem '\n' b && C8.notElem '\r' b)
prop_parse_build_log :: [LogLine] -> Bool
prop_parse_build_log l = parseLog (toLazyByteString (buildLog l)) == l
prop_parse_build_presence_log :: [LogLine] -> Bool
prop_parse_build_presence_log l = parseLog (toLazyByteString (buildLog l)) == l

View file

@ -33,13 +33,13 @@ configSet :: UUID -> RemoteConfig -> Annex ()
configSet u cfg = do
c <- liftIO currentVectorClock
Annex.Branch.change remoteLog $
buildLog (byteString . encodeBS . showConfig)
buildLogOld (byteString . encodeBS . showConfig)
. changeLog c u cfg
. parseLog remoteConfigParser
. parseLogOld remoteConfigParser
{- Map of remotes by uuid containing key/value config maps. -}
readRemoteLog :: Annex (M.Map UUID RemoteConfig)
readRemoteLog = simpleMap . parseLog remoteConfigParser
readRemoteLog = simpleMap . parseLogOld remoteConfigParser
<$> Annex.Branch.get remoteLog
remoteConfigParser :: A.Parser RemoteConfig

View file

@ -34,15 +34,15 @@ scheduleSet :: UUID -> [ScheduledActivity] -> Annex ()
scheduleSet uuid@(UUID _) activities = do
c <- liftIO currentVectorClock
Annex.Branch.change scheduleLog $
buildLog byteString
buildLogOld byteString
. changeLog c uuid (encodeBS val)
. parseLog A.takeByteString
. parseLogOld A.takeByteString
where
val = fromScheduledActivities activities
scheduleSet NoUUID _ = error "unknown UUID; cannot modify"
scheduleMap :: Annex (M.Map UUID [ScheduledActivity])
scheduleMap = simpleMap . parseLog parser <$> Annex.Branch.get scheduleLog
scheduleMap = simpleMap . parseLogOld parser <$> Annex.Branch.get scheduleLog
where
parser = either fail pure . parseScheduledActivities . decodeBS
=<< A.takeByteString

View file

@ -24,9 +24,9 @@ trustSet :: UUID -> TrustLevel -> Annex ()
trustSet uuid@(UUID _) level = do
c <- liftIO currentVectorClock
Annex.Branch.change trustLog $
buildLog buildTrustLevel .
buildLogOld buildTrustLevel .
changeLog c uuid level .
parseLog trustLevelParser
parseLogOld trustLevelParser
Annex.changeState $ \s -> s { Annex.trustmap = Nothing }
trustSet NoUUID _ = error "unknown UUID; cannot modify"

View file

@ -19,7 +19,7 @@ import qualified Data.Attoparsec.ByteString.Char8 as A8
import Data.ByteString.Builder
calcTrustMap :: L.ByteString -> TrustMap
calcTrustMap = simpleMap . parseLog trustLevelParser
calcTrustMap = simpleMap . parseLogOld trustLevelParser
trustLevelParser :: A.Parser TrustLevel
trustLevelParser = (totrust <$> A8.anyChar <* A.endOfInput)

View file

@ -32,7 +32,7 @@ describeUUID :: UUID -> UUIDDesc -> Annex ()
describeUUID uuid desc = do
c <- liftIO currentVectorClock
Annex.Branch.change uuidLog $
buildLog buildUUIDDesc . changeLog c uuid desc . parseUUIDLog
buildLogOld buildUUIDDesc . changeLog c uuid desc . parseUUIDLog
{- The map is cached for speed. -}
uuidDescMap :: Annex UUIDDescMap
@ -53,4 +53,4 @@ uuidDescMapLoad = do
preferold = flip const
parseUUIDLog :: L.ByteString -> Log UUIDDesc
parseUUIDLog = parseLog (UUIDDesc <$> A.takeByteString)
parseUUIDLog = parseLogOld (UUIDDesc <$> A.takeByteString)

View file

@ -3,9 +3,9 @@
- This is used to store information about UUIDs in a way that can
- be union merged.
-
- A line of the log will look like: "UUID[ INFO[ timestamp=foo]]"
- The old format looks like: "UUID[ INFO[ timestamp=foo]]"
- The timestamp is last for backwards compatibility reasons,
- and may not be present on old log lines.
- and may not be present on very old log lines.
-
- New uuid based logs instead use the form: "timestamp UUID INFO"
-
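For example (an editor's illustration; the UUID, description, and clock value are made up), the same entry in the two formats:
	e605dca6-446a-11e0-8b2a-002170d25c55 my description timestamp=1317929100.012345s
	1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 my description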
@ -21,10 +21,10 @@ module Logs.UUIDBased (
LogEntry(..),
VectorClock,
currentVectorClock,
parseLog,
parseLogOld,
parseLogNew,
parseLogWithUUID,
buildLog,
parseLogOldWithUUID,
buildLogOld,
buildLogNew,
changeLog,
addLog,
@ -48,8 +48,8 @@ import qualified Data.DList as D
type Log v = MapLog UUID v
buildLog :: (v -> Builder) -> Log v -> Builder
buildLog builder = mconcat . map genline . M.toList
buildLogOld :: (v -> Builder) -> Log v -> Builder
buildLogOld builder = mconcat . map genline . M.toList
where
genline (u, LogEntry c@(VectorClock {}) v) =
buildUUID u <> sp <> builder v <> sp <>
@ -59,15 +59,15 @@ buildLog builder = mconcat . map genline . M.toList
sp = charUtf8 ' '
nl = charUtf8 '\n'
parseLog :: A.Parser a -> L.ByteString -> Log a
parseLog = parseLogWithUUID . const
parseLogOld :: A.Parser a -> L.ByteString -> Log a
parseLogOld = parseLogOldWithUUID . const
parseLogWithUUID :: (UUID -> A.Parser a) -> L.ByteString -> Log a
parseLogWithUUID parser = fromMaybe M.empty . A.maybeResult
. A.parse (logParser parser)
parseLogOldWithUUID :: (UUID -> A.Parser a) -> L.ByteString -> Log a
parseLogOldWithUUID parser = fromMaybe M.empty . A.maybeResult
. A.parse (logParserOld parser)
logParser :: (UUID -> A.Parser a) -> A.Parser (Log a)
logParser parser = M.fromListWith best <$> parseLogLines go
logParserOld :: (UUID -> A.Parser a) -> A.Parser (Log a)
logParserOld parser = M.fromListWith best <$> parseLogLines go
where
go = do
u <- toUUID <$> A8.takeWhile1 (/= ' ')

View file

@ -17,7 +17,7 @@ import qualified Git
import Config.Cost
import Remote.Helper.Special
import Remote.Helper.Messages
import Remote.Helper.Export
import Remote.Helper.ExportImport
import Annex.UUID
import Utility.Metered
@ -35,6 +35,7 @@ remote = RemoteType
, generate = gen
, setup = adbSetup
, exportSupported = exportIsSupported
, importSupported = importUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
@ -61,6 +62,7 @@ gen r u c gc = do
, removeExportDirectory = Just $ removeExportDirectoryM serial adir
, renameExport = renameExportM serial adir
}
, importActions = importUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
@ -220,9 +222,9 @@ checkPresentExportM r serial adir _k loc = checkKey' r serial aloc
where
aloc = androidExportLocation adir loc
renameExportM :: AndroidSerial -> AndroidPath -> Key -> ExportLocation -> ExportLocation -> Annex Bool
renameExportM serial adir _k old new = liftIO $ adbShellBool serial
[Param "mv", Param "-f", File oldloc, File newloc]
renameExportM :: AndroidSerial -> AndroidPath -> Key -> ExportLocation -> ExportLocation -> Annex (Maybe Bool)
renameExportM serial adir _k old new = liftIO $ Just <$>
adbShellBool serial [Param "mv", Param "-f", File oldloc, File newloc]
where
oldloc = fromAndroidPath $ androidExportLocation adir old
newloc = fromAndroidPath $ androidExportLocation adir new

View file

@ -27,7 +27,7 @@ import Annex.Perms
import Annex.Tmp
import Annex.UUID
import qualified Annex.Url as Url
import Remote.Helper.Export
import Remote.Helper.ExportImport
import Network.URI
@ -43,6 +43,7 @@ remote = RemoteType
, generate = gen
, setup = error "not supported"
, exportSupported = exportUnsupported
, importSupported = importUnsupported
}
-- There is only one bittorrent remote, and it always exists.
@ -68,6 +69,7 @@ gen r _ c gc = do
, checkPresent = checkKey
, checkPresentCheap = False
, exportActions = exportUnsupported
, importActions = importUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -25,7 +25,7 @@ import Config.Cost
import qualified Remote.Helper.Ssh as Ssh
import Remote.Helper.Special
import Remote.Helper.Messages
import Remote.Helper.Export
import Remote.Helper.ExportImport
import Utility.Hash
import Utility.UserInfo
import Annex.UUID
@ -41,6 +41,7 @@ remote = RemoteType
, generate = gen
, setup = bupSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
@ -67,6 +68,7 @@ gen r u c gc = do
, checkPresent = checkPresentDummy
, checkPresentCheap = bupLocal buprepo
, exportActions = exportUnsupported
, importActions = importUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -19,7 +19,7 @@ import qualified Git
import Config
import Config.Cost
import Remote.Helper.Special
import Remote.Helper.Export
import Remote.Helper.ExportImport
import Annex.Ssh
import Annex.UUID
import Utility.SshHost
@ -36,6 +36,7 @@ remote = RemoteType
, generate = gen
, setup = ddarSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
@ -66,6 +67,7 @@ gen r u c gc = do
, checkPresent = checkPresentDummy
, checkPresentCheap = ddarLocal ddarrepo
, exportActions = exportUnsupported
, importActions = importUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -1,6 +1,6 @@
{- A "remote" that is just a filesystem directory.
-
- Copyright 2011-2017 Joey Hess <id@joeyh.name>
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -26,12 +26,14 @@ import Config.Cost
import Config
import Utility.FileMode
import Remote.Helper.Special
import Remote.Helper.Export
import Remote.Helper.ExportImport
import Types.Import
import qualified Remote.Directory.LegacyChunked as Legacy
import Annex.Content
import Annex.UUID
import Utility.Metered
import Utility.Tmp
import Utility.InodeCache
remote :: RemoteType
remote = RemoteType
@ -40,6 +42,7 @@ remote = RemoteType
, generate = gen
, setup = directorySetup
, exportSupported = exportIsSupported
, importSupported = importIsSupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
@ -73,6 +76,14 @@ gen r u c gc = do
, removeExportDirectory = Nothing
, renameExport = renameExportM dir
}
, importActions = ImportActions
{ listImportableContents = listImportableContentsM dir
, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM dir
, storeExportWithContentIdentifier = storeExportWithContentIdentifierM dir
, removeExportWithContentIdentifier = removeExportWithContentIdentifierM dir
, removeExportDirectoryWhenEmpty = Nothing
, checkPresentExportWithContentIdentifier = checkPresentExportWithContentIdentifierM dir
}
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
@ -227,14 +238,17 @@ checkPresentM d (LegacyChunks _) k = Legacy.checkKey d locations k
checkPresentM d _ k = checkPresentGeneric d (locations d k)
checkPresentGeneric :: FilePath -> [FilePath] -> Annex Bool
checkPresentGeneric d ps = liftIO $
ifM (anyM doesFileExist ps)
( return True
, ifM (doesDirectoryExist d)
( return False
, giveup $ "directory " ++ d ++ " is not accessible"
)
checkPresentGeneric d ps = checkPresentGeneric' d $
liftIO $ anyM doesFileExist ps
checkPresentGeneric' :: FilePath -> Annex Bool -> Annex Bool
checkPresentGeneric' d check = ifM check
( return True
, ifM (liftIO $ doesDirectoryExist d)
( return False
, giveup $ "directory " ++ d ++ " is not accessible"
)
)
storeExportM :: FilePath -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool
storeExportM d src _k loc p = liftIO $ catchBoolIO $ do
@ -265,13 +279,15 @@ checkPresentExportM :: FilePath -> Key -> ExportLocation -> Annex Bool
checkPresentExportM d _k loc =
checkPresentGeneric d [exportPath d loc]
renameExportM :: FilePath -> Key -> ExportLocation -> ExportLocation -> Annex Bool
renameExportM d _k oldloc newloc = liftIO $ catchBoolIO $ do
createDirectoryIfMissing True (takeDirectory dest)
renameFile src dest
removeExportLocation d oldloc
return True
renameExportM :: FilePath -> Key -> ExportLocation -> ExportLocation -> Annex (Maybe Bool)
renameExportM d _k oldloc newloc = liftIO $ Just <$> go
where
go = catchBoolIO $ do
createDirectoryIfMissing True (takeDirectory dest)
renameFile src dest
removeExportLocation d oldloc
return True
src = exportPath d oldloc
dest = exportPath d newloc
@ -288,3 +304,157 @@ removeExportLocation topdir loc =
go Nothing _ = return ()
go (Just loc') _ = go (upFrom loc')
=<< tryIO (removeDirectory $ exportPath topdir (mkExportLocation loc'))
listImportableContentsM :: FilePath -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
listImportableContentsM dir = catchMaybeIO $ liftIO $ do
l <- dirContentsRecursive dir
l' <- mapM go l
return $ ImportableContents (catMaybes l') []
where
go f = do
st <- getFileStatus f
mkContentIdentifier f st >>= \case
Nothing -> return Nothing
Just cid -> do
relf <- relPathDirToFile dir f
sz <- getFileSize' f st
return $ Just (mkImportLocation relf, (cid, sz))
-- Make a ContentIdentifier that contains an InodeCache.
--
-- The InodeCache is generated without checking a sentinel file.
-- So in a case when a remount etc causes all the inodes to change,
-- files may appear to be modified when they are not, which will only
-- result in extra work to re-import them.
--
-- If the file is not a regular file, this will return Nothing.
mkContentIdentifier :: FilePath -> FileStatus -> IO (Maybe ContentIdentifier)
mkContentIdentifier f st =
fmap (ContentIdentifier . encodeBS . showInodeCache)
<$> toInodeCache noTSDelta f st
retrieveExportWithContentIdentifierM :: FilePath -> ExportLocation -> ContentIdentifier -> FilePath -> Annex (Maybe Key) -> MeterUpdate -> Annex (Maybe Key)
retrieveExportWithContentIdentifierM dir loc cid dest mkkey p =
catchDefaultIO Nothing $ precheck $ docopy postcheck
where
f = exportPath dir loc
docopy cont = do
#ifndef mingw32_HOST_OS
-- Need a duplicate fd for the post check, since
-- hGetContentsMetered closes its handle.
fd <- liftIO $ openFd f ReadOnly Nothing defaultFileFlags
dupfd <- liftIO $ dup fd
h <- liftIO $ fdToHandle fd
#else
h <- liftIO $ openBinaryFile f ReadMode
#endif
liftIO $ hGetContentsMetered h p >>= L.writeFile dest
k <- mkkey
#ifndef mingw32_HOST_OS
cont dupfd (return k)
#else
cont (return k)
#endif
-- Check before copy, to avoid expensive copy of wrong file
-- content.
precheck cont = comparecid cont
=<< liftIO . mkContentIdentifier f
=<< liftIO (getFileStatus f)
-- Check after copy, in case the file was changed while it was
-- being copied.
--
-- When possible (not on Windows), check the same handle
-- that the file was copied from.
-- Avoids some race cases where the file is modified while
-- it's copied but then gets restored to the original content
-- afterwards.
--
-- This does not guard against every possible race, but neither
-- can InodeCaches detect every possible modification to a file.
-- It's probably as good or better than git's handling of similar
-- situations with files being modified while it's updating the
-- working tree for a merge.
#ifndef mingw32_HOST_OS
postcheck fd cont = do
#else
postcheck cont = do
#endif
currcid <- liftIO $ mkContentIdentifier f
#ifndef mingw32_HOST_OS
=<< getFdStatus fd
#else
=<< getFileStatus f
#endif
comparecid cont currcid
comparecid cont currcid
| currcid == Just cid = cont
| otherwise = return Nothing
storeExportWithContentIdentifierM :: FilePath -> FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex (Maybe ContentIdentifier)
storeExportWithContentIdentifierM dir src _k loc overwritablecids p =
catchDefaultIO Nothing $ do
liftIO $ createDirectoryIfMissing True destdir
withTmpFileIn destdir template $ \tmpf tmph -> do
liftIO $ withMeteredFile src p (L.hPut tmph)
liftIO $ hFlush tmph
liftIO (getFileStatus tmpf) >>= liftIO . mkContentIdentifier tmpf >>= \case
Nothing -> return Nothing
Just newcid ->
checkExportContent dir loc (newcid:overwritablecids) Nothing $ const $ do
liftIO $ rename tmpf dest
return (Just newcid)
where
dest = exportPath dir loc
(destdir, base) = splitFileName dest
template = relatedTemplate (base ++ ".tmp")
removeExportWithContentIdentifierM :: FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> Annex Bool
removeExportWithContentIdentifierM dir k loc removeablecids =
checkExportContent dir loc removeablecids False $ \case
DoesNotExist -> return True
KnownContentIdentifier -> removeExportM dir k loc
checkPresentExportWithContentIdentifierM :: FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> Annex Bool
checkPresentExportWithContentIdentifierM dir _k loc knowncids =
checkPresentGeneric' dir $
checkExportContent dir loc knowncids False $ \case
DoesNotExist -> return False
KnownContentIdentifier -> return True
data CheckResult = DoesNotExist | KnownContentIdentifier
-- Checks if the content at an ExportLocation is in the knowncids,
-- and only runs the callback that modifies it if it's safe to do so.
--
-- This should avoid races to the extent possible. However,
-- if something has the file open for write, it could write to the handle
-- after the callback has overwritten or deleted it, and its write would
-- be lost, and we don't need to detect that.
-- (In similar situations, git doesn't either!)
--
-- It follows that if something is written to the destination file
-- shortly before, it's acceptable to run the callback anyway, as that's
-- nearly indistinguishable from the above case.
--
-- So, it suffices to check if the destination file's current
-- content is known, and immediately run the callback.
checkExportContent :: FilePath -> ExportLocation -> [ContentIdentifier] -> a -> (CheckResult -> Annex a) -> Annex a
checkExportContent dir loc knowncids unsafe callback =
tryWhenExists (liftIO $ getFileStatus dest) >>= \case
Just destst
| not (isRegularFile destst) -> return unsafe
| otherwise -> catchDefaultIO Nothing (liftIO $ mkContentIdentifier dest destst) >>= \case
Just destcid
| destcid `elem` knowncids -> callback KnownContentIdentifier
-- dest exists with other content
| otherwise -> return unsafe
-- should never happen
Nothing -> return unsafe
-- dest does not exist
Nothing -> callback DoesNotExist
where
dest = exportPath dir loc

View file

@ -19,8 +19,7 @@ import Config
import Git.Config (isTrue, boolConfig)
import Git.Env
import Remote.Helper.Special
import Remote.Helper.Export
import Annex.Export
import Remote.Helper.ExportImport
import Remote.Helper.ReadOnly
import Remote.Helper.Messages
import Utility.Metered
@ -48,6 +47,7 @@ remote = RemoteType
, generate = gen
, setup = externalSetup
, exportSupported = checkExportSupported
, importSupported = importUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
@ -119,6 +119,7 @@ gen r u c gc
, checkPresent = checkPresentDummy
, checkPresentCheap = False
, exportActions = exportactions
, importActions = importUnsupported
, whereisKey = towhereis
, remoteFsck = Nothing
, repairRepo = Nothing
@ -306,25 +307,28 @@ removeExportDirectoryM external dir = safely $
where
req = REMOVEEXPORTDIRECTORY dir
renameExportM :: External -> Key -> ExportLocation -> ExportLocation -> Annex Bool
renameExportM external k src dest = safely $
renameExportM :: External -> Key -> ExportLocation -> ExportLocation -> Annex (Maybe Bool)
renameExportM external k src dest = safely' (Just False) $
handleRequestExport external src req k Nothing $ \resp -> case resp of
RENAMEEXPORT_SUCCESS k'
| k' == k -> result True
| k' == k -> result (Just True)
RENAMEEXPORT_FAILURE k'
| k' == k -> result False
UNSUPPORTED_REQUEST -> result False
| k' == k -> result (Just False)
UNSUPPORTED_REQUEST -> result Nothing
_ -> Nothing
where
req sk = RENAMEEXPORT sk dest
safely :: Annex Bool -> Annex Bool
safely a = go =<< tryNonAsync a
safely = safely' False
safely' :: a -> Annex a -> Annex a
safely' onerr a = go =<< tryNonAsync a
where
go (Right r) = return r
go (Left e) = do
toplevelWarning False (show e)
return False
return onerr
{- Sends a Request to the external remote, and waits for it to generate
- a Response. That is fed into the responsehandler, which should return

View file

@ -38,7 +38,7 @@ import Remote.Helper.Git
import Remote.Helper.Encryptable
import Remote.Helper.Special
import Remote.Helper.Messages
import Remote.Helper.Export
import Remote.Helper.ExportImport
import qualified Remote.Helper.Ssh as Ssh
import Utility.Metered
import Annex.UUID
@ -61,6 +61,7 @@ remote = RemoteType
, generate = gen
, setup = gCryptSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
}
chainGen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
@ -119,6 +120,7 @@ gen' r u c gc = do
, checkPresent = checkPresentDummy
, checkPresentCheap = repoCheap r
, exportActions = exportUnsupported
, importActions = importUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -48,7 +48,7 @@ import Utility.Batch
import Utility.SimpleProtocol
import Remote.Helper.Git
import Remote.Helper.Messages
import Remote.Helper.Export
import Remote.Helper.ExportImport
import qualified Remote.Helper.Ssh as Ssh
import qualified Remote.GCrypt
import qualified Remote.P2P
@ -72,6 +72,7 @@ remote = RemoteType
, generate = gen
, setup = gitSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
}
list :: Bool -> Annex [Git.Repo]
@ -165,6 +166,7 @@ gen r u c gc
, checkPresent = inAnnex new st
, checkPresentCheap = repoCheap r
, exportActions = exportUnsupported
, importActions = importUnsupported
, whereisKey = Nothing
, remoteFsck = if Git.repoIsUrl r
then Nothing

View file

@ -18,7 +18,7 @@ import Config
import Config.Cost
import Remote.Helper.Special
import Remote.Helper.Messages
import Remote.Helper.Export
import Remote.Helper.ExportImport
import qualified Remote.Helper.AWS as AWS
import Creds
import Utility.Metered
@ -36,6 +36,7 @@ remote = RemoteType
, generate = gen
, setup = glacierSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
@ -65,6 +66,7 @@ gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost
, checkPresent = checkPresentDummy
, checkPresentCheap = False
, exportActions = exportUnsupported
, importActions = importUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -1,215 +0,0 @@
{- exports to remotes
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE FlexibleInstances #-}
module Remote.Helper.Export where
import Annex.Common
import Types.Remote
import Types.Backend
import Types.Key
import Backend
import Remote.Helper.Encryptable (isEncrypted)
import Database.Export
import Annex.Export
import Config
import Git.Types (fromRef)
import Logs.Export
import qualified Data.Map as M
import Control.Concurrent.STM
-- | Use for remotes that do not support exports.
class HasExportUnsupported a where
exportUnsupported :: a
instance HasExportUnsupported (RemoteConfig -> RemoteGitConfig -> Annex Bool) where
exportUnsupported = \_ _ -> return False
instance HasExportUnsupported (ExportActions Annex) where
exportUnsupported = ExportActions
{ storeExport = \_ _ _ _ -> do
warning "store export is unsupported"
return False
, retrieveExport = \_ _ _ _ -> return False
, checkPresentExport = \_ _ -> return False
, removeExport = \_ _ -> return False
, removeExportDirectory = Just $ \_ -> return False
, renameExport = \_ _ _ -> return False
}
exportIsSupported :: RemoteConfig -> RemoteGitConfig -> Annex Bool
exportIsSupported = \_ _ -> return True
-- | Prevent or allow exporttree=yes when setting up a new remote,
-- depending on exportSupported and other configuration.
adjustExportableRemoteType :: RemoteType -> RemoteType
adjustExportableRemoteType rt = rt { setup = setup' }
where
setup' st mu cp c gc = do
let cont = setup rt st mu cp c gc
ifM (exportSupported rt c gc)
( case st of
Init
| exportTree c && isEncrypted c ->
giveup "cannot enable both encryption and exporttree"
| otherwise -> cont
Enable oldc
| exportTree c /= exportTree oldc ->
giveup "cannot change exporttree of existing special remote"
| otherwise -> cont
, if exportTree c
then giveup "exporttree=yes is not supported by this special remote"
else cont
)
-- | If the remote is exportSupported, and exporttree=yes, adjust the
-- remote to be an export.
adjustExportable :: Remote -> Annex Remote
adjustExportable r = case M.lookup "exporttree" (config r) of
Nothing -> notexport
Just c -> case yesNo c of
Just True -> ifM (isExportSupported r)
( isexport
, notexport
)
Just False -> notexport
Nothing -> do
warning $ "bad exporttree value for " ++ name r ++ ", assuming not an export"
notexport
where
notexport = return $ r
{ exportActions = exportUnsupported
, remotetype = (remotetype r)
{ exportSupported = exportUnsupported
}
}
isexport = do
db <- openDb (uuid r)
updateflag <- liftIO $ newTVarIO Nothing
-- When multiple threads run this, all except the first
-- will block until the first runs doneupdateonce.
-- Returns True when an update should be done and False
-- when the update has already been done.
let startupdateonce = liftIO $ atomically $
readTVar updateflag >>= \case
Nothing -> do
writeTVar updateflag (Just True)
return True
Just True -> retry
Just False -> return False
let doneupdateonce = \updated ->
when updated $
liftIO $ atomically $
writeTVar updateflag (Just False)
exportinconflict <- liftIO $ newTVarIO False
-- Get export locations for a key. Checks once
-- if the export log is different than the database and
-- updates the database, to notice when an export has been
-- updated from another repository.
let getexportlocs = \k -> do
bracket startupdateonce doneupdateonce $ \updatenow ->
when updatenow $
updateExportTreeFromLog db >>= \case
ExportUpdateSuccess -> return ()
ExportUpdateConflict -> do
warnExportConflict r
liftIO $ atomically $
writeTVar exportinconflict True
liftIO $ getExportTree db k
return $ r
-- Storing a key on an export could be implemented,
-- but it would perform unnecessary work
-- when another repository has already stored the
-- key, and the local repository does not know
-- about it. To avoid unnecessary costs, don't do it.
{ storeKey = \_ _ _ -> do
warning "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
return False
-- Keys can be retrieved using retrieveExport,
-- but since that retrieves from a path in the
-- remote that another writer could have replaced
-- with content not of the requested key,
-- the content has to be strongly verified.
--
-- appendonly remotes have a key/value store,
-- so don't need to use retrieveExport. However,
-- fall back to it if retrieveKeyFile fails.
, retrieveKeyFile = \k af dest p ->
let retrieveexport = retrieveKeyFileFromExport getexportlocs exportinconflict k af dest p
in if appendonly r
then do
ret@(ok, _v) <- retrieveKeyFile r k af dest p
if ok
then return ret
else retrieveexport
else retrieveexport
, retrieveKeyFileCheap = if appendonly r
then retrieveKeyFileCheap r
else \_ _ _ -> return False
-- Removing a key from an export would need to
-- change the tree in the export log to not include
-- the file. Otherwise, conflicts when removing
-- files would not be dealt with correctly.
-- There does not seem to be a good use case for
-- removing a key from an export in any case.
, removeKey = \_k -> do
warning "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove"
return False
-- Can't lock content on exports, since they're
-- not key/value stores, and someone else could
-- change what's exported to a file at any time.
--
-- (except for appendonly remotes)
, lockContent = if appendonly r
then lockContent r
else Nothing
-- Check if any of the files a key was exported to
-- are present. This doesn't guarantee the export
-- contains the right content, which is why export
-- remotes are untrusted.
--
-- (but appendonly remotes work the same as any
-- non-export remote)
, checkPresent = if appendonly r
then checkPresent r
else \k -> anyM (checkPresentExport (exportActions r) k)
=<< getexportlocs k
-- checkPresent from an export is more expensive
-- than otherwise, so not cheap. Also, this
-- avoids things that look at checkPresentCheap and
-- silently skip non-present files from behaving
-- in confusing ways when there's an export
-- conflict.
, checkPresentCheap = False
, mkUnavailable = return Nothing
, getInfo = do
ts <- map fromRef . exportedTreeishes
<$> getExport (uuid r)
is <- getInfo r
return (is++[("export", "yes"), ("exportedtree", unwords ts)])
}
retrieveKeyFileFromExport getexportlocs exportinconflict k _af dest p = unVerified $
if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (keyVariety k))
then do
locs <- getexportlocs k
case locs of
[] -> do
ifM (liftIO $ atomically $ readTVar exportinconflict)
( warning "unknown export location, likely due to the export conflict"
, warning "unknown export location"
)
return False
(l:_) -> retrieveExport (exportActions r) k l dest p
else do
warning $ "exported content cannot be verified due to using the " ++ decodeBS (formatKeyVariety (keyVariety k)) ++ " backend"
return False

View file

@ -0,0 +1,335 @@
{- Helper to make remotes support export and import (or not).
-
- Copyright 2017-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE FlexibleInstances, FlexibleContexts #-}
module Remote.Helper.ExportImport where
import Annex.Common
import Types.Remote
import Types.Backend
import Types.Key
import Backend
import Remote.Helper.Encryptable (isEncrypted)
import qualified Database.Export as Export
import qualified Database.ContentIdentifier as ContentIdentifier
import Annex.Export
import Annex.LockFile
import Config
import Git.Types (fromRef)
import Logs.Export
import Logs.ContentIdentifier (recordContentIdentifier)
import qualified Data.Map as M
import Control.Concurrent.STM
-- | Use for remotes that do not support exports.
class HasExportUnsupported a where
exportUnsupported :: a
instance HasExportUnsupported (RemoteConfig -> RemoteGitConfig -> Annex Bool) where
exportUnsupported = \_ _ -> return False
instance HasExportUnsupported (ExportActions Annex) where
exportUnsupported = ExportActions
{ storeExport = \_ _ _ _ -> do
warning "store export is unsupported"
return False
, retrieveExport = \_ _ _ _ -> return False
, checkPresentExport = \_ _ -> return False
, removeExport = \_ _ -> return False
, removeExportDirectory = Just $ \_ -> return False
, renameExport = \_ _ _ -> return Nothing
}
-- | Use for remotes that do not support imports.
class HasImportUnsupported a where
importUnsupported :: a
instance HasImportUnsupported (RemoteConfig -> RemoteGitConfig -> Annex Bool) where
importUnsupported = \_ _ -> return False
instance HasImportUnsupported (ImportActions Annex) where
importUnsupported = ImportActions
{ listImportableContents = return Nothing
, retrieveExportWithContentIdentifier = \_ _ _ _ _ -> return Nothing
, storeExportWithContentIdentifier = \_ _ _ _ _ -> return Nothing
, removeExportWithContentIdentifier = \_ _ _ -> return False
, removeExportDirectoryWhenEmpty = Just $ \_ -> return False
, checkPresentExportWithContentIdentifier = \_ _ _ -> return False
}
exportIsSupported :: RemoteConfig -> RemoteGitConfig -> Annex Bool
exportIsSupported = \_ _ -> return True
importIsSupported :: RemoteConfig -> RemoteGitConfig -> Annex Bool
importIsSupported = \_ _ -> return True
-- | Prevent or allow exporttree=yes and importtree=yes when
-- setting up a new remote, depending on exportSupported and importSupported.
adjustExportImportRemoteType :: RemoteType -> RemoteType
adjustExportImportRemoteType rt = rt { setup = setup' }
where
setup' st mu cp c gc =
let checkconfig supported configured setting cont =
ifM (supported rt c gc)
( case st of
Init
| configured c && isEncrypted c ->
giveup $ "cannot enable both encryption and " ++ setting
| otherwise -> cont
Enable oldc
| configured c /= configured oldc ->
giveup $ "cannot change " ++ setting ++ " of existing special remote"
| otherwise -> cont
, if configured c
then giveup $ setting ++ " is not supported by this special remote"
else cont
)
in checkconfig exportSupported exportTree "exporttree" $
checkconfig importSupported importTree "importtree" $
if importTree c && not (exportTree c)
then giveup "cannot enable importtree=yes without also enabling exporttree=yes"
else setup rt st mu cp c gc
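Concretely (an editor's summary of the checks above, for a remote type whose exportSupported and importSupported both say yes):
	-- Init:   exporttree=yes, no encryption          -> allowed
	-- Init:   exporttree=yes with encryption         -> giveup
	-- Init:   importtree=yes without exporttree=yes  -> giveup
	-- Enable: changing exporttree or importtree      -> giveup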
-- | Adjust a remote to support exporttree=yes and importtree=yes.
--
-- Note that all remotes with importtree=yes also have exporttree=yes.
adjustExportImport :: Remote -> Annex Remote
adjustExportImport r = case M.lookup "exporttree" (config r) of
Nothing -> return $ notexport r
Just c -> case yesNo c of
Just True -> ifM (isExportSupported r)
( do
exportdbv <- prepexportdb
r' <- isexport exportdbv
if importTree (config r)
then isimport r' exportdbv
else return r'
, return $ notexport r
)
Just False -> return $ notexport r
Nothing -> do
warning $ "bad exporttree value for " ++ name r ++ ", assuming not an export"
return $ notexport r
where
notexport r' = notimport r'
{ exportActions = exportUnsupported
, remotetype = (remotetype r')
{ exportSupported = exportUnsupported
}
}
notimport r' = r'
{ importActions = importUnsupported
, remotetype = (remotetype r')
{ importSupported = importUnsupported
}
}
isimport r' exportdbv = do
ciddbv <- prepciddb
let keycids k = do
db <- getciddb ciddbv
liftIO $ ContentIdentifier.getContentIdentifiers db (uuid r') k
let checkpresent k loc =
checkPresentExportWithContentIdentifier
(importActions r')
k loc
=<< keycids k
return $ r'
{ exportActions = (exportActions r')
{ storeExport = \f k loc p -> do
db <- getciddb ciddbv
exportdb <- getexportdb exportdbv
updateexportdb exportdb exportdbv
oldks <- liftIO $ Export.getExportTreeKey exportdb loc
oldcids <- liftIO $ concat
<$> mapM (ContentIdentifier.getContentIdentifiers db (uuid r')) oldks
storeExportWithContentIdentifier (importActions r') f k loc oldcids p >>= \case
Nothing -> return False
Just newcid -> do
withExclusiveLock gitAnnexContentIdentifierLock $ do
liftIO $ ContentIdentifier.recordContentIdentifier db (uuid r') newcid k
liftIO $ ContentIdentifier.flushDbQueue db
recordContentIdentifier (uuid r') newcid k
return True
, removeExport = \k loc ->
removeExportWithContentIdentifier (importActions r') k loc
=<< keycids k
, removeExportDirectory = removeExportDirectoryWhenEmpty (importActions r')
-- renameExport is optional, and the
-- remote's implementation may
-- lose modifications to the file
-- (by eg copying and then deleting)
-- so don't use it
, renameExport = \_ _ _ -> return Nothing
, checkPresentExport = checkpresent
}
, checkPresent = if appendonly r'
then checkPresent r'
else \k -> anyM (checkpresent k)
=<< getexportlocs exportdbv k
}
isexport dbv = return $ r
-- Storing a key on an export could be implemented,
-- but it would perform unnecessary work
-- when another repository has already stored the
-- key, and the local repository does not know
-- about it. To avoid unnecessary costs, don't do it.
{ storeKey = \_ _ _ -> do
warning "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
return False
-- Keys can be retrieved using retrieveExport,
-- but since that retrieves from a path in the
-- remote that another writer could have replaced
-- with content not of the requested key,
-- the content has to be strongly verified.
--
-- appendonly remotes have a key/value store,
-- so don't need to use retrieveExport. However,
-- fall back to it if retrieveKeyFile fails.
, retrieveKeyFile = \k af dest p ->
let retrieveexport = retrieveKeyFileFromExport dbv k af dest p
in if appendonly r
then do
ret@(ok, _v) <- retrieveKeyFile r k af dest p
if ok
then return ret
else retrieveexport
else retrieveexport
, retrieveKeyFileCheap = if appendonly r
then retrieveKeyFileCheap r
else \_ _ _ -> return False
-- Removing a key from an export would need to
-- change the tree in the export log to not include
-- the file. Otherwise, conflicts when removing
-- files would not be dealt with correctly.
-- There does not seem to be a good use case for
-- removing a key from an export in any case.
, removeKey = \_k -> do
warning "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove"
return False
-- Can't lock content on exports, since they're
-- not key/value stores, and someone else could
-- change what's exported to a file at any time.
--
-- (except for appendonly remotes)
, lockContent = if appendonly r
then lockContent r
else Nothing
-- Check if any of the files a key was exported to
-- are present. This doesn't guarantee the export
-- contains the right content, which is why export
-- remotes are untrusted.
--
-- (but appendonly remotes work the same as any
-- non-export remote)
, checkPresent = if appendonly r
then checkPresent r
else \k -> anyM (checkPresentExport (exportActions r) k)
=<< getexportlocs dbv k
-- checkPresent from an export is more expensive
-- than otherwise, so not cheap. Also, this
-- avoids things that look at checkPresentCheap and
-- silently skip non-present files from behaving
-- in confusing ways when there's an export
-- conflict.
, checkPresentCheap = False
, mkUnavailable = return Nothing
, getInfo = do
ts <- map fromRef . exportedTreeishes
<$> getExport (uuid r)
is <- getInfo r
return (is++[("export", "yes"), ("exportedtree", unwords ts)])
}
prepciddb = do
lcklckv <- liftIO newEmptyTMVarIO
dbtv <- liftIO newEmptyTMVarIO
return (dbtv, lcklckv)
prepexportdb = do
lcklckv <- liftIO newEmptyTMVarIO
dbv <- liftIO newEmptyTMVarIO
exportinconflict <- liftIO $ newTVarIO False
exportupdated <- liftIO $ newTMVarIO ()
return (dbv, lcklckv, exportinconflict, exportupdated)
-- Only open the database once it's needed.
getciddb (dbtv, lcklckv) =
liftIO (atomically (tryReadTMVar dbtv)) >>= \case
Just db -> return db
-- let only one thread take the lock
Nothing -> ifM (liftIO $ atomically $ tryPutTMVar lcklckv ())
( do
db <- ContentIdentifier.openDb
ContentIdentifier.needsUpdateFromLog db >>= \case
Just v -> withExclusiveLock gitAnnexContentIdentifierLock $
ContentIdentifier.updateFromLog db v
Nothing -> noop
liftIO $ atomically $ putTMVar dbtv db
return db
-- loser waits for winner to open the db and
-- can then also use its handle
, liftIO $ atomically (readTMVar dbtv)
)
-- Only open the database once it's needed.
getexportdb (dbv, lcklckv, _, _) =
liftIO (atomically (tryReadTMVar dbv)) >>= \case
Just db -> return db
-- let only one thread take the lock
Nothing -> ifM (liftIO $ atomically $ tryPutTMVar lcklckv ())
( do
db <- Export.openDb (uuid r)
liftIO $ atomically $ putTMVar dbv db
return db
-- loser waits for winner to open the db and
-- can then also use its handle
, liftIO $ atomically (readTMVar dbv)
)
getexportinconflict (_, _, v, _) = v
-- Checks once whether the export log differs from the database,
-- and if so updates the database, to notice when an export has been
-- updated from another repository.
updateexportdb db (_, _, exportinconflict, exportupdated) =
liftIO (atomically (tryTakeTMVar exportupdated)) >>= \case
Just () -> Export.updateExportTreeFromLog db >>= \case
Export.ExportUpdateSuccess -> return ()
Export.ExportUpdateConflict -> do
warnExportConflict r
liftIO $ atomically $
writeTVar exportinconflict True
Nothing -> return ()
getexportlocs dbv k = do
db <- getexportdb dbv
updateexportdb db dbv
liftIO $ Export.getExportTree db k
retrieveKeyFileFromExport dbv k _af dest p = unVerified $
if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (keyVariety k))
then do
locs <- getexportlocs dbv k
case locs of
[] -> do
ifM (liftIO $ atomically $ readTVar $ getexportinconflict dbv)
( warning "unknown export location, likely due to the export conflict"
, warning "unknown export location"
)
return False
(l:_) -> retrieveExport (exportActions r) k l dest p
else do
warning $ "exported content cannot be verified due to using the " ++ decodeBS (formatKeyVariety (keyVariety k)) ++ " backend"
return False
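The getciddb and getexportdb helpers above share a once-only initialization pattern: one TMVar publishes the opened database handle, and a second TMVar acts as a claim ticket so that only a single thread performs the expensive open while the others block until the handle is published. A minimal standalone sketch of the same pattern (OnceVar, newOnceVar and getOnce are names invented here, not git-annex code):

{-# LANGUAGE LambdaCase #-}

import Control.Concurrent.STM

-- resv publishes the opened value; lockv is the claim ticket.
data OnceVar a = OnceVar (TMVar a) (TMVar ())

newOnceVar :: IO (OnceVar a)
newOnceVar = OnceVar <$> newEmptyTMVarIO <*> newEmptyTMVarIO

-- Runs open at most once; losers block until the winner publishes
-- the value. (As in the code above, if open throws, waiters block.)
getOnce :: OnceVar a -> IO a -> IO a
getOnce (OnceVar resv lockv) open =
    atomically (tryReadTMVar resv) >>= \case
        Just v -> return v
        Nothing -> do
            winner <- atomically $ tryPutTMVar lockv ()
            if winner
                then do
                    v <- open
                    atomically $ putTMVar resv v
                    return v
                else atomically $ readTMVar resv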

View file

@ -16,7 +16,7 @@ import Config.Cost
import Annex.UUID
import Remote.Helper.Special
import Remote.Helper.Messages
import Remote.Helper.Export
import Remote.Helper.ExportImport
import Utility.Env
import Messages.Progress
@ -32,6 +32,7 @@ remote = RemoteType
, generate = gen
, setup = hookSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
@ -57,6 +58,7 @@ gen r u c gc = do
, checkPresent = checkPresentDummy
, checkPresentCheap = False
, exportActions = exportUnsupported
, importActions = importUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -18,7 +18,7 @@ import Types.Remote
import Annex.UUID
import Remote.Helper.Hooks
import Remote.Helper.ReadOnly
import Remote.Helper.Export
import Remote.Helper.ExportImport
import qualified Git
import qualified Git.Config
@ -44,7 +44,7 @@ import qualified Remote.Hook
import qualified Remote.External
remoteTypes :: [RemoteType]
remoteTypes = map adjustExportableRemoteType
remoteTypes = map adjustExportImportRemoteType
[ Remote.Git.remote
, Remote.GCrypt.remote
, Remote.P2P.remote
@ -100,13 +100,13 @@ remoteListRefresh = do
{- Generates a Remote. -}
remoteGen :: M.Map UUID RemoteConfig -> RemoteType -> Git.Repo -> Annex (Maybe Remote)
remoteGen m t r = do
u <- getRepoUUID r
gc <- Annex.getRemoteGitConfig r
remoteGen m t g = do
u <- getRepoUUID g
gc <- Annex.getRemoteGitConfig g
let c = fromMaybe M.empty $ M.lookup u m
generate t r u c gc >>= maybe
(return Nothing)
(Just <$$> adjustExportable . adjustReadOnly . addHooks)
generate t g u c gc >>= \case
Nothing -> return Nothing
Just r -> Just <$> adjustExportImport (adjustReadOnly (addHooks r))
{- Updates a local git Remote, re-reading its git config. -}
updateRemote :: Remote -> Annex (Maybe Remote)

View file

@ -23,7 +23,7 @@ import Annex.UUID
import Config
import Config.Cost
import Remote.Helper.Git
import Remote.Helper.Export
import Remote.Helper.ExportImport
import Remote.Helper.P2P
import Utility.AuthToken
@ -38,6 +38,7 @@ remote = RemoteType
, generate = \_ _ _ _ -> return Nothing
, setup = error "P2P remotes are set up using git-annex p2p"
, exportSupported = exportUnsupported
, importSupported = importUnsupported
}
chainGen :: P2PAddress -> Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
@ -59,6 +60,7 @@ chainGen addr r u c gc = do
, checkPresent = checkpresent protorunner
, checkPresentCheap = False
, exportActions = exportUnsupported
, importActions = importUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -28,7 +28,7 @@ import Annex.UUID
import Annex.Ssh
import Remote.Helper.Special
import Remote.Helper.Messages
import Remote.Helper.Export
import Remote.Helper.ExportImport
import Types.Export
import Remote.Rsync.RsyncUrl
import Crypto
@ -51,6 +51,7 @@ remote = RemoteType
, generate = gen
, setup = rsyncSetup
, exportSupported = exportIsSupported
, importSupported = importUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
@ -85,6 +86,7 @@ gen r u c gc = do
, removeExportDirectory = Just (removeExportDirectoryM o)
, renameExport = renameExportM o
}
, importActions = importUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
@ -291,8 +293,8 @@ removeExportDirectoryM o ed = removeGeneric o (allbelow d : includes d)
Nothing -> []
Just f' -> includes f'
renameExportM :: RsyncOpts -> Key -> ExportLocation -> ExportLocation -> Annex Bool
renameExportM _ _ _ _ = return False
renameExportM :: RsyncOpts -> Key -> ExportLocation -> ExportLocation -> Annex (Maybe Bool)
renameExportM _ _ _ _ = return Nothing
{- Rsync params to enable resumes of sending files safely,
- ensure that files are only moved into place once complete

View file

@ -39,14 +39,13 @@ import Control.Concurrent.STM.TVar
import Annex.Common
import Types.Remote
import Types.Export
import Annex.Export
import qualified Git
import Config
import Config.Cost
import Remote.Helper.Special
import Remote.Helper.Http
import Remote.Helper.Messages
import Remote.Helper.Export
import Remote.Helper.ExportImport
import qualified Remote.Helper.AWS as AWS
import Creds
import Annex.UUID
@ -72,6 +71,7 @@ remote = RemoteType
, generate = gen
, setup = s3Setup
, exportSupported = exportIsSupported
, importSupported = importUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
@ -112,6 +112,7 @@ gen r u c gc = do
, removeExportDirectory = Nothing
, renameExport = renameExportS3 hdl this info
}
, importActions = importUnsupported
, whereisKey = Just (getPublicWebUrls u info c)
, remoteFsck = Nothing
, repairRepo = Nothing
@ -397,15 +398,17 @@ checkPresentExportS3 hv r info k loc = withS3Handle hv $ \case
giveup "No S3 credentials configured"
-- S3 has no move primitive; copy and delete.
renameExportS3 :: S3HandleVar -> Remote -> S3Info -> Key -> ExportLocation -> ExportLocation -> Annex Bool
renameExportS3 hv r info k src dest = withS3Handle hv $ \case
Just h -> checkVersioning info (uuid r) k $
catchNonAsync (go h) (\_ -> return False)
Nothing -> do
warning $ needS3Creds (uuid r)
return False
renameExportS3 :: S3HandleVar -> Remote -> S3Info -> Key -> ExportLocation -> ExportLocation -> Annex (Maybe Bool)
renameExportS3 hv r info k src dest = Just <$> go
where
go h = liftIO $ runResourceT $ do
go = withS3Handle hv $ \case
Just h -> checkVersioning info (uuid r) k $
catchNonAsync (go' h) (\_ -> return False)
Nothing -> do
warning $ needS3Creds (uuid r)
return False
go' h = liftIO $ runResourceT $ do
let co = S3.copyObject (bucket info) dstobject
(S3.ObjectId (bucket info) srcobject Nothing)
S3.CopyMetadata
@ -413,6 +416,7 @@ renameExportS3 hv r info k src dest = withS3Handle hv $ \case
void $ sendS3Handle h $ co { S3.coAcl = acl info }
void $ sendS3Handle h $ S3.DeleteObject srcobject (bucket info)
return True
srcobject = T.pack $ bucketExportLocation info src
dstobject = T.pack $ bucketExportLocation info dest

View file

@ -34,7 +34,7 @@ import qualified Git
import Config
import Config.Cost
import Remote.Helper.Special
import Remote.Helper.Export
import Remote.Helper.ExportImport
import Annex.UUID
import Annex.Content
import Logs.RemoteState
@ -58,6 +58,7 @@ remote = RemoteType
, generate = gen
, setup = tahoeSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
@ -80,6 +81,7 @@ gen r u c gc = do
, checkPresent = checkKey u hdl
, checkPresentCheap = False
, exportActions = exportUnsupported
, importActions = importUnsupported
, whereisKey = Just (getWhereisKey u)
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -10,7 +10,7 @@ module Remote.Web (remote, getWebUrls) where
import Annex.Common
import Types.Remote
import Remote.Helper.Messages
import Remote.Helper.Export
import Remote.Helper.ExportImport
import qualified Git
import qualified Git.Construct
import Annex.Content
@ -29,6 +29,7 @@ remote = RemoteType
, generate = gen
, setup = error "not supported"
, exportSupported = exportUnsupported
, importSupported = importUnsupported
}
-- There is only one web remote, and it always exists.
@ -57,6 +58,7 @@ gen r _ c gc = do
, checkPresent = checkKey
, checkPresentCheap = False
, exportActions = exportUnsupported
, importActions = importUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing

View file

@ -34,7 +34,7 @@ import Config.Cost
import Remote.Helper.Special
import Remote.Helper.Messages
import Remote.Helper.Http
import Remote.Helper.Export
import Remote.Helper.ExportImport
import qualified Remote.Helper.Chunked.Legacy as Legacy
import Creds
import Utility.Metered
@ -53,6 +53,7 @@ remote = RemoteType
, generate = gen
, setup = webdavSetup
, exportSupported = exportIsSupported
, importSupported = importUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
@ -88,6 +89,7 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost
removeExportDirectoryDav this
, renameExport = renameExportDav this
}
, importActions = importUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
@ -237,19 +239,21 @@ removeExportDirectoryDav r dir = withDAVHandle r $ \mh -> runExport mh $ \_dav -
safely (inLocation d delContentM)
>>= maybe (return False) (const $ return True)
renameExportDav :: Remote -> Key -> ExportLocation -> ExportLocation -> Annex Bool
renameExportDav :: Remote -> Key -> ExportLocation -> ExportLocation -> Annex (Maybe Bool)
renameExportDav r _k src dest = case (exportLocation src, exportLocation dest) of
(Right srcl, Right destl) -> withDAVHandle r $ \case
Just h
-- box.com's DAV endpoint has buggy handling of renames,
-- so avoid renaming when using it.
| boxComUrl `isPrefixOf` baseURL h -> return False
| otherwise -> runExport (Just h) $ \dav -> do
maybe noop (void . mkColRecursive) (locationParent destl)
moveDAV (baseURL dav) srcl destl
return True
Nothing -> return False
_ -> return False
| boxComUrl `isPrefixOf` baseURL h -> return Nothing
| otherwise -> do
v <- runExport (Just h) $ \dav -> do
maybe noop (void . mkColRecursive) (locationParent destl)
moveDAV (baseURL dav) srcl destl
return True
return (Just v)
Nothing -> return (Just False)
_ -> return (Just False)
runExport :: Maybe DavHandle -> (DavHandle -> DAVT IO Bool) -> Annex Bool
runExport Nothing _ = return False

55
Test.hs
View file

@ -1,6 +1,6 @@
{- git-annex test suite
-
- Copyright 2010-2018 Joey Hess <id@joeyh.name>
- Copyright 2010-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -48,6 +48,7 @@ import qualified Logs.Remote
import qualified Logs.Unused
import qualified Logs.Transfer
import qualified Logs.Presence
import qualified Logs.ContentIdentifier
import qualified Logs.PreferredContent
import qualified Types.MetaData
import qualified Remote
@ -175,7 +176,8 @@ properties = localOption (QuickCheckTests 1000) $ testGroup "QuickCheck"
, testProperty "prop_segment_regressionTest" Utility.Misc.prop_segment_regressionTest
, testProperty "prop_read_write_transferinfo" Logs.Transfer.prop_read_write_transferinfo
, testProperty "prop_read_show_inodecache" Utility.InodeCache.prop_read_show_inodecache
, testProperty "prop_parse_build_log" Logs.Presence.prop_parse_build_log
, testProperty "prop_parse_build_presence_log" Logs.Presence.prop_parse_build_presence_log
, testProperty "prop_parse_build_contentidentifier_log" Logs.ContentIdentifier.prop_parse_build_contentidentifier_log
, testProperty "prop_read_show_TrustLevel" Types.TrustLevel.prop_read_show_TrustLevel
, testProperty "prop_parse_build_TrustLevelLog" Logs.Trust.prop_parse_build_TrustLevelLog
, testProperty "prop_hashes_stable" Utility.Hash.prop_hashes_stable
@ -205,6 +207,7 @@ unitTests :: String -> TestTree
unitTests note = testGroup ("Unit Tests " ++ note)
[ testCase "add dup" test_add_dup
, testCase "add extras" test_add_extras
, testCase "export_import" test_export_import
, testCase "shared clone" test_shared_clone
, testCase "log" test_log
, testCase "import" test_import
@ -1723,3 +1726,51 @@ test_addurl = intmpclonerepo $ do
let dest = "addurlurldest"
filecmd "addurl" ["--file", dest, url] @? ("addurl failed on " ++ url ++ " with --file")
doesFileExist dest @? (dest ++ " missing after addurl --file")
test_export_import :: Assertion
test_export_import = intmpclonerepoInDirect $ do
createDirectory "dir"
git_annex "initremote" (words "foo type=directory encryption=none directory=dir exporttree=yes importtree=yes") @? "initremote failed"
git_annex "get" [] @? "get of files failed"
annexed_present annexedfile
git_annex "export" ["master", "--to", "foo"] @? "export to dir failed"
dircontains annexedfile (content annexedfile)
writedir "import" (content "import")
git_annex "import" ["master", "--from", "foo"] @? "import from dir failed"
boolSystem "git" [Param "merge", Param "foo/master", Param "-mmerge", Param "--allow-unrelated-histories"] @? "git merge foo/master failed"
-- FIXME fails when in an adjusted unlocked branch because
-- it's imported locked
--annexed_present "import"
nukeFile "import"
writecontent "import" (content "newimport1")
git_annex "add" ["import"] @? "add of import failed"
boolSystem "git" [Param "commit", Param "-q", Param "-mchanged"] @? "git commit failed"
git_annex "export" ["master", "--to", "foo"] @? "export modified file to dir failed"
dircontains "import" (content "newimport1")
-- verify that export refuses to overwrite modified file
writedir "import" (content "newimport2")
nukeFile "import"
writecontent "import" (content "newimport3")
git_annex "add" ["import"] @? "add of import failed"
boolSystem "git" [Param "commit", Param "-q", Param "-mchanged"] @? "git commit failed"
git_annex_shouldfail "export" ["master", "--to", "foo"] @? "export failed to fail in conflict"
dircontains "import" (content "newimport2")
-- resolving import conflict
git_annex "import" ["master", "--from", "foo"] @? "import from dir failed"
not <$> boolSystem "git" [Param "merge", Param "foo/master", Param "-mmerge"] @? "git merge of conflict failed to exit nonzero"
nukeFile "import"
writecontent "import" (content "newimport3")
git_annex "add" ["import"] @? "add of import failed"
boolSystem "git" [Param "commit", Param "-q", Param "-mchanged"] @? "git commit failed"
git_annex "export" ["master", "--to", "foo"] @? "export failed after import conflict"
dircontains "import" (content "newimport3")
where
dircontains f v =
((v==) <$> readFile ("dir" </> f))
@? ("did not find expected content of " ++ "dir" </> f)
writedir f = writecontent ("dir" </> f)

View file

@ -21,7 +21,7 @@ import Utility.Split
import qualified System.FilePath.Posix as Posix
-- A location on a remote that a key can be exported to.
-- The FilePath will be relative to the top of the export,
-- The FilePath will be relative to the top of the remote,
-- and uses unix-style path separators.
newtype ExportLocation = ExportLocation FilePath
deriving (Show, Eq)

View file

@ -231,7 +231,7 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexReadOnly :: Bool
, remoteAnnexVerify :: Bool
, remoteAnnexCheckUUID :: Bool
, remoteAnnexExportTracking :: Maybe Git.Ref
, remoteAnnexTrackingBranch :: Maybe Git.Ref
, remoteAnnexTrustLevel :: Maybe String
, remoteAnnexStartCommand :: Maybe String
, remoteAnnexStopCommand :: Maybe String
@ -287,8 +287,10 @@ extractRemoteGitConfig r remotename = do
, remoteAnnexReadOnly = getbool "readonly" False
, remoteAnnexCheckUUID = getbool "checkuuid" True
, remoteAnnexVerify = getbool "verify" True
, remoteAnnexExportTracking = Git.Ref
<$> notempty (getmaybe "export-tracking")
, remoteAnnexTrackingBranch = Git.Ref <$>
( notempty (getmaybe "tracking-branch")
<|> notempty (getmaybe "export-tracking") -- old name
)
, remoteAnnexTrustLevel = notempty $ getmaybe "trustlevel"
, remoteAnnexStartCommand = notempty $ getmaybe "start-command"
, remoteAnnexStopCommand = notempty $ getmaybe "stop-command"

50
Types/Import.hs Normal file
View file

@ -0,0 +1,50 @@
{- git-annex import types
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Types.Import where
import qualified Data.ByteString as S
import Data.Char
import Types.Export
import Utility.QuickCheck
import Utility.FileSystemEncoding
{- Location of content on a remote that can be imported.
- This is just an alias to ExportLocation, because both are referring to a
- location on the remote. -}
type ImportLocation = ExportLocation
mkImportLocation :: FilePath -> ImportLocation
mkImportLocation = mkExportLocation
fromImportLocation :: ImportLocation -> FilePath
fromImportLocation = fromExportLocation
{- An identifier for content stored on a remote that has been imported into
- the repository. It should be reasonably short since it is stored in the
- git-annex branch. -}
newtype ContentIdentifier = ContentIdentifier S.ByteString
deriving (Eq, Ord, Show)
instance Arbitrary ContentIdentifier where
-- Avoid non-ascii ContentIdentifiers because fully arbitrary
-- strings may not be encoded using the filesystem
-- encoding, which is normally applied to all input.
arbitrary = ContentIdentifier . encodeBS
<$> arbitrary `suchThat` all isAscii
{- List of files that can be imported from a remote, each with some added
- information. -}
data ImportableContents info = ImportableContents
{ importableContents :: [(ImportLocation, info)]
, importableHistory :: [ImportableContents info]
-- ^ Used by remotes that support importing historical versions of
-- files that are stored in them. This is equivalent to a git
-- commit history.
}
deriving (Show)
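Because importableHistory nests the same type, a single value can describe a version history. Here is an illustrative value (invented for this edit, not part of the commit), using () as the per-file info: the current tree holds one file, and an older version of the tree held a different one.

exampleContents :: ImportableContents ()
exampleContents = ImportableContents
    { importableContents = [(mkImportLocation "foo", ())]
    , importableHistory =
        [ ImportableContents
            { importableContents = [(mkImportLocation "old-foo", ())]
            , importableHistory = []
            }
        ]
    }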

View file

@ -2,7 +2,7 @@
-
- Most things should not need this, using Types instead
-
- Copyright 2011-2018 Joey Hess <id@joeyh.name>
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -20,7 +20,10 @@ module Types.Remote
, unVerified
, RetrievalSecurityPolicy(..)
, isExportSupported
, isImportSupported
, ExportActions(..)
, ImportActions(..)
, ByteSize
)
where
@ -36,11 +39,13 @@ import Types.Creds
import Types.UrlContents
import Types.NumCopies
import Types.Export
import Types.Import
import Config.Cost
import Utility.Metered
import Git.Types (RemoteName)
import Utility.SafeCommand
import Utility.Url
import Utility.DataUnits
type RemoteConfigKey = String
@ -61,6 +66,8 @@ data RemoteTypeA a = RemoteType
, setup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> a (RemoteConfig, UUID)
-- check if a remote of this type is able to support export
, exportSupported :: RemoteConfig -> RemoteGitConfig -> a Bool
-- check if a remote of this type is able to support import
, importSupported :: RemoteConfig -> RemoteGitConfig -> a Bool
}
instance Eq (RemoteTypeA a) where
@ -102,8 +109,10 @@ data RemoteA a = Remote
-- Some remotes can checkPresent without an expensive network
-- operation.
, checkPresentCheap :: Bool
-- Some remotes support exports of trees.
-- Some remotes support export of trees.
, exportActions :: ExportActions a
-- Some remotes support import of trees.
, importActions :: ImportActions a
-- Some remotes can provide additional details for whereis.
, whereisKey :: Maybe (Key -> a [String])
-- Some remotes can run a fsck operation on the remote,
@ -207,6 +216,9 @@ data RetrievalSecurityPolicy
isExportSupported :: RemoteA a -> a Bool
isExportSupported r = exportSupported (remotetype r) (config r) (gitconfig r)
isImportSupported :: RemoteA a -> a Bool
isImportSupported r = importSupported (remotetype r) (config r) (gitconfig r)
data ExportActions a = ExportActions
-- Exports content to an ExportLocation.
-- The exported file should not appear to be present on the remote
@ -230,7 +242,82 @@ data ExportActions a = ExportActions
-- Throws an exception if the remote cannot be accessed.
, checkPresentExport :: Key -> ExportLocation -> a Bool
-- Renames an already exported file.
-- This may fail, if the file doesn't exist, or the remote does not
-- support renames.
, renameExport :: Key -> ExportLocation -> ExportLocation -> a Bool
-- This may fail with False, if the file doesn't exist.
-- If the remote does not support renames, it can return Nothing.
, renameExport :: Key -> ExportLocation -> ExportLocation -> a (Maybe Bool)
}
data ImportActions a = ImportActions
-- Finds the current set of files that are stored in the remote,
-- along with their content identifiers and size.
--
-- May also find old versions of files that are still stored in the
-- remote.
{ listImportableContents :: a (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
-- Retrieves a file from the remote. Ensures that the file
-- it retrieves has the requested ContentIdentifier.
--
-- This has to be used rather than retrieveExport
-- when a special remote supports imports, since files on such a
-- special remote can be changed at any time.
, retrieveExportWithContentIdentifier
:: ExportLocation
-> ContentIdentifier
-> FilePath
-- ^ file to write content to
-> a (Maybe Key)
-- ^ callback that generates a key from the downloaded content
-> MeterUpdate
-> a (Maybe Key)
-- Exports content to an ExportLocation, and returns the
-- ContentIdentifier corresponding to the content it stored.
--
-- This is used rather than storeExport when a special remote
-- supports imports, since files on such a special remote can be
-- changed at any time.
--
-- Since other things can modify the same file on the special
-- remote, this must take care to not overwrite such modifications,
-- and only overwrite a file that has one of the ContentIdentifiers
-- passed to it, unless listContents can recover an overwritten file.
--
-- Also, since there can be concurrent writers, the implementation
-- needs to make sure that the ContentIdentifier it returns
-- corresponds to what it wrote, not to what some other writer
-- wrote.
, storeExportWithContentIdentifier
:: FilePath
-> Key
-> ExportLocation
-> [ContentIdentifier]
-- ^ old content that it's safe to overwrite
-> MeterUpdate
-> a (Maybe ContentIdentifier)
-- This is used rather than removeExport when a special remote
-- supports imports.
--
-- It should only remove a file from the remote when it has one
-- of the ContentIdentifiers passed to it, unless listContents
-- can recover an overwritten file.
--
-- It needs to handle races similar to storeExportWithContentIdentifier.
, removeExportWithContentIdentifier
:: Key
-> ExportLocation
-> [ContentIdentifier]
-> a Bool
-- Removes a directory from the export, but only when it's empty.
-- Used instead of removeExportDirectory when a special remote
-- supports imports.
--
-- If the directory is not empty, it should succeed.
, removeExportDirectoryWhenEmpty :: Maybe (ExportDirectory -> a Bool)
-- Checks if the specified ContentIdentifier is exported to the
-- remote at the specified ExportLocation.
-- Throws an exception if the remote cannot be accessed.
, checkPresentExportWithContentIdentifier
:: Key
-> ExportLocation
-> [ContentIdentifier]
-> a Bool
}
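For a remote that does not support imports, each of these actions needs a stub. This commit's Remote.Helper.ExportImport provides importUnsupported for that purpose; its body is not shown in this diff, so the following is only a hypothetical equivalent, with a made-up name and assumed stub behaviors rather than the module's actual definitions:

-- Hypothetical sketch; the real value is importUnsupported in
-- Remote.Helper.ExportImport, which this diff does not show.
noImportActions :: ImportActions Annex
noImportActions = ImportActions
    { listImportableContents = return Nothing
    , retrieveExportWithContentIdentifier =
        \_ _ _ _ _ -> giveup "import not supported"
    , storeExportWithContentIdentifier =
        \_ _ _ _ _ -> giveup "import not supported"
    , removeExportWithContentIdentifier =
        \_ _ _ -> giveup "import not supported"
    , removeExportDirectoryWhenEmpty = Nothing
    , checkPresentExportWithContentIdentifier =
        \_ _ _ -> giveup "import not supported"
    }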

View file

@ -18,6 +18,7 @@ import Data.ByteString.Builder
import qualified Data.Semigroup as Sem
import Utility.FileSystemEncoding
import Utility.QuickCheck
import qualified Utility.SimpleProtocol as Proto
-- A UUID is either an arbitrary opaque string, or UUID info may be missing.
@ -81,3 +82,9 @@ type UUIDDescMap = M.Map UUID UUIDDesc
instance Proto.Serializable UUID where
serialize = fromUUID
deserialize = Just . toUUID
instance Arbitrary UUID where
arbitrary = frequency [(1, return NoUUID), (3, UUID <$> arb)]
where
arb = encodeBS <$> listOf1 (elements uuidchars)
uuidchars = '-' : ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9']

View file

@ -17,6 +17,7 @@ import Test.QuickCheck as X
import Data.Time.Clock.POSIX
import Data.Ratio
import System.Posix.Types
import Data.List.NonEmpty (NonEmpty(..))
import Prelude
{- Times before the epoch are excluded. Half with decimal and half without. -}
@ -41,6 +42,10 @@ instance Arbitrary FileID where
instance Arbitrary FileOffset where
arbitrary = nonNegative arbitrarySizedIntegral
{- Quickcheck lacks this instance. -}
instance Arbitrary l => Arbitrary (NonEmpty l) where
arbitrary = (:|) <$> arbitrary <*> arbitrary
nonNegative :: (Num a, Ord a) => Gen a -> Gen a
nonNegative g = g `suchThat` (>= 0)

View file

@ -28,7 +28,7 @@ viaTmp :: (MonadMask m, MonadIO m) => (FilePath -> v -> m ()) -> FilePath -> v -
viaTmp a file content = bracketIO setup cleanup use
where
(dir, base) = splitFileName file
template = base ++ ".tmp"
template = relatedTemplate (base ++ ".tmp")
setup = do
createDirectoryIfMissing True dir
openTempFile dir template

View file

@ -202,7 +202,7 @@ replying with `UNSUPPORTED-REQUEST` is acceptable.
empty directories, this does not need to be implemented.
The directory will be in the form of a relative path, and may contain path
separators, whitespace, and other special characters.
Typically the directory will be empty, but it could possbly contain
Typically the directory will be empty, but it could possibly contain
files or other directories, and it's ok to remove those.
The remote responds with either `REMOVEEXPORTDIRECTORY-SUCCESS`
or `REMOVEEXPORTDIRECTORY-FAILURE`.
@ -211,7 +211,8 @@ replying with `UNSUPPORTED-REQUEST` is acceptable.
Requests the remote rename a file stored on it from the previously
provided Name to the NewName.
The remote responds with `RENAMEEXPORT-SUCCESS` or
`RENAMEEXPORT-FAILURE`.
`RENAMEEXPORT-FAILURE` or with `UNSUPPORTED-REQUEST` if renaming is not
supported.
To support old external special remote programs that have not been updated
to support exports, git-annex will need to handle an `ERROR` response
@ -318,7 +319,8 @@ while it's handling a request.
* `REMOVEEXPORTDIRECTORY-FAILURE`
Indicates that a `REMOVEEXPORTDIRECTORY` failed for whatever reason.
* `UNSUPPORTED-REQUEST`
Indicates that the special remote does not know how to handle a request.
Indicates that the special remote does not know how to handle a request,
or cannot handle the request.
## special remote messages

View file

@ -209,11 +209,17 @@ The situations to keep in mind are these:
This is an extension to the ExportActions api.
listContents :: Annex (Tree [(ExportLocation, ContentIdentifier)])
listContents :: Annex (ContentHistory [(ExportLocation, ContentIdentifier)])
retrieveExportWithContentIdentifier :: ExportLocation -> ContentIdentifier -> (FilePath -> Annex Key) -> MeterUpdate -> Annex (Maybe Key)
storeExportWithContentIdentifier :: FilePath -> Key -> ExportLocation -> Maybe ContentIdentifier -> MeterUpdate -> Annex (Maybe ContentIdentifier)
storeExportWithContentIdentifier :: FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex (Maybe ContentIdentifier)
removeExportWithContentIdentifier :: Key -> ExportLocation -> [ContentIdentifier] -> Annex Bool
removeExportDirectoryWhenEmpty :: Maybe (ExportDirectory -> Annex Bool)
checkPresentExportWithContentIdentifier :: Key -> ExportLocation -> [ContentIdentifier] -> Annex (Maybe Bool)
listContents finds the current set of files that are stored in the remote,
some of which may have been written by other programs than git-annex,
@ -222,8 +228,14 @@ a single node tree.
listContents may also find past versions of files that are stored in the
remote, when it supports storing multiple versions of files. Since it
returns a tree of lists of files, it can represent anything from a linear
history to a full branching version control history.
returns a history tree of lists of files, it can represent anything from a
linear history to a full branching version control history.
Note that listContents does not need to worry about generating an
ExportLocation that contains a ".." attack or an absolute path or other
such mischief. Since a git tree is built from the ExportLocations, and is
merged the same as a tree pulled from a regular git remote is,
git's usual safety measures avoid such attacks.
retrieveExportWithContentIdentifier is used when downloading a new file from
the remote that listContents found. retrieveExport can't be used because
@ -236,6 +248,11 @@ downloaded may not match the requested content identifier (eg when
something else wrote to it while it was being retrieved), and fail
in that case.
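To make the required post-download check concrete, here is a toy sketch (not git-annex's implementation) that treats a local directory as the remote and uses (size, mtime) in place of a ContentIdentifier, loosely in the spirit of the directory special remote. Cid, currentCid and retrieveChecked are all names invented for this sketch.

import System.Directory (copyFile, getFileSize, getModificationTime)
import Data.Time.Clock (UTCTime)

-- (size, mtime) stands in for a ContentIdentifier.
type Cid = (Integer, UTCTime)

currentCid :: FilePath -> IO Cid
currentCid f = (,) <$> getFileSize f <*> getModificationTime f

-- Download, then re-check the content identifier; if a concurrent
-- writer changed the file while it was being copied, refuse the result.
retrieveChecked :: FilePath -> Cid -> FilePath -> IO Bool
retrieveChecked loc wantedcid dest = do
    copyFile loc dest
    gotcid <- currentCid loc
    return (gotcid == wantedcid)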
When a remote supports imports and exports, storeExport and removeExport
should not be used when exporting to it; instead,
storeExportWithContentIdentifier and removeExportWithContentIdentifier
should be used.
storeExportWithContentIdentifier stores content and returns the
content identifier corresponding to what it stored. It can either get
the content identifier in reply to the store (as S3 does with versioning),
@ -248,11 +265,30 @@ to it, to avoid overwriting a file that was modified by something else.
But alternatively, if listContents can later recover the modified file, it can
overwrite the modified file.
storeExportWithContentIdentifier needs to handle the case when there's a
race with a concurrent writer. It needs to avoid getting the wrong
ContentIdentifier for data written by the other writer. It may detect such
races and fail, or it could succeed and overwrite the other file, so long
as it can later be recovered by listContents.
Similarly, removeExportWithContentIdentifier must only remove a file
on the remote if it has the same content identifier that's passed to it,
or if listContents can later recover the modified file.
Otherwise it should fail. (Like removeExport, removeExportWithContentIdentifier
also succeeds if the file is not present.)
Both storeExportWithContentIdentifier and removeExportWithContentIdentifier
need to handle the case when there's a race with a concurrent writer.
They can detect such races and fail. Or, if overwritten/deleted modified
files can later be recovered by listContents, it's acceptable to not detect
the race.
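A matching toy store, reusing Cid and currentCid from the sketch above (storeChecked is likewise an invented name). Note that this version contains exactly the check-then-write race just described; a real implementation needs an atomic primitive (a rename into place, or server-side versioning as with S3) to close it, or must rely on listContents being able to recover the overwritten version.

import System.Directory (copyFile, doesFileExist)

storeChecked :: FilePath -> FilePath -> [Cid] -> IO (Maybe Cid)
storeChecked src loc oldcids = do
    present <- doesFileExist loc
    safe <- if present
        then (`elem` oldcids) <$> currentCid loc
        else return True
    if safe
        then do
            copyFile src loc
            Just <$> currentCid loc
        else return Nothing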
removeExportDirectoryWhenEmpty is used instead of removeExportDirectory.
It should only remove empty directories, and succeeds if there are files
in the directory.
checkPresentExportWithContentIdentifier is used instead of
checkPresentExport. It should verify that one of the provided
ContentIdentifiers matches the current content of the file.
Note that renameExport is never used when the special remote supports
imports, because it may have an implementation that loses changes
to imported files. (For example, it may copy the file to the new name,
and delete the old name.)
## multiple git-annex repos accessing a special remote

View file

@ -6,8 +6,6 @@ git-annex export - export content to a remote
git annex export `treeish --to remote`
git annex export `--tracking treeish --to remote`
# DESCRIPTION
Use this command to export a tree of files from a git-annex repository.
@ -45,6 +43,27 @@ paragraph do not apply. Note that dropping content from such a remote is
not supported. See individual special remotes' documentation for
details of how to enable such versioning.
The `git annex sync --content` command (and the git-annex assistant)
can also be used to export a branch to a special remote,
updating the special remote whenever the branch is changed.
To do this, you need to configure "remote.<name>.annex-tracking-branch"
to tell it what branch to track.
For example:
git config remote.myremote.annex-tracking-branch master
git annex sync --content
You can combine using `git annex export` to send changes to a special
remote with `git annex import` to fetch changes from a special remote.
When a file on a special remote has been modified, exporting to it will
not overwrite the modified file, and the export will not succeed.
You can resolve this conflict by using `git annex import`.
(Some types of special remotes such as S3 with versioning may instead
let an export overwrite the modified file; then `git annex import`
will create a sequence of commits that includes the modified file,
so the overwritten modification is not lost.)
# OPTIONS
* `--to=remote`
@ -53,13 +72,9 @@ details of how to enable such versioning.
* `--tracking`
This makes the export track changes that are committed to
the branch. `git annex sync --content` and the git-annex assistant
will update exports with commits made to the branch.
This is a local configuration setting, similar to a git remote's tracking
branch. You'll need to run `git annex export --tracking` in each
repository you want the export to track.
This is a deprecated way to set "remote.<name>.annex-tracking-branch".
Instead of using this option, you should just set the git configuration
yourself.
* `--fast`
@ -69,28 +84,24 @@ details of how to enable such versioning.
# EXAMPLE
git annex initremote myexport type=directory directory=/mnt/myexport \
git annex initremote myremote type=directory directory=/mnt/myremote \
exporttree=yes encryption=none
git annex export master --to myexport
git annex export master --to myremote
After that, /mnt/myexport will contain the same tree of files as the master
After that, /mnt/myremote will contain the same tree of files as the master
branch does.
git mv myfile subdir/myfile
git commit -m renamed
git annex export master --to myexport
git annex export master --to myremote
That updates /mnt/myexport to reflect the renamed file.
That updates /mnt/myremote to reflect the renamed file.
git annex export master:subdir --to myexport
git annex export master:subdir --to myremote
That updates /mnt/myexport, to contain only the files in the "subdir"
That updates /mnt/myremote, to contain only the files in the "subdir"
directory of the master branch.
git annex export --tracking master --to myexport
That makes myexport track changes that are committed to the master branch.
# EXPORT CONFLICTS
If two different git-annex repositories are both exporting different trees
@ -116,6 +127,8 @@ export`, it will detect the export conflict, and resolve it.
[[git-annex-initremote]](1)
[[git-annex-import]](1)
[[git-annex-sync]](1)
# HISTORY

View file

@ -1,16 +1,77 @@
# NAME
git-annex import - move and add files from outside git working copy
git-annex import - add files from a non-versioned directory or a special remote
# SYNOPSIS
git annex import `[path ...]`
git annex import `[path ...]` | git annex import --from remote branch[:subdir]
# DESCRIPTION
Moves files from somewhere outside the git working copy, and adds them to
the annex. Individual files to import can be specified.
If a directory is specified, the entire directory is imported.
This command is a way to import files from elsewhere into your git-annex
repository. It can import files from a directory into your repository,
or it can import files from a git-annex special remote.
## IMPORTING FROM A SPECIAL REMOTE
Importing from a special remote first downloads all new content from it,
and then constructs a git commit that reflects files that have changed on
the special remote since the last time git-annex looked at it. Merging that
commit into your repository will update it to reflect changes made on the
special remote.
This way, something can be using the special remote for file storage,
adding files, modifying files, and deleting files, and you can track those
changes using git-annex.
You can combine using `git annex import` to fetch changes from a special
remote with `git annex export` to send your local changes to the special
remote.
You can only import from special remotes that were configured with
`importtree=yes` when set up with [[git-annex-initremote]](1). Only some
kinds of special remotes will let you configure them this way.
To import from a special remote, you must specify the name of a branch.
A corresponding remote tracking branch will be updated by `git annex
import`. After that point, it's the same as if you had run a `git fetch`
from a regular git remote; you can `git merge` the changes into your
currently checked out branch.
For example:
git annex import master --from myremote
git merge myremote/master
Note that you may need to pass `--allow-unrelated-histories` the first time
you `git merge` from an import. Think of this as the remote being a
separate git repository with its own files. If you first
`git annex export` files to a remote, and then `git annex import` from it,
you won't need that option.
You can also limit the import to a subdirectory, using the
"branch:subdir" syntax. For example, if "camera" is a special remote
that accesses a camera, and you want to import those into the photos
directory, rather than to the root of your repository:
git annex import master:photos --from camera
git merge camera/master
The `git annex sync --content` command (and the git-annex assistant)
can also be used to import from a special remote.
To do this, you need to configure "remote.<name>.annex-tracking-branch"
to tell it what branch to track. For example:
git config remote.myremote.annex-tracking-branch master
git annex sync --content
## IMPORTING FROM A DIRECTORY
When run with a path, `git annex import` moves files from somewhere outside
the git working copy, and adds them to the annex.
Individual files to import can be specified. If a directory is specified,
the entire directory is imported.
git annex import /media/camera/DCIM/*
@ -25,9 +86,11 @@ a new filename being added to the repository, so the duplicate file
is present in the repository twice. (With all checksumming backends,
including the default SHA256E, only one copy of the data will be stored.)
Several options can be used to adjust handling of duplicate files.
Several options can be used to adjust handling of duplicate files; see
`--duplicate`, `--deduplicate`, `--skip-duplicates`, `--clean-duplicates`,
and `--reinject-duplicates` documentation below.
# OPTIONS
# OPTIONS FOR IMPORTING FROM A DIRECTORY
* `--duplicate`
@ -71,6 +134,8 @@ Several options can be used to adjust handling of duplicate files.
git annex import /dir --include='*.png'
## COMMON OPTIONS
* `--jobs=N` `-JN`
Imports multiple files in parallel. This may be faster.

View file

@ -51,7 +51,8 @@ by running "git annex sync" on the remote.
* `--pull`, `--no-pull`
By default, git pulls from remotes. Use --no-pull to disable all pulling.
By default, git pulls from remotes and imports from some special remotes.
Use --no-pull to disable all pulling.
When `remote.<name>.annex-pull` or `remote.<name>.annex-sync`
are set to false, pulling is disabled for those remotes, and using
@ -59,8 +60,8 @@ by running "git annex sync" on the remote.
* `--push`, `--no-push`
By default, git pushes changes to remotes.
Use --no-push to disable all pushing.
By default, git pushes changes to remotes and exports to some
special remotes. Use --no-push to disable all pushing.
When `remote.<name>.annex-push` or `remote.<name>.annex-sync` are
set to false, or `remote.<name>.annex-readonly` is set to true,
@ -82,9 +83,12 @@ by running "git annex sync" on the remote.
This behavior can be overridden by configuring the preferred content
of a repository. See [[git-annex-preferred-content]](1).
When a special remote is configured as an export and is tracking a branch,
the export will be updated to the current content of the branch.
See [[git-annex-export]](1).
When `remote.<name>.annex-tracking-branch` is configured for a special remote
and that branch is checked out, syncing will import changes from
the remote, merge them into the branch, and export any changes that have
been committed to the branch back to the remote.
See [[git-annex-import]](1) and [[git-annex-export]](1) for details about
how importing and exporting work.
* `--content-of=path` `-C path`

View file

@ -148,7 +148,8 @@ subdirectories).
* `import [path ...]`
Move and add files from outside git working copy into the annex.
Add files from a non-version-controlled directory or a
special remote into the annex.
See [[git-annex-import]](1) for details.
@ -1266,13 +1267,24 @@ Here are all the supported configuration settings.
in some edge cases, where it's likely the case than an
object was downloaded incorrectly, or when needed for security.
* `remote.<name>.annex-tracking-branch`
This is for use with special remotes that support exports and imports.
When set to eg, "master", this tells git-annex that you want the
special remote to track that branch.
When set to eg, "master:subdir", the special remote tracks only
the subdirectory of that branch.
`git-annex sync --content` will import changes from the remote and
merge them into the annex-tracking-branch. It will also export changes
made to the branch to the remote.
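For example, to make a special remote track only the "photos" subdirectory of the master branch (the remote name is illustrative):

git config remote.myremote.annex-tracking-branch master:photos
git annex sync --content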
* `remote.<name>.annex-export-tracking`
When set to a branch name or other treeish, this makes what's exported
to the special remote track changes to the branch. See
[[git-annex-export]](1). `git-annex sync --content` and the
git-annex assistant update exports when changes have been
committed to the tracking branch.
Deprecated name for `remote.<name>.annex-tracking-branch`. Will still be used
if it's configured and `remote.<name>.annex-tracking-branch` is not.
* `remote.<name>.annexUrl`

View file

@ -281,6 +281,18 @@ For example:
1287290776.765152s 26339d22-446b-11e0-9101-002170d25c55:x +1
1291237510.141453s 26339d22-446b-11e0-9101-002170d25c55:x -1 26339d22-446b-11e0-9101-002170d25c55:x +2
## `aaa/bbb/*.log.cid`
These log files store per-remote content identifiers for keys.
A given key may have any number of content identifiers.
The format is a timestamp, followed by the uuid of the remote,
followed by the content identifiers which are separated by colons.
If a content identifier contains a colon or \r or \n, it will be base64
encoded. Base64 encoded values are indicated by prefixing them with "!".
1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 5248916:5250378
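A rough sketch of a parser for such lines (invented for illustration, not git-annex's parser; it also skips the base64 decoding of "!"-prefixed values):

parseCidLine :: String -> Maybe (String, String, [String])
parseCidLine l = case words l of
    [ts, u, cids] -> Just (ts, u, splitOn ':' cids)
    _ -> Nothing
  where
    -- split on colons; "!"-prefixed results still need base64 decoding
    splitOn c s = case break (== c) s of
        (a, _:rest) -> a : splitOn c rest
        (a, []) -> [a]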
## `aaa/bbb/*.log.cnk`
These log files are used when objects are stored in chunked form on

View file

@ -35,6 +35,10 @@ remote:
by [[git-annex-export]]. It will not be usable as a general-purpose
special remote.
* `importtree` - Set to "yes" to make this special remote usable
by [[git-annex-import]]. It will not be usable as a general-purpose
special remote.
Setup example:
# git annex initremote usbdrive type=directory directory=/media/usbdrive/ encryption=none

View file

@ -5,9 +5,55 @@ The command could be `git annex import --from remote`
There also ought to be a way to make `git annex sync` automatically import.
See [[design/importing_trees_from_special_remotes]] for current design for
See [[design/importing_trees_from_special_remotes]] for the design for
this.
Status: Basic `git annex export treeish --to remote` is working,
and `git annex sync --content` can be configured to use it.
## remaining todo
* Currently only the directory special remote supports importing; at least
S3 could also support it.
* Add to external special remote protocol.
* Support importing from adb special remotes, webdav, etc?
Problem is that these may have no way to avoid an export
overwriting changed content that would have been imported otherwise.
So if they're supported the docs need to reflect the problem so the user
avoids situations that cause data loss, or decides to accept the
possibility of data loss.
* When on an adjusted unlocked branch, need to import the files unlocked.
Also, the tracking branch code needs to know about such branches;
currently it will generate the wrong tracking branch.
The test case for `export_import` currently has a line commented out
that fails on adjusted unlocked branches.
Alternatively, do nothing special for adjusted branches, generating a
non-adjusted branch, and require the user to use `git annex sync` to
merge in that branch. Rationale: After fetching from a normal git repo
in an adjusted branch, merging does the same thing, and the docs say to
use `git annex sync` instead. Any improvements to that workflow
(like eg a way to merge a specified branch and update the adjustment)
would thus benefit both use cases.
* Need to support annex.largefiles when importing.
* If a tree containing a non-annexed file is exported,
and then an import is done from the remote, the new tree will have that
file annexed, and so merging it converts to annexed (there is no merge
conflict). This problem seems hard to avoid, other than relying on
annex.largefiles to tell git-annex if a file should be imported
non-annexed.
Although... the importer could check, for each file, whether there's a
corresponding file in the branch it's generating the import for, and
whether that file is annexed. But this might be slow, and seems a lot
of bother for an edge case?
## race conditions
(Some thoughts about races that the design should cover now, but kept here

View file

@ -0,0 +1,27 @@
Collection of non-ideal things about git-annex's use of sqlite databases.
Would be good to improve these sometime, but it would need a migration
process.
* Database.Export.getExportedKey would be faster if there was an index
in the database, eg "ExportedIndex file key". This only affects
the speed of `git annex export`, which is probably swamped by the actual
upload of the data to the remote.
* There may be other selects elsewhere that are not indexed.
* Database.Types has some suboptimal encodings for Key and InodeCache.
They are both slow due to being implemented using String
(which may be fixable w/o changing the DB schema),
and the VARCHARs they generate are longer than necessary
since they look like eg `SKey "whatever"` and `I "whatever"`
* SFilePath is stored efficiently, and has to be a String anyway,
(until ByteStringFilePath is used)
but since it's stored as a VARCHAR, which sqlite interprets using the
current locale, there can be encoding problems. This is at least worked
around with a hack that escapes FilePaths that contain unusual
characters. It would be much better to use a BLOB.
* IKey could fail to round-trip as well, when a Key contains something
(eg, a filename extension) that is not valid in the current locale,
for similar reasons to SFilePath. Using BLOB would be better.

View file

@ -630,6 +630,7 @@ Executable git-annex
Annex.GitOverlay
Annex.HashObject
Annex.Hook
Annex.Import
Annex.Ingest
Annex.Init
Annex.InodeSentinal
@ -649,6 +650,7 @@ Executable git-annex
Annex.Perms
Annex.Queue
Annex.ReplaceFile
Annex.RemoteTrackingBranch
Annex.SpecialRemote
Annex.Ssh
Annex.TaggedPush
@ -808,6 +810,7 @@ Executable git-annex
Config.Smudge
Creds
Crypto
Database.ContentIdentifier
Database.Export
Database.Fsck
Database.Handle
@ -869,6 +872,8 @@ Executable git-annex
Logs.Chunk
Logs.Chunk.Pure
Logs.Config
Logs.ContentIdentifier
Logs.ContentIdentifier.Pure
Logs.Difference
Logs.Difference.Pure
Logs.Export
@ -928,7 +933,7 @@ Executable git-annex
Remote.Helper.Chunked
Remote.Helper.Chunked.Legacy
Remote.Helper.Encryptable
Remote.Helper.Export
Remote.Helper.ExportImport
Remote.Helper.Git
Remote.Helper.Hooks
Remote.Helper.Messages
@ -973,6 +978,7 @@ Executable git-annex
Types.FileMatcher
Types.GitConfig
Types.Group
Types.Import
Types.Key
Types.KeySource
Types.LockCache