From b6ba0bd5564df762f19c88aba53744d45e3ca48f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 19 Jan 2014 17:35:36 -0400 Subject: [PATCH] sync --content: New option that makes the content of annexed files be transferred. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to the assistant, this honors any configured preferred content expressions. I am not entirely happpy with the implementation. It would be nicer if the seek function returned a list of actions which included the individual file gets and copies and drops, rather than the current list of calls to syncContent. This would allow getting rid of the somewhat reundant display of "sync file [ok|failed]" after the get/put display. But, do that, withFilesInGit would need to somehow be able to construct such a mixed action list. And it would be less efficient than the current implementation, which is able to reuse several values between eg get and drop. Note that currently this does not try to satisfy numcopies when getting/putting files (numcopies are of course checked when dropping files!) This makes it like the assistant, and unlike get --auto and copy --auto, which do duplicate files when numcopies is not yet satisfied. I don't know if this is the right decision; it only seemed to make sense to have this parallel the assistant as far as possible to start with, since I know the assistant works. This commit was sponsored by Øyvind Andersen Holm. --- Annex/Drop.hs | 118 ++++++++++++++++++++++++++ Assistant/DaemonStatus.hs | 2 +- Assistant/Drop.hs | 99 ++-------------------- Assistant/Threads/TransferScanner.hs | 2 +- Command/Get.hs | 6 +- Command/Sync.hs | 120 +++++++++++++++++++++++---- Remote.hs | 2 +- Remote/List.hs | 4 +- debian/changelog | 3 + doc/git-annex.mdwn | 8 +- doc/preferred_content.mdwn | 10 ++- 11 files changed, 251 insertions(+), 123 deletions(-) create mode 100644 Annex/Drop.hs diff --git a/Annex/Drop.hs b/Annex/Drop.hs new file mode 100644 index 0000000000..df64895be4 --- /dev/null +++ b/Annex/Drop.hs @@ -0,0 +1,118 @@ +{- dropping of unwanted content + - + - Copyright 2012-2014 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Annex.Drop where + +import Common.Annex +import Logs.Location +import Logs.Trust +import Types.Remote (uuid) +import qualified Remote +import qualified Command.Drop +import Command +import Annex.Wanted +import Annex.Exception +import Config +import Annex.Content.Direct + +import qualified Data.Set as S +import System.Log.Logger (debugM) + +type Reason = String + +{- Drop a key from local and/or remote when allowed by the preferred content + - and numcopies settings. + - + - The Remote list can include other remotes that do not have the content. + - + - A remote can be specified that is known to have the key. This can be + - used an an optimisation when eg, a key has just been uploaded to a + - remote. + -} +handleDrops :: Reason -> [Remote] -> Bool -> Key -> AssociatedFile -> Maybe Remote -> Annex () +handleDrops _ _ _ _ Nothing _ = noop +handleDrops reason rs fromhere key f knownpresentremote = do + locs <- loggedLocations key + handleDropsFrom locs rs reason fromhere key f knownpresentremote + +{- The UUIDs are ones where the content is believed to be present. + - The Remote list can include other remotes that do not have the content; + - only ones that match the UUIDs will be dropped from. + - If allowed to drop fromhere, that drop will be tried first. + - + - In direct mode, all associated files are checked, and only if all + - of them are unwanted are they dropped. + -} +handleDropsFrom :: [UUID] -> [Remote] -> Reason -> Bool -> Key -> AssociatedFile -> Maybe Remote -> Annex () +handleDropsFrom _ _ _ _ _ Nothing _ = noop +handleDropsFrom locs rs reason fromhere key (Just afile) knownpresentremote = do + fs <- ifM isDirect + ( do + l <- associatedFilesRelative key + if null l + then return [afile] + else return l + , return [afile] + ) + n <- getcopies fs + if fromhere && checkcopies n Nothing + then go fs rs =<< dropl fs n + else go fs rs n + where + getcopies fs = do + (untrusted, have) <- trustPartition UnTrusted locs + numcopies <- maximum <$> mapM (getNumCopies <=< numCopies) fs + return (length have, numcopies, S.fromList untrusted) + + {- Check that we have enough copies still to drop the content. + - When the remote being dropped from is untrusted, it was not + - counted as a copy, so having only numcopies suffices. Otherwise, + - we need more than numcopies to safely drop. -} + checkcopies (have, numcopies, _untrusted) Nothing = have > numcopies + checkcopies (have, numcopies, untrusted) (Just u) + | S.member u untrusted = have >= numcopies + | otherwise = have > numcopies + + decrcopies (have, numcopies, untrusted) Nothing = + (have - 1, numcopies, untrusted) + decrcopies v@(_have, _numcopies, untrusted) (Just u) + | S.member u untrusted = v + | otherwise = decrcopies v Nothing + + go _ [] _ = noop + go fs (r:rest) n + | uuid r `S.notMember` slocs = go fs rest n + | checkcopies n (Just $ Remote.uuid r) = + dropr fs r n >>= go fs rest + | otherwise = noop + + checkdrop fs n@(have, numcopies, _untrusted) u a = + ifM (allM (wantDrop True u . Just) fs) + ( ifM (safely $ doCommand $ a (Just numcopies)) + ( do + liftIO $ debugM "drop" $ unwords + [ "dropped" + , afile + , "(from " ++ maybe "here" show u ++ ")" + , "(copies now " ++ show (have - 1) ++ ")" + , ": " ++ reason + ] + return $ decrcopies n u + , return n + ) + , return n + ) + + dropl fs n = checkdrop fs n Nothing $ \numcopies -> + Command.Drop.startLocal (Just afile) numcopies key knownpresentremote + + dropr fs r n = checkdrop fs n (Just $ Remote.uuid r) $ \numcopies -> + Command.Drop.startRemote (Just afile) numcopies key r + + safely a = either (const False) id <$> tryAnnex a + + slocs = S.fromList locs diff --git a/Assistant/DaemonStatus.hs b/Assistant/DaemonStatus.hs index ef1e065949..e38463ff66 100644 --- a/Assistant/DaemonStatus.hs +++ b/Assistant/DaemonStatus.hs @@ -59,7 +59,7 @@ calcSyncRemotes = do return $ \dstatus -> dstatus { syncRemotes = syncable - , syncGitRemotes = filter Remote.syncableRemote syncable + , syncGitRemotes = filter Remote.gitSyncableRemote syncable , syncDataRemotes = syncdata , syncingToCloudRemote = any iscloud syncdata } diff --git a/Assistant/Drop.hs b/Assistant/Drop.hs index d9d8123972..03ab5ab2ca 100644 --- a/Assistant/Drop.hs +++ b/Assistant/Drop.hs @@ -5,24 +5,15 @@ - Licensed under the GNU GPL version 3 or higher. -} -module Assistant.Drop where +module Assistant.Drop ( + handleDrops, + handleDropsFrom, +) where import Assistant.Common import Assistant.DaemonStatus +import Annex.Drop (handleDropsFrom, Reason) import Logs.Location -import Logs.Trust -import Types.Remote (uuid) -import qualified Remote -import qualified Command.Drop -import Command -import Annex.Wanted -import Annex.Exception -import Config -import Annex.Content.Direct - -import qualified Data.Set as S - -type Reason = String {- Drop from local and/or remote when allowed by the preferred content and - numcopies settings. -} @@ -31,82 +22,4 @@ handleDrops _ _ _ Nothing _ = noop handleDrops reason fromhere key f knownpresentremote = do syncrs <- syncDataRemotes <$> getDaemonStatus locs <- liftAnnex $ loggedLocations key - handleDropsFrom locs syncrs reason fromhere key f knownpresentremote - -{- The UUIDs are ones where the content is believed to be present. - - The Remote list can include other remotes that do not have the content; - - only ones that match the UUIDs will be dropped from. - - If allowed to drop fromhere, that drop will be tried first. - - - - In direct mode, all associated files are checked, and only if all - - of them are unwanted are they dropped. - -} -handleDropsFrom :: [UUID] -> [Remote] -> Reason -> Bool -> Key -> AssociatedFile -> Maybe Remote -> Assistant () -handleDropsFrom _ _ _ _ _ Nothing _ = noop -handleDropsFrom locs rs reason fromhere key (Just afile) knownpresentremote = do - fs <- liftAnnex $ ifM isDirect - ( do - l <- associatedFilesRelative key - if null l - then return [afile] - else return l - , return [afile] - ) - n <- getcopies fs - if fromhere && checkcopies n Nothing - then go fs rs =<< dropl fs n - else go fs rs n - where - getcopies fs = liftAnnex $ do - (untrusted, have) <- trustPartition UnTrusted locs - numcopies <- maximum <$> mapM (getNumCopies <=< numCopies) fs - return (length have, numcopies, S.fromList untrusted) - - {- Check that we have enough copies still to drop the content. - - When the remote being dropped from is untrusted, it was not - - counted as a copy, so having only numcopies suffices. Otherwise, - - we need more than numcopies to safely drop. -} - checkcopies (have, numcopies, _untrusted) Nothing = have > numcopies - checkcopies (have, numcopies, untrusted) (Just u) - | S.member u untrusted = have >= numcopies - | otherwise = have > numcopies - - decrcopies (have, numcopies, untrusted) Nothing = - (have - 1, numcopies, untrusted) - decrcopies v@(_have, _numcopies, untrusted) (Just u) - | S.member u untrusted = v - | otherwise = decrcopies v Nothing - - go _ [] _ = noop - go fs (r:rest) n - | uuid r `S.notMember` slocs = go fs rest n - | checkcopies n (Just $ Remote.uuid r) = - dropr fs r n >>= go fs rest - | otherwise = noop - - checkdrop fs n@(have, numcopies, _untrusted) u a = - ifM (liftAnnex $ allM (wantDrop True u . Just) fs) - ( ifM (liftAnnex $ safely $ doCommand $ a (Just numcopies)) - ( do - debug - [ "dropped" - , afile - , "(from " ++ maybe "here" show u ++ ")" - , "(copies now " ++ show (have - 1) ++ ")" - , ": " ++ reason - ] - return $ decrcopies n u - , return n - ) - , return n - ) - - dropl fs n = checkdrop fs n Nothing $ \numcopies -> - Command.Drop.startLocal (Just afile) numcopies key knownpresentremote - - dropr fs r n = checkdrop fs n (Just $ Remote.uuid r) $ \numcopies -> - Command.Drop.startRemote (Just afile) numcopies key r - - safely a = either (const False) id <$> tryAnnex a - - slocs = S.fromList locs + liftAnnex $ handleDropsFrom locs syncrs reason fromhere key f knownpresentremote diff --git a/Assistant/Threads/TransferScanner.hs b/Assistant/Threads/TransferScanner.hs index ba302d6bb9..b001957891 100644 --- a/Assistant/Threads/TransferScanner.hs +++ b/Assistant/Threads/TransferScanner.hs @@ -156,7 +156,7 @@ expensiveScan urlrenderer rs = unless onlyweb $ batch <~> do syncrs <- syncDataRemotes <$> getDaemonStatus locs <- liftAnnex $ loggedLocations key present <- liftAnnex $ inAnnex key - handleDropsFrom locs syncrs + liftAnnex $ handleDropsFrom locs syncrs "expensive scan found too many copies of object" present key (Just f) Nothing liftAnnex $ do diff --git a/Command/Get.hs b/Command/Get.hs index 9adf79393c..52fbd25f9d 100644 --- a/Command/Get.hs +++ b/Command/Get.hs @@ -59,7 +59,11 @@ perform key afile = stopUnless (getViaTmp key $ getKeyFile key afile) $ {- Try to find a copy of the file in one of the remotes, - and copy it to here. -} getKeyFile :: Key -> AssociatedFile -> FilePath -> Annex Bool -getKeyFile key afile dest = dispatch =<< Remote.keyPossibilities key +getKeyFile key afile dest = getKeyFile' key afile dest + =<< Remote.keyPossibilities key + +getKeyFile' :: Key -> AssociatedFile -> FilePath -> [Remote] -> Annex Bool +getKeyFile' key afile dest = dispatch where dispatch [] = do showNote "not available" diff --git a/Command/Sync.hs b/Command/Sync.hs index 38a6a5c6a6..429a219bb5 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -1,7 +1,7 @@ {- git-annex command - - Copyright 2011 Joachim Breitner - - Copyright 2011,2012 Joey Hess + - Copyright 2011-2014 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -10,10 +10,11 @@ module Command.Sync where import Common.Annex import Command -import qualified Remote import qualified Annex import qualified Annex.Branch import qualified Annex.Queue +import qualified Remote +import qualified Types.Remote as Remote import Annex.Direct import Annex.CatFile import Annex.Link @@ -26,19 +27,34 @@ import qualified Git import Git.Types (BlobType(..)) import qualified Types.Remote import qualified Remote.Git +import qualified Option import Types.Key import Config import Annex.ReplaceFile import Git.FileMode +import Annex.Wanted +import Annex.Content +import Command.Get (getKeyFile') +import Logs.Transfer +import Logs.Presence +import Logs.Location +import Annex.Drop import qualified Data.Set as S import Data.Hash.MD5 import Control.Concurrent.MVar def :: [Command] -def = [command "sync" (paramOptional (paramRepeating paramRemote)) +def = [withOptions syncOptions $ + command "sync" (paramOptional (paramRepeating paramRemote)) [seek] SectionCommon "synchronize local repository with remotes"] +syncOptions :: [Option] +syncOptions = [ contentOption ] + +contentOption :: Option +contentOption = Option.flag [] "content" "also transfer file contents" + -- syncing involves several operations, any of which can independently fail seek :: CommandSeek seek rs = do @@ -60,17 +76,26 @@ seek rs = do let withbranch a = a =<< getbranch remotes <- syncRemotes rs + let gitremotes = filter Remote.gitSyncableRemote remotes + + synccontent <- ifM (Annex.getFlag $ Option.name contentOption) + ( withFilesInGit (whenAnnexed $ syncContent remotes) [] + , return [] + ) + return $ concat [ [ commit ] , [ withbranch mergeLocal ] - , [ withbranch (pullRemote remote) | remote <- remotes ] + , map (withbranch . pullRemote) gitremotes , [ mergeAnnex ] + , synccontent , [ withbranch pushLocal ] - , [ withbranch (pushRemote remote) | remote <- remotes ] + , map (withbranch . pushRemote) gitremotes ] {- Merging may delete the current directory, so go to the top - - of the repo. -} + - of the repo. This also means that sync always acts on all files in the + - repository, not just on a subdirectory. -} prepMerge :: Annex () prepMerge = liftIO . setCurrentDirectory =<< fromRepo Git.repoPath @@ -83,21 +108,16 @@ remoteBranch remote = Git.Ref.underBase $ "refs/remotes/" ++ Remote.name remote syncRemotes :: [String] -> Annex [Remote] syncRemotes rs = ifM (Annex.getState Annex.fast) ( nub <$> pickfast , wanted ) where - pickfast = (++) <$> listed <*> (good =<< fastest <$> available) + pickfast = (++) <$> listed <*> (filterM good =<< fastest <$> available) wanted - | null rs = good =<< concat . Remote.byCost <$> available + | null rs = filterM good =<< concat . Remote.byCost <$> available | otherwise = listed - listed = do - l <- catMaybes <$> mapM (Remote.byName . Just) rs - let s = filter (not . Remote.syncableRemote) l - unless (null s) $ - error $ "cannot sync special remotes: " ++ - unwords (map Types.Remote.name s) - return l - available = filter Remote.syncableRemote - . filter (remoteAnnexSync . Types.Remote.gitconfig) + listed = catMaybes <$> mapM (Remote.byName . Just) rs + available = filter (remoteAnnexSync . Types.Remote.gitconfig) <$> Remote.remoteList - good = filterM $ Remote.Git.repoAvail . Types.Remote.repo + good r + | Remote.gitSyncableRemote r = Remote.Git.repoAvail $ Types.Remote.repo r + | otherwise = return True fastest = fromMaybe [] . headMaybe . Remote.byCost commit :: CommandStart @@ -152,6 +172,9 @@ mergeLocal (Just branch) = go =<< needmerge pushLocal :: Maybe Git.Ref -> CommandStart pushLocal Nothing = stop pushLocal (Just branch) = do + -- In case syncing content made changes to the git-annex branch, + -- commit it. + Annex.Branch.commit "update" -- Update the sync branch to match the new state of the branch inRepo $ updateBranch $ syncBranch branch -- In direct mode, we're operating on some special direct mode @@ -464,3 +487,64 @@ newer remote b = do ( inRepo $ Git.Branch.changed r b , return True ) + +{- If it's preferred content, and we don't have it, get it from one of the + - listed remotes (preferring the cheaper earlier ones). + - + - Send it to each remote that doesn't have it, and for which it's + - preferred content. + - + - Drop it locally if it's not preferred content (honoring numcopies). + - + - Drop it from each remote that has it, where it's not preferred content + - (honoring numcopies). + -} +syncContent :: [Remote] -> FilePath -> (Key, Backend) -> CommandStart +syncContent rs f (k, _) = do + locs <- loggedLocations k + let (have, lack) = partition (\r -> Remote.uuid r `elem` locs) rs + + results <- mapM run =<< concat <$> sequence + [ handleget have + , handleput lack + ] + handleDropsFrom locs rs "unwanted" True k (Just f) Nothing + if null results + then stop + else do + showStart "sync" f + next $ next $ return $ all id results + where + run a = do + r <- a + showEndResult r + return r + + wantget have = allM id + [ pure (not $ null have) + , not <$> inAnnex k + , wantGet True (Just f) + ] + handleget have = ifM (wantget have) + ( return [ get have ] + , return [] + ) + get have = do + showStart "get" f + getViaTmp k $ \dest -> getKeyFile' k (Just f) dest have + + wantput r + | Remote.readonly r || remoteAnnexReadOnly (Types.Remote.gitconfig r) = return False + | otherwise = wantSend True (Just f) (Remote.uuid r) + handleput lack = ifM (inAnnex k) + ( map put <$> (filterM wantput lack) + , return [] + ) + put dest = do + showStart "copy" f + showAction $ "to " ++ Remote.name dest + ok <- upload (Remote.uuid dest) k (Just f) noRetry $ + Remote.storeKey dest k (Just f) + when ok $ + Remote.logStatus dest k InfoPresent + return ok diff --git a/Remote.hs b/Remote.hs index e355b09751..3c838a6230 100644 --- a/Remote.hs +++ b/Remote.hs @@ -20,7 +20,7 @@ module Remote ( remoteTypes, remoteList, - syncableRemote, + gitSyncableRemote, remoteMap, uuidDescriptions, byName, diff --git a/Remote/List.hs b/Remote/List.hs index 31a9209b15..e3afc939ca 100644 --- a/Remote/List.hs +++ b/Remote/List.hs @@ -111,6 +111,6 @@ updateRemote remote = do | otherwise = return r {- Checks if a remote is syncable using git. -} -syncableRemote :: Remote -> Bool -syncableRemote r = remotetype r `elem` +gitSyncableRemote :: Remote -> Bool +gitSyncableRemote r = remotetype r `elem` [ Remote.Git.remote, Remote.GCrypt.remote ] diff --git a/debian/changelog b/debian/changelog index 7dd8019069..d41fe5e6df 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,8 @@ git-annex (5.20140118) UNRELEASED; urgency=medium + * sync --content: New option that makes the content of annexed files be + transferred. Similar to the assistant, this honors any configured + preferred content expressions. * Remove --json option from commands not supporting it. * status: Support --json. * list: Fix specifying of files to list. diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index a3c52458f0..e8058720c0 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -156,8 +156,12 @@ subdirectories). are pushed to the remote, so they can be merged into its working tree by running "git annex sync" on the remote. - Note that sync does not transfer any annexed file contents from or - to the remote repositories; it only syncs the git repositories. + With the `--content` option, the contents of annexed files in the work + tree will also be uploaded and downloaded from remotes. By default, + this tries to get each annexed file that the local repository does not + yet have, and then copies each file to every remote that it is syncing with. + This behavior can be overridden by configuring the preferred content of + a repository. See see PREFERRED CONTENT below. * `merge` diff --git a/doc/preferred_content.mdwn b/doc/preferred_content.mdwn index 2e12dce497..9c698c8ba7 100644 --- a/doc/preferred_content.mdwn +++ b/doc/preferred_content.mdwn @@ -3,13 +3,15 @@ data always exist, and leaves it up to you to use commands like `git annex get` and `git annex drop` to move the content to the repositories you want to contain it. But sometimes, it can be good to have more fine-grained control over which repositories prefer to have which content. Configuring -this allows `git annex get --auto`, `git annex drop --auto`, etc to do -smarter things. +this allows the git-annex assistant as well as +`git annex get --auto`, `git annex drop --auto`, `git annex sync --content`, +etc to do smarter things. Preferred content settings can be edited using `git annex vicfg`, or viewed and set at the command line with `git annex wanted`. -Each repository can have its own settings, and other repositories may also -try to honor those settings. So there's no local `.git/config` setting it. +Each repository can have its own settings, and other repositories will +try to honor those settings when interacting with it. +So there's no local `.git/config` for preferred content settings. The idea is that you write an expression that files are matched against. If a file matches, it's preferred to have its content stored in the