diff --git a/Annex/Import.hs b/Annex/Import.hs index d268ad6dde..585deefa24 100644 --- a/Annex/Import.hs +++ b/Annex/Import.hs @@ -13,7 +13,9 @@ module Annex.Import ( ImportCommitConfig(..), buildImportCommit, buildImportTrees, - downloadImport + downloadImport, + filterImportableContents, + makeImportMatcher, ) where import Annex.Common @@ -41,6 +43,10 @@ import Messages.Progress import Utility.DataUnits import Logs.Export import Logs.Location +import Logs.PreferredContent +import Types.FileMatcher +import Annex.FileMatcher +import Utility.Matcher (isEmpty) import qualified Database.Export as Export import qualified Database.ContentIdentifier as CIDDb import qualified Logs.ContentIdentifier as CIDLog @@ -192,7 +198,7 @@ buildImportCommit' remote importcommitconfig mtrackingcommit imported@(History t | otherwise -> do let oldimportedtrees = mapHistory historyCommitTree oldimported mknewcommits oldhc oldimportedtrees imported - ti' <- addBackNonPreferredContent remote ti + ti' <- addBackExportExcluded remote ti Just <$> makeRemoteTrackingBranchMergeCommit' trackingcommit importedcommit ti' where @@ -399,11 +405,11 @@ importKey (ContentIdentifier cid) size = stubKey -- special remote). -- -- That presents a problem: Merging the imported tree would result - -- in deletion of the non-preferred content. To avoid that happening, - -- this adds the non-preferred content back to the imported tree. + -- in deletion of the files that were excluded from export. + -- To avoid that happening, this adds them back to the imported tree. 
--} -addBackNonPreferredContent :: Remote -> Sha -> Annex Sha -addBackNonPreferredContent remote importtree = +addBackExportExcluded :: Remote -> Sha -> Annex Sha +addBackExportExcluded remote importtree = getExportExcluded (Remote.uuid remote) >>= \case [] -> return importtree excludedlist -> inRepo $ @@ -417,3 +423,60 @@ addBackNonPreferredContent remote importtree = (\imported _excluded -> imported) [] importtree + +{- Match the preferred content of the remote at import time. + - + - Only keyless tokens are supported, because the keys are not known + - until an imported file is downloaded, which is too late to bother + - excluding it from an import. + -} +makeImportMatcher :: Remote -> Annex (Either String (FileMatcher Annex)) +makeImportMatcher r = load preferredContentKeylessTokens >>= \case + Nothing -> return $ Right matchAll + Just (Right v) -> return $ Right v + Just (Left err) -> load preferredContentTokens >>= \case + Just (Left err') -> return $ Left err' + _ -> return $ Left $ + "The preferred content expression contains terms that cannot be checked when importing: " ++ err + where + load t = M.lookup (Remote.uuid r) . fst <$> preferredRequiredMapsLoad' t + +wantImport :: FileMatcher Annex -> ImportLocation -> ByteSize -> Annex Bool +wantImport matcher loc sz = checkMatcher' matcher mi mempty + where + mi = MatchingInfo $ ProvidedInfo + { providedFilePath = Right $ fromImportLocation loc + , providedKey = unavail "key" + , providedFileSize = Right sz + , providedMimeType = unavail "mime" + , providedMimeEncoding = unavail "mime" + } + -- This should never run, as long as the FileMatcher was generated + -- using the preferredContentKeylessTokens. + unavail v = Left $ error $ "Internal error: unavailable " ++ v + +{- If a file is not preferred content, but it was previously exported or + - imported to the remote, not importing it would result in a remote + - tracking branch that, when merged, would delete the file. 
+ - + - To avoid that problem, such files are included in the import. + - The next export will remove them from the remote. + -} +shouldImport :: Export.ExportHandle -> FileMatcher Annex -> ImportLocation -> ByteSize -> Annex Bool +shouldImport dbhandle matcher loc sz = + wantImport matcher loc sz + <||> + liftIO (not . null <$> Export.getExportTreeKey dbhandle loc) + +filterImportableContents :: Remote -> FileMatcher Annex -> ImportableContents (ContentIdentifier, ByteSize) -> Annex (ImportableContents (ContentIdentifier, ByteSize)) +filterImportableContents r matcher importable + | isEmpty matcher = return importable + | otherwise = do + dbhandle <- Export.openDb (Remote.uuid r) + go dbhandle importable + where + go dbhandle ic = ImportableContents + <$> filterM (match dbhandle) (importableContents ic) + <*> mapM (go dbhandle) (importableHistory ic) + + match dbhandle (loc, (_cid, sz)) = shouldImport dbhandle matcher loc sz diff --git a/CHANGELOG b/CHANGELOG index 0e78fb27b3..943c62cf12 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,6 +7,10 @@ git-annex (7.20190508) UNRELEASED; urgency=medium annex.jobs=cpus, or using option --jobs=cpus or -Jcpus. * Honor preferred content of a special remote when exporting trees to it; unwanted files are filtered out of the tree that is exported. + * Importing from a special remote honors its preferred content too; + unwanted files are not imported. But, some preferred content + expressions can't be checked before files are imported, and trying to + import with such an expression will fail. * Improve shape of commit tree when importing from unversioned special remotes. 
diff --git a/Command/Import.hs b/Command/Import.hs index b108535f79..f890f982bd 100644 --- a/Command/Import.hs +++ b/Command/Import.hs @@ -293,9 +293,13 @@ listContents remote tvar = do showStart' "list" (Just (Remote.name remote)) next $ Remote.listImportableContents (Remote.importActions remote) >>= \case Nothing -> giveup $ "Unable to list contents of " ++ Remote.name remote - Just importable -> next $ do - liftIO $ atomically $ writeTVar tvar (Just importable) - return True + Just importable -> do + importable' <- makeImportMatcher remote >>= \case + Right matcher -> filterImportableContents remote matcher importable + Left err -> giveup $ "Cannot import from " ++ Remote.name remote ++ " because of a problem with its configuration: " ++ err + next $ do + liftIO $ atomically $ writeTVar tvar (Just importable') + return True commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents Key -> CommandStart commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig importable = do diff --git a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs index 16ffef1297..57bbeb6bd9 100644 --- a/Logs/PreferredContent.hs +++ b/Logs/PreferredContent.hs @@ -20,6 +20,7 @@ module Logs.PreferredContent ( setStandardGroup, defaultStandardGroup, preferredRequiredMapsLoad, + preferredRequiredMapsLoad', prop_standardGroups_parse, ) where @@ -71,24 +72,37 @@ requiredContentMap = maybe (snd <$> preferredRequiredMapsLoad preferredContentTo preferredRequiredMapsLoad :: (PreferredContentData -> [ParseToken (MatchFiles Annex)]) -> Annex (FileMatcherMap Annex, FileMatcherMap Annex) preferredRequiredMapsLoad mktokens = do + (pc, rc) <- preferredRequiredMapsLoad' mktokens + let pc' = handleunknown pc + let rc' = handleunknown rc + Annex.changeState $ \s -> s + { Annex.preferredcontentmap = Just pc' + , Annex.requiredcontentmap = Just rc' + } + return (pc', rc') + where + handleunknown = M.mapWithKey $ 
\u -> + fromRight (unknownMatcher u) + +preferredRequiredMapsLoad' :: (PreferredContentData -> [ParseToken (MatchFiles Annex)]) -> Annex (M.Map UUID (Either String (FileMatcher Annex)), M.Map UUID (Either String (FileMatcher Annex))) +preferredRequiredMapsLoad' mktokens = do groupmap <- groupMap configmap <- readRemoteLog let genmap l gm = - let mk u = fromRight (unknownMatcher u) . - makeMatcher groupmap configmap gm u mktokens + let mk u = makeMatcher groupmap configmap gm u mktokens in simpleMap . parseLogOldWithUUID (\u -> mk u . decodeBS <$> A.takeByteString) <$> Annex.Branch.get l pc <- genmap preferredContentLog =<< groupPreferredContentMapRaw rc <- genmap requiredContentLog M.empty - -- Required content is implicitly also preferred content, so - -- combine. - let m = M.unionWith combineMatchers pc rc - Annex.changeState $ \s -> s - { Annex.preferredcontentmap = Just m - , Annex.requiredcontentmap = Just rc - } - return (m, rc) + -- Required content is implicitly also preferred content, so combine. + let pc' = M.unionWith combiner pc rc + return (pc', rc) + where + combiner (Right a) (Right b) = Right (combineMatchers a b) + combiner (Left a) (Left b) = Left (a ++ " " ++ b) + combiner (Left a) (Right _) = Left a + combiner (Right _) (Left b) = Left b {- This intentionally never fails, even on unparsable expressions, - because the configuration is shared among repositories and newer diff --git a/Types/FileMatcher.hs b/Types/FileMatcher.hs index d7ad96d066..d0e24ba37d 100644 --- a/Types/FileMatcher.hs +++ b/Types/FileMatcher.hs @@ -31,7 +31,7 @@ data FileInfo = FileInfo } -- This is used when testing a matcher, with values to match against --- provided by the user, rather than queried from files. +-- provided in some way, rather than queried from files on disk. 
data ProvidedInfo = ProvidedInfo { providedFilePath :: OptInfo FilePath , providedKey :: OptInfo Key @@ -48,7 +48,7 @@ getInfo :: MonadIO m => OptInfo a -> m a getInfo (Right i) = return i getInfo (Left e) = liftIO e -type FileMatcherMap a = M.Map UUID (Utility.Matcher.Matcher (S.Set UUID -> MatchInfo -> a Bool)) +type FileMatcherMap a = M.Map UUID (FileMatcher a) type MkLimit a = String -> Either String (MatchFiles a) diff --git a/doc/git-annex-export.mdwn b/doc/git-annex-export.mdwn index fa92efb7e6..8d5b84dc8f 100644 --- a/doc/git-annex-export.mdwn +++ b/doc/git-annex-export.mdwn @@ -68,8 +68,6 @@ let an export overwrite the modified file; then `git annex import` will create a sequence of commits that includes the modified file, so the overwritten modification is not lost.) -# PREFERRED - # OPTIONS * `--to=remote` diff --git a/doc/git-annex-import.mdwn b/doc/git-annex-import.mdwn index de4dc76783..a89986f013 100644 --- a/doc/git-annex-import.mdwn +++ b/doc/git-annex-import.mdwn @@ -68,18 +68,14 @@ to tell it what branch to track. For example: If a preferred content expression is configured for the special remote, it will be honored when importing from it. Files that are not preferred content of the remote will not be imported from it, but will be left on the -remote. A couple of caveats: +remote. -References to directories in the preferred content expression -are relative to the top of the special remote, not of the git repository -it's being imported into. - -Preferred content expressions that relate to the content of a file will -make the file be downloaded from the special remote, even when it turns out -not to be preferred content. The download will only happen once for each -version of a file, and the unwanted content will be thrown away. 
Such -expressions include "copies=", "metadata=", and other things that depend on -the key, but not "smallerthan=", "largerthan=", "include=", "exclude=" +However, preferred content expressions that relate to the key +can't be matched when importing, because the content of the file is not +known. Importing will fail when such a preferred content expression is +set. This includes expressions containing "copies=", "metadata=", and other +things that depend on the key. Preferred content expressions containing +"include=", "exclude=", "smallerthan=", "largerthan=" will work. # IMPORTING FROM A DIRECTORY diff --git a/doc/git-annex-preferred-content.mdwn b/doc/git-annex-preferred-content.mdwn index d9209a7a9c..832100a94e 100644 --- a/doc/git-annex-preferred-content.mdwn +++ b/doc/git-annex-preferred-content.mdwn @@ -44,9 +44,9 @@ elsewhere to allow removing it). when you're done with them. Then you could configure your laptop to prefer to not retain those files, like this: `exclude=*/archive/*` - When a subdirectory is being exported to a special remote (see - [[git-annex-export]](1)), these match relative to the top of the - subdirectory. + When a subdirectory is being exported to or imported from a special remote (see + [[git-annex-export]](1) and [[git-annex-import]](1)), these match relative + to the top of the subdirectory. * `copies=number` diff --git a/doc/todo/export_preferred_content.mdwn b/doc/todo/export_preferred_content.mdwn index cba1e43616..bc82528b31 100644 --- a/doc/todo/export_preferred_content.mdwn +++ b/doc/todo/export_preferred_content.mdwn @@ -4,6 +4,8 @@ But, in some situations, the user may want to export a subset of files, in a way that can be well expressed by a preferred content expression. > started work on this in the `preferred` branch. --[[Joey]] +> +> > And [[done]]! --[[Joey]] For example, they may want to export .mp3 files but not the .wav files used to produce those.
@@ -39,10 +41,9 @@ exclude= etc match relative to the top of the exported tree when exporting a subtree. > done -Problem: Each `git-annex sync --content` re-filters the exported tree. +Note: Each `git-annex sync --content` re-filters the exported tree. Unnecessary work. If there were a way to look up the original tree that corresponds with the filtered exported tree, that could be avoided. -TODO ---- @@ -54,6 +55,7 @@ TODO > is added to the remote, it shouldn't be downloaded. Or a better example, > if directory Music is excluded from an android remote, importing from > it should exclude that directory. +> > done ## import after limited export @@ -167,6 +169,9 @@ TODO > and if they changed to `exclude=*.mp3 or metadata=tag=podcast` > and it did all that extra work, that would be surprising. +> > done; it seemed to make sense at least at first to make import +> > fail when the preferred content depended on a key. + ## different preferred content for export and import? May be cases where this makes sense. For example, I might make my phone @@ -226,14 +231,10 @@ But, if some other file got deleted from the special remote after the export, the import would then not delete it. Alternatively, when a preferred content expression doesn't match a file at -import, could check if the same file was present in the last export. (With -same or different content.) If so, assume the preferred content has changed -and that the user does not want to delete this file, so keep it in the -import anyway (using the content that was last exported to it). -(The state does not currently differentiate between the last export -and the last import, so the file would keep being included in -imports until an export was made that removed it.) - -OR, don't match preferred content expressions on import at all; download -everything, and let the user delete unwanted imports locally. Does avoid -all these complications.
+import, could check if the same file is known to be present on the remote +as of the last import or export. (With same or different content.) If so, +assume the preferred content has changed and that the user does not want to +delete this file, so keep it in the import anyway. This way the import does +not delete files from master, and when the next export removes it from +the remote it will still not get deleted from master. +> done