diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 2262a9b3ad..542f1cfecd 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -29,25 +29,35 @@ import Logs.Transfer import Remote.List import qualified Remote import Annex.CatFile -import Git.CatFile (catObjectStream) +import Git.CatFile import Annex.CurrentBranch import Annex.Content +import Annex.Link import Annex.InodeSentinal +import Annex.Concurrent import qualified Annex.Branch import qualified Annex.BranchState import qualified Database.Keys import qualified Utility.RawFilePath as R +import Utility.Tuple + +import Control.Concurrent.Async +import System.Posix.Types withFilesInGit :: WarnUnmatchWhen -> (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek -withFilesInGit ww a l = seekActions $ prepFiltered a $ - seekHelper ww LsFiles.inRepo l +withFilesInGit ww a l = seekFiltered a $ + seekHelper id ww LsFiles.inRepo l + +withFilesInGitAnnex :: WarnUnmatchWhen -> (RawFilePath -> Key -> CommandSeek) -> [WorkTreeItem] -> CommandSeek +withFilesInGitAnnex ww a l = seekFiltered' a $ + seekHelper fst3 ww LsFiles.inRepoDetails l withFilesInGitNonRecursive :: WarnUnmatchWhen -> String -> (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek withFilesInGitNonRecursive ww needforce a l = ifM (Annex.getState Annex.force) ( withFilesInGit ww a l , if null l then giveup needforce - else seekActions $ prepFiltered a (getfiles [] l) + else seekFiltered a (getfiles [] l) ) where getfiles c [] = return (reverse c) @@ -71,8 +81,8 @@ withFilesNotInGit a l = go =<< seek g <- gitRepo liftIO $ Git.Command.leaveZombie <$> LsFiles.notInRepo [] force (map (\(WorkTreeItem f) -> toRawFilePath f) l) g - go fs = seekActions $ prepFiltered a $ - return $ concat $ segmentPaths (map (\(WorkTreeItem f) -> toRawFilePath f) l) fs + go fs = seekFiltered a $ + return $ concat $ segmentPaths id (map (\(WorkTreeItem f) -> toRawFilePath f) l) fs withPathContents :: ((FilePath, FilePath) -> CommandSeek) -> CmdParams -> CommandSeek withPathContents a params = do @@ -94,21 +104,21 @@ withPathContents a params = do } withWords :: ([String] -> CommandSeek) -> CmdParams -> CommandSeek -withWords a params = seekActions $ return [a params] +withWords a params = a params withStrings :: (String -> CommandSeek) -> CmdParams -> CommandSeek -withStrings a params = seekActions $ return $ map a params +withStrings a params = sequence_ $ map a params withPairs :: ((String, String) -> CommandSeek) -> CmdParams -> CommandSeek -withPairs a params = seekActions $ return $ map a $ pairs [] params +withPairs a params = sequence_ $ map a $ pairs [] params where pairs c [] = reverse c pairs c (x:y:xs) = pairs ((x,y):c) xs pairs _ _ = giveup "expected pairs" withFilesToBeCommitted :: (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek -withFilesToBeCommitted a l = seekActions $ prepFiltered a $ - seekHelper WarnUnmatchWorkTreeItems (const LsFiles.stagedNotDeleted) l +withFilesToBeCommitted a l = seekFiltered a $ + seekHelper id WarnUnmatchWorkTreeItems (const LsFiles.stagedNotDeleted) l isOldUnlocked :: RawFilePath -> Annex Bool isOldUnlocked f = liftIO (notSymlink f) <&&> @@ -117,11 +127,10 @@ isOldUnlocked f = liftIO (notSymlink f) <&&> {- unlocked pointer files that are staged, and whose content has not been - modified-} withUnmodifiedUnlockedPointers :: WarnUnmatchWhen -> (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek -withUnmodifiedUnlockedPointers ww a l = seekActions $ - prepFiltered a unlockedfiles +withUnmodifiedUnlockedPointers ww a l = seekFiltered a unlockedfiles where unlockedfiles = filterM isUnmodifiedUnlocked - =<< seekHelper ww (const LsFiles.typeChangedStaged) l + =<< seekHelper id ww (const LsFiles.typeChangedStaged) l isUnmodifiedUnlocked :: RawFilePath -> Annex Bool isUnmodifiedUnlocked f = catKeyFile f >>= \case @@ -130,11 +139,11 @@ isUnmodifiedUnlocked f = catKeyFile f >>= \case {- Finds files that may be modified. -} withFilesMaybeModified :: WarnUnmatchWhen -> (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek -withFilesMaybeModified ww a params = seekActions $ - prepFiltered a $ seekHelper ww LsFiles.modified params +withFilesMaybeModified ww a params = seekFiltered a $ + seekHelper id ww LsFiles.modified params withKeys :: (Key -> CommandSeek) -> CmdParams -> CommandSeek -withKeys a l = seekActions $ return $ map (a . parse) l +withKeys a l = sequence_ $ map (a . parse) l where parse p = fromMaybe (giveup "bad key") $ deserializeKey p @@ -251,23 +260,51 @@ withKeyOptions' ko auto mkkeyaction fallbackaction params = do forM_ ts $ \(t, i) -> keyaction (transferKey t, mkActionItem (t, i)) -prepFiltered :: (RawFilePath -> CommandSeek) -> Annex [RawFilePath] -> Annex [CommandSeek] -prepFiltered a fs = do +seekFiltered :: (RawFilePath -> CommandSeek) -> Annex [RawFilePath] -> Annex () +seekFiltered a fs = do matcher <- Limit.getMatcher - map (process matcher) <$> fs + sequence_ =<< (map (process matcher) <$> fs) where process matcher f = whenM (matcher $ MatchingFile $ FileInfo f f) $ a f -seekActions :: Annex [CommandSeek] -> Annex () -seekActions gen = sequence_ =<< gen +seekFiltered' :: (RawFilePath -> Key -> CommandSeek) -> Annex [(RawFilePath, Git.Sha, FileMode)] -> Annex () +seekFiltered' a fs = do + g <- Annex.gitRepo + catObjectStream' g $ \feeder closer reader -> do + tid <- liftIO . async =<< forkState (gofeed feeder closer) + goread reader + join (liftIO (wait tid)) + where + gofeed feeder closer = do + matcher <- Limit.getMatcher + l <- fs + forM_ l $ process matcher feeder + liftIO closer + + process matcher feeder (f, sha, mode) = + -- TODO handle non-symlink separately to avoid + -- catting large files + -- If the matcher needs to look up a key, it should be run + -- in goread, not here, and the key passed in. OTOH, if + -- the matcher does not need to look up a key, it's more + -- efficient to put it here, to avoid catting files that + -- will not be matched. + whenM (matcher $ MatchingFile $ FileInfo f f) $ + liftIO $ feeder (f, sha) -seekHelper :: WarnUnmatchWhen -> ([LsFiles.Options] -> [RawFilePath] -> Git.Repo -> IO ([RawFilePath], IO Bool)) -> [WorkTreeItem] -> Annex [RawFilePath] -seekHelper ww a l = do + goread reader = liftIO reader >>= \case + Just (f, content) -> do + maybe noop (a f) (parseLinkTargetOrPointerLazy =<< content) + goread reader + _ -> return () + +seekHelper :: (a -> RawFilePath) -> WarnUnmatchWhen -> ([LsFiles.Options] -> [RawFilePath] -> Git.Repo -> IO ([a], IO Bool)) -> [WorkTreeItem] -> Annex [a] +seekHelper c ww a l = do os <- seekOptions ww inRepo $ \g -> concat . concat <$> forM (segmentXargsOrdered l') - (runSegmentPaths (\fs -> Git.Command.leaveZombie <$> a os fs g) . map toRawFilePath) + (runSegmentPaths c (\fs -> Git.Command.leaveZombie <$> a os fs g) . map toRawFilePath) where l' = map (\(WorkTreeItem f) -> f) l diff --git a/Command/Export.hs b/Command/Export.hs index eba9532a76..d1fde25d1e 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -200,7 +200,7 @@ changeExport r db (PreferredFiltered new) = do mapdiff a oldtreesha newtreesha = do (diff, cleanup) <- inRepo $ Git.DiffTree.diffTreeRecursive oldtreesha newtreesha - seekActions $ pure $ map a diff + sequence_ $ map a diff void $ liftIO cleanup -- Map of old and new filenames for each changed ExportKey in a diff. diff --git a/Command/Get.hs b/Command/Get.hs index b4cd4d62e4..36156a49b4 100644 --- a/Command/Get.hs +++ b/Command/Get.hs @@ -40,12 +40,13 @@ optParser desc = GetOptions seek :: GetOptions -> CommandSeek seek o = startConcurrency downloadStages $ do from <- maybe (pure Nothing) (Just <$$> getParsed) (getFrom o) - let go = whenAnnexed $ start o from + let go = start o from case batchOption o of - Batch fmt -> batchFilesMatching fmt (go . toRawFilePath) + Batch fmt -> batchFilesMatching fmt + (whenAnnexed go . toRawFilePath) NoBatch -> withKeyOptions (keyOptions o) (autoMode o) (commandAction . startKeys from) - (withFilesInGit ww (commandAction . go)) + (withFilesInGitAnnex ww (\f k -> commandAction (go f k))) =<< workTreeItems ww (getFiles o) where ww = WarnUnmatchLsFiles diff --git a/Command/Multicast.hs b/Command/Multicast.hs index 9dcded33db..5cd688601d 100644 --- a/Command/Multicast.hs +++ b/Command/Multicast.hs @@ -130,7 +130,7 @@ send ups fs = do starting "sending files" (ActionItemOther Nothing) $ withTmpFile "send" $ \t h -> do let ww = WarnUnmatchLsFiles - fs' <- seekHelper ww LsFiles.inRepo + fs' <- seekHelper id ww LsFiles.inRepo =<< workTreeItems ww fs matcher <- Limit.getMatcher let addlist f o = whenM (matcher $ MatchingFile $ FileInfo f f) $ diff --git a/Command/Sync.hs b/Command/Sync.hs index 4e4940fafd..ff977795c8 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -652,11 +652,11 @@ seekSyncContent o rs currbranch = do liftIO $ not <$> isEmptyMVar mvar where seekworktree mvar l bloomfeeder = - seekHelper ww LsFiles.inRepo l + seekHelper id ww LsFiles.inRepo l >>= gofiles bloomfeeder mvar seekincludinghidden origbranch mvar l bloomfeeder = - seekHelper ww (LsFiles.inRepoOrBranch origbranch) l + seekHelper id ww (LsFiles.inRepoOrBranch origbranch) l >>= gofiles bloomfeeder mvar ww = WarnUnmatchLsFiles diff --git a/Utility/Path.hs b/Utility/Path.hs index 4228e200c6..56eed759ab 100644 --- a/Utility/Path.hs +++ b/Utility/Path.hs @@ -214,22 +214,23 @@ prop_relPathDirToFile_regressionTest = same_dir_shortcurcuits_at_difference - we stop preserving ordering at that point. Presumably a user passing - that many paths in doesn't care too much about order of the later ones. -} -segmentPaths :: [RawFilePath] -> [RawFilePath] -> [[RawFilePath]] -segmentPaths [] new = [new] -segmentPaths [_] new = [new] -- optimisation -segmentPaths (l:ls) new = found : segmentPaths ls rest +segmentPaths :: (a -> RawFilePath) -> [RawFilePath] -> [a] -> [[a]] +segmentPaths _ [] new = [new] +segmentPaths _ [_] new = [new] -- optimisation +segmentPaths c (l:ls) new = found : segmentPaths c ls rest where (found, rest) = if length ls < 100 then partition inl new else break (not . inl) new - inl f = fromRawFilePath l `dirContains` fromRawFilePath f + inl f = l' `dirContains` fromRawFilePath (c f) + l' = fromRawFilePath l {- This assumes that it's cheaper to call segmentPaths on the result, - than it would be to run the action separately with each path. In - the case of git file list commands, that assumption tends to hold. -} -runSegmentPaths :: ([RawFilePath] -> IO [RawFilePath]) -> [RawFilePath] -> IO [[RawFilePath]] -runSegmentPaths a paths = segmentPaths paths <$> a paths +runSegmentPaths :: (a -> RawFilePath) -> ([RawFilePath] -> IO [a]) -> [RawFilePath] -> IO [[a]] +runSegmentPaths c a paths = segmentPaths c paths <$> a paths {- Converts paths in the home directory to use ~/ -} relHome :: FilePath -> IO String