From 4c9ad1de46ddc6b477bd1631f2bcf720f2a77acf Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 10 Jul 2020 13:54:52 -0400 Subject: [PATCH 1/6] optimisation: stream keys through git cat-file --buffer This is only implemented for git-annex get so far. It makes git-annex get nearly twice as fast in a repo with 10k files, all of them present! But, see the TODO for some caveats. --- CmdLine/Seek.hs | 87 +++++++++++++++++++++++++++++++------------- Command/Export.hs | 2 +- Command/Get.hs | 7 ++-- Command/Multicast.hs | 2 +- Command/Sync.hs | 4 +- Utility/Path.hs | 15 ++++---- 6 files changed, 78 insertions(+), 39 deletions(-) diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 2262a9b3ad..542f1cfecd 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -29,25 +29,35 @@ import Logs.Transfer import Remote.List import qualified Remote import Annex.CatFile -import Git.CatFile (catObjectStream) +import Git.CatFile import Annex.CurrentBranch import Annex.Content +import Annex.Link import Annex.InodeSentinal +import Annex.Concurrent import qualified Annex.Branch import qualified Annex.BranchState import qualified Database.Keys import qualified Utility.RawFilePath as R +import Utility.Tuple + +import Control.Concurrent.Async +import System.Posix.Types withFilesInGit :: WarnUnmatchWhen -> (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek -withFilesInGit ww a l = seekActions $ prepFiltered a $ - seekHelper ww LsFiles.inRepo l +withFilesInGit ww a l = seekFiltered a $ + seekHelper id ww LsFiles.inRepo l + +withFilesInGitAnnex :: WarnUnmatchWhen -> (RawFilePath -> Key -> CommandSeek) -> [WorkTreeItem] -> CommandSeek +withFilesInGitAnnex ww a l = seekFiltered' a $ + seekHelper fst3 ww LsFiles.inRepoDetails l withFilesInGitNonRecursive :: WarnUnmatchWhen -> String -> (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek withFilesInGitNonRecursive ww needforce a l = ifM (Annex.getState Annex.force) ( withFilesInGit ww a l , if null l then giveup needforce - else seekActions $ prepFiltered a (getfiles [] l) + else seekFiltered a (getfiles [] l) ) where getfiles c [] = return (reverse c) @@ -71,8 +81,8 @@ withFilesNotInGit a l = go =<< seek g <- gitRepo liftIO $ Git.Command.leaveZombie <$> LsFiles.notInRepo [] force (map (\(WorkTreeItem f) -> toRawFilePath f) l) g - go fs = seekActions $ prepFiltered a $ - return $ concat $ segmentPaths (map (\(WorkTreeItem f) -> toRawFilePath f) l) fs + go fs = seekFiltered a $ + return $ concat $ segmentPaths id (map (\(WorkTreeItem f) -> toRawFilePath f) l) fs withPathContents :: ((FilePath, FilePath) -> CommandSeek) -> CmdParams -> CommandSeek withPathContents a params = do @@ -94,21 +104,21 @@ withPathContents a params = do } withWords :: ([String] -> CommandSeek) -> CmdParams -> CommandSeek -withWords a params = seekActions $ return [a params] +withWords a params = a params withStrings :: (String -> CommandSeek) -> CmdParams -> CommandSeek -withStrings a params = seekActions $ return $ map a params +withStrings a params = sequence_ $ map a params withPairs :: ((String, String) -> CommandSeek) -> CmdParams -> CommandSeek -withPairs a params = seekActions $ return $ map a $ pairs [] params +withPairs a params = sequence_ $ map a $ pairs [] params where pairs c [] = reverse c pairs c (x:y:xs) = pairs ((x,y):c) xs pairs _ _ = giveup "expected pairs" withFilesToBeCommitted :: (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek -withFilesToBeCommitted a l = seekActions $ prepFiltered a $ - seekHelper WarnUnmatchWorkTreeItems (const LsFiles.stagedNotDeleted) l +withFilesToBeCommitted a l = seekFiltered a $ + seekHelper id WarnUnmatchWorkTreeItems (const LsFiles.stagedNotDeleted) l isOldUnlocked :: RawFilePath -> Annex Bool isOldUnlocked f = liftIO (notSymlink f) <&&> @@ -117,11 +127,10 @@ isOldUnlocked f = liftIO (notSymlink f) <&&> {- unlocked pointer files that are staged, and whose content has not been - modified-} withUnmodifiedUnlockedPointers :: WarnUnmatchWhen -> (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek -withUnmodifiedUnlockedPointers ww a l = seekActions $ - prepFiltered a unlockedfiles +withUnmodifiedUnlockedPointers ww a l = seekFiltered a unlockedfiles where unlockedfiles = filterM isUnmodifiedUnlocked - =<< seekHelper ww (const LsFiles.typeChangedStaged) l + =<< seekHelper id ww (const LsFiles.typeChangedStaged) l isUnmodifiedUnlocked :: RawFilePath -> Annex Bool isUnmodifiedUnlocked f = catKeyFile f >>= \case @@ -130,11 +139,11 @@ isUnmodifiedUnlocked f = catKeyFile f >>= \case {- Finds files that may be modified. -} withFilesMaybeModified :: WarnUnmatchWhen -> (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek -withFilesMaybeModified ww a params = seekActions $ - prepFiltered a $ seekHelper ww LsFiles.modified params +withFilesMaybeModified ww a params = seekFiltered a $ + seekHelper id ww LsFiles.modified params withKeys :: (Key -> CommandSeek) -> CmdParams -> CommandSeek -withKeys a l = seekActions $ return $ map (a . parse) l +withKeys a l = sequence_ $ map (a . parse) l where parse p = fromMaybe (giveup "bad key") $ deserializeKey p @@ -251,23 +260,51 @@ withKeyOptions' ko auto mkkeyaction fallbackaction params = do forM_ ts $ \(t, i) -> keyaction (transferKey t, mkActionItem (t, i)) -prepFiltered :: (RawFilePath -> CommandSeek) -> Annex [RawFilePath] -> Annex [CommandSeek] -prepFiltered a fs = do +seekFiltered :: (RawFilePath -> CommandSeek) -> Annex [RawFilePath] -> Annex () +seekFiltered a fs = do matcher <- Limit.getMatcher - map (process matcher) <$> fs + sequence_ =<< (map (process matcher) <$> fs) where process matcher f = whenM (matcher $ MatchingFile $ FileInfo f f) $ a f -seekActions :: Annex [CommandSeek] -> Annex () -seekActions gen = sequence_ =<< gen +seekFiltered' :: (RawFilePath -> Key -> CommandSeek) -> Annex [(RawFilePath, Git.Sha, FileMode)] -> Annex () +seekFiltered' a fs = do + g <- Annex.gitRepo + catObjectStream' g $ \feeder closer reader -> do + tid <- liftIO . async =<< forkState (gofeed feeder closer) + goread reader + join (liftIO (wait tid)) + where + gofeed feeder closer = do + matcher <- Limit.getMatcher + l <- fs + forM_ l $ process matcher feeder + liftIO closer + + process matcher feeder (f, sha, mode) = + -- TODO handle non-symlink separately to avoid + -- catting large files + -- If the matcher needs to look up a key, it should be run + -- in goread, not here, and the key passed in. OTOH, if + -- the matcher does not need to look up a key, it's more + -- efficient to put it here, to avoid catting files that + -- will not be matched. + whenM (matcher $ MatchingFile $ FileInfo f f) $ + liftIO $ feeder (f, sha) -seekHelper :: WarnUnmatchWhen -> ([LsFiles.Options] -> [RawFilePath] -> Git.Repo -> IO ([RawFilePath], IO Bool)) -> [WorkTreeItem] -> Annex [RawFilePath] -seekHelper ww a l = do + goread reader = liftIO reader >>= \case + Just (f, content) -> do + maybe noop (a f) (parseLinkTargetOrPointerLazy =<< content) + goread reader + _ -> return () + +seekHelper :: (a -> RawFilePath) -> WarnUnmatchWhen -> ([LsFiles.Options] -> [RawFilePath] -> Git.Repo -> IO ([a], IO Bool)) -> [WorkTreeItem] -> Annex [a] +seekHelper c ww a l = do os <- seekOptions ww inRepo $ \g -> concat . concat <$> forM (segmentXargsOrdered l') - (runSegmentPaths (\fs -> Git.Command.leaveZombie <$> a os fs g) . map toRawFilePath) + (runSegmentPaths c (\fs -> Git.Command.leaveZombie <$> a os fs g) . map toRawFilePath) where l' = map (\(WorkTreeItem f) -> f) l diff --git a/Command/Export.hs b/Command/Export.hs index eba9532a76..d1fde25d1e 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -200,7 +200,7 @@ changeExport r db (PreferredFiltered new) = do mapdiff a oldtreesha newtreesha = do (diff, cleanup) <- inRepo $ Git.DiffTree.diffTreeRecursive oldtreesha newtreesha - seekActions $ pure $ map a diff + sequence_ $ map a diff void $ liftIO cleanup -- Map of old and new filenames for each changed ExportKey in a diff. diff --git a/Command/Get.hs b/Command/Get.hs index b4cd4d62e4..36156a49b4 100644 --- a/Command/Get.hs +++ b/Command/Get.hs @@ -40,12 +40,13 @@ optParser desc = GetOptions seek :: GetOptions -> CommandSeek seek o = startConcurrency downloadStages $ do from <- maybe (pure Nothing) (Just <$$> getParsed) (getFrom o) - let go = whenAnnexed $ start o from + let go = start o from case batchOption o of - Batch fmt -> batchFilesMatching fmt (go . toRawFilePath) + Batch fmt -> batchFilesMatching fmt + (whenAnnexed go . toRawFilePath) NoBatch -> withKeyOptions (keyOptions o) (autoMode o) (commandAction . startKeys from) - (withFilesInGit ww (commandAction . go)) + (withFilesInGitAnnex ww (\f k -> commandAction (go f k))) =<< workTreeItems ww (getFiles o) where ww = WarnUnmatchLsFiles diff --git a/Command/Multicast.hs b/Command/Multicast.hs index 9dcded33db..5cd688601d 100644 --- a/Command/Multicast.hs +++ b/Command/Multicast.hs @@ -130,7 +130,7 @@ send ups fs = do starting "sending files" (ActionItemOther Nothing) $ withTmpFile "send" $ \t h -> do let ww = WarnUnmatchLsFiles - fs' <- seekHelper ww LsFiles.inRepo + fs' <- seekHelper id ww LsFiles.inRepo =<< workTreeItems ww fs matcher <- Limit.getMatcher let addlist f o = whenM (matcher $ MatchingFile $ FileInfo f f) $ diff --git a/Command/Sync.hs b/Command/Sync.hs index 4e4940fafd..ff977795c8 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -652,11 +652,11 @@ seekSyncContent o rs currbranch = do liftIO $ not <$> isEmptyMVar mvar where seekworktree mvar l bloomfeeder = - seekHelper ww LsFiles.inRepo l + seekHelper id ww LsFiles.inRepo l >>= gofiles bloomfeeder mvar seekincludinghidden origbranch mvar l bloomfeeder = - seekHelper ww (LsFiles.inRepoOrBranch origbranch) l + seekHelper id ww (LsFiles.inRepoOrBranch origbranch) l >>= gofiles bloomfeeder mvar ww = WarnUnmatchLsFiles diff --git a/Utility/Path.hs b/Utility/Path.hs index 4228e200c6..56eed759ab 100644 --- a/Utility/Path.hs +++ b/Utility/Path.hs @@ -214,22 +214,23 @@ prop_relPathDirToFile_regressionTest = same_dir_shortcurcuits_at_difference - we stop preserving ordering at that point. Presumably a user passing - that many paths in doesn't care too much about order of the later ones. -} -segmentPaths :: [RawFilePath] -> [RawFilePath] -> [[RawFilePath]] -segmentPaths [] new = [new] -segmentPaths [_] new = [new] -- optimisation -segmentPaths (l:ls) new = found : segmentPaths ls rest +segmentPaths :: (a -> RawFilePath) -> [RawFilePath] -> [a] -> [[a]] +segmentPaths _ [] new = [new] +segmentPaths _ [_] new = [new] -- optimisation +segmentPaths c (l:ls) new = found : segmentPaths c ls rest where (found, rest) = if length ls < 100 then partition inl new else break (not . inl) new - inl f = fromRawFilePath l `dirContains` fromRawFilePath f + inl f = l' `dirContains` fromRawFilePath (c f) + l' = fromRawFilePath l {- This assumes that it's cheaper to call segmentPaths on the result, - than it would be to run the action separately with each path. In - the case of git file list commands, that assumption tends to hold. -} -runSegmentPaths :: ([RawFilePath] -> IO [RawFilePath]) -> [RawFilePath] -> IO [[RawFilePath]] -runSegmentPaths a paths = segmentPaths paths <$> a paths +runSegmentPaths :: (a -> RawFilePath) -> ([RawFilePath] -> IO [a]) -> [RawFilePath] -> IO [[a]] +runSegmentPaths c a paths = segmentPaths c paths <$> a paths {- Converts paths in the home directory to use ~/ -} relHome :: FilePath -> IO String From 7a42a479021b46c6e09f74bb7d41b4b083e90981 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 10 Jul 2020 14:17:35 -0400 Subject: [PATCH 2/6] renaming --- Annex/View.hs | 2 +- Annex/WorkTree.hs | 14 +++++++------- Assistant/Threads/TransferScanner.hs | 2 +- Assistant/Threads/Watcher.hs | 2 +- Build/DistributionUpdate.hs | 2 +- CmdLine/Seek.hs | 11 ++++------- Command/FromKey.hs | 2 +- Command/MetaData.hs | 2 +- Command/Multicast.hs | 2 +- Command/TestRemote.hs | 2 +- Command/Unused.hs | 2 +- Limit.hs | 2 +- Test.hs | 4 ++-- Test/Framework.hs | 4 ++-- Upgrade/V0.hs | 4 ++-- Upgrade/V1.hs | 6 +++--- Upgrade/V5.hs | 2 +- 17 files changed, 31 insertions(+), 34 deletions(-) diff --git a/Annex/View.hs b/Annex/View.hs index fecee7395f..fa9a7b632d 100644 --- a/Annex/View.hs +++ b/Annex/View.hs @@ -351,7 +351,7 @@ applyView' mkviewedfile getfilemetadata view = do withUpdateIndex viewg $ \uh -> do forM_ l $ \(f, sha, mode) -> do topf <- inRepo (toTopFilePath f) - go uh topf sha (toTreeItemType mode) =<< lookupFile f + go uh topf sha (toTreeItemType mode) =<< lookupKey f liftIO $ void clean genViewBranch view where diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs index f8ab63a1bb..ab28614b7d 100644 --- a/Annex/WorkTree.hs +++ b/Annex/WorkTree.hs @@ -35,8 +35,8 @@ import Control.Concurrent - When in an adjusted branch that may have hidden the file, looks for a - pointer to a key in the original branch. -} -lookupFile :: RawFilePath -> Annex (Maybe Key) -lookupFile = lookupFile' catkeyfile +lookupKey :: RawFilePath -> Annex (Maybe Key) +lookupKey = lookupKey' catkeyfile where catkeyfile file = ifM (liftIO $ doesFileExist $ fromRawFilePath file) @@ -44,8 +44,8 @@ lookupFile = lookupFile' catkeyfile , catKeyFileHidden file =<< getCurrentBranch ) -lookupFileNotHidden :: RawFilePath -> Annex (Maybe Key) -lookupFileNotHidden = lookupFile' catkeyfile +lookupKeyNotHidden :: RawFilePath -> Annex (Maybe Key) +lookupKeyNotHidden = lookupKey' catkeyfile where catkeyfile file = ifM (liftIO $ doesFileExist $ fromRawFilePath file) @@ -53,8 +53,8 @@ lookupFileNotHidden = lookupFile' catkeyfile , return Nothing ) -lookupFile' :: (RawFilePath -> Annex (Maybe Key)) -> RawFilePath -> Annex (Maybe Key) -lookupFile' catkeyfile file = isAnnexLink file >>= \case +lookupKey' :: (RawFilePath -> Annex (Maybe Key)) -> RawFilePath -> Annex (Maybe Key) +lookupKey' catkeyfile file = isAnnexLink file >>= \case Just key -> return (Just key) Nothing -> catkeyfile file @@ -64,7 +64,7 @@ whenAnnexed :: (RawFilePath -> Key -> Annex (Maybe a)) -> RawFilePath -> Annex ( whenAnnexed a file = ifAnnexed file (a file) (return Nothing) ifAnnexed :: RawFilePath -> (Key -> Annex a) -> Annex a -> Annex a -ifAnnexed file yes no = maybe no yes =<< lookupFile file +ifAnnexed file yes no = maybe no yes =<< lookupKey file {- Find all unlocked files and update the keys database for them. - diff --git a/Assistant/Threads/TransferScanner.hs b/Assistant/Threads/TransferScanner.hs index ff6404cb86..5c4e8313cb 100644 --- a/Assistant/Threads/TransferScanner.hs +++ b/Assistant/Threads/TransferScanner.hs @@ -145,7 +145,7 @@ expensiveScan urlrenderer rs = batch <~> do (unwanted', ts) <- maybe (return (unwanted, [])) (findtransfers f unwanted) - =<< liftAnnex (lookupFile f) + =<< liftAnnex (lookupKey f) mapM_ (enqueue f) ts {- Delay for a short time to avoid using too much CPU. -} diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs index c5b730c696..20874d27a7 100644 --- a/Assistant/Threads/Watcher.hs +++ b/Assistant/Threads/Watcher.hs @@ -289,7 +289,7 @@ onAddUnlocked' contentchanged addassociatedfile addlink samefilestatus symlinkss onAddSymlink :: Handler onAddSymlink file filestatus = unlessIgnored file $ do linktarget <- liftIO (catchMaybeIO $ readSymbolicLink file) - kv <- liftAnnex (lookupFile (toRawFilePath file)) + kv <- liftAnnex (lookupKey (toRawFilePath file)) onAddSymlink' linktarget kv file filestatus onAddSymlink' :: Maybe String -> Maybe Key -> Handler diff --git a/Build/DistributionUpdate.hs b/Build/DistributionUpdate.hs index b221518879..5e83d62600 100644 --- a/Build/DistributionUpdate.hs +++ b/Build/DistributionUpdate.hs @@ -128,7 +128,7 @@ makeinfos updated version = do now <- liftIO getCurrentTime liftIO $ putStrLn $ "building info files" forM_ updated $ \(f, bv) -> do - v <- lookupFile (toRawFilePath f) + v <- lookupKey (toRawFilePath f) case v of Nothing -> noop Just k -> whenM (inAnnex k) $ do diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 542f1cfecd..61093974c5 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -49,7 +49,7 @@ withFilesInGit ww a l = seekFiltered a $ seekHelper id ww LsFiles.inRepo l withFilesInGitAnnex :: WarnUnmatchWhen -> (RawFilePath -> Key -> CommandSeek) -> [WorkTreeItem] -> CommandSeek -withFilesInGitAnnex ww a l = seekFiltered' a $ +withFilesInGitAnnex ww a l = seekFilteredKeys a $ seekHelper fst3 ww LsFiles.inRepoDetails l withFilesInGitNonRecursive :: WarnUnmatchWhen -> String -> (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek @@ -120,10 +120,6 @@ withFilesToBeCommitted :: (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> Comm withFilesToBeCommitted a l = seekFiltered a $ seekHelper id WarnUnmatchWorkTreeItems (const LsFiles.stagedNotDeleted) l -isOldUnlocked :: RawFilePath -> Annex Bool -isOldUnlocked f = liftIO (notSymlink f) <&&> - (isJust <$> catKeyFile f <||> isJust <$> catKeyFileHEAD f) - {- unlocked pointer files that are staged, and whose content has not been - modified-} withUnmodifiedUnlockedPointers :: WarnUnmatchWhen -> (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek @@ -268,8 +264,9 @@ seekFiltered a fs = do process matcher f = whenM (matcher $ MatchingFile $ FileInfo f f) $ a f -seekFiltered' :: (RawFilePath -> Key -> CommandSeek) -> Annex [(RawFilePath, Git.Sha, FileMode)] -> Annex () -seekFiltered' a fs = do +-- This is siginificantly faster than using lookupKey after seekFiltered. +seekFilteredKeys :: (RawFilePath -> Key -> CommandSeek) -> Annex [(RawFilePath, Git.Sha, FileMode)] -> Annex () +seekFilteredKeys a fs = do g <- Annex.gitRepo catObjectStream' g $ \feeder closer reader -> do tid <- liftIO . async =<< forkState (gofeed feeder closer) diff --git a/Command/FromKey.hs b/Command/FromKey.hs index ecd8bd294f..0b874ac5fd 100644 --- a/Command/FromKey.hs +++ b/Command/FromKey.hs @@ -84,7 +84,7 @@ keyOpt s = case parseURI s of Nothing -> giveup $ "bad key/url " ++ s perform :: Key -> FilePath -> CommandPerform -perform key file = lookupFileNotHidden (toRawFilePath file) >>= \case +perform key file = lookupKeyNotHidden (toRawFilePath file) >>= \case Nothing -> ifM (liftIO $ doesFileExist file) ( hasothercontent , do diff --git a/Command/MetaData.hs b/Command/MetaData.hs index 038b2f32e9..29229ac9c2 100644 --- a/Command/MetaData.hs +++ b/Command/MetaData.hs @@ -168,7 +168,7 @@ parseJSONInput i = case eitherDecode (BU.fromString i) of startBatch :: (Either RawFilePath Key, MetaData) -> CommandStart startBatch (i, (MetaData m)) = case i of Left f -> do - mk <- lookupFile f + mk <- lookupKey f case mk of Just k -> go k (mkActionItem (k, AssociatedFile (Just f))) Nothing -> giveup $ "not an annexed file: " ++ fromRawFilePath f diff --git a/Command/Multicast.hs b/Command/Multicast.hs index 5cd688601d..472d26ceb9 100644 --- a/Command/Multicast.hs +++ b/Command/Multicast.hs @@ -136,7 +136,7 @@ send ups fs = do let addlist f o = whenM (matcher $ MatchingFile $ FileInfo f f) $ liftIO $ hPutStrLn h o forM_ fs' $ \f -> do - mk <- lookupFile f + mk <- lookupKey f case mk of Nothing -> noop Just k -> withObjectLoc k $ diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs index 221bec4876..19a522ba65 100644 --- a/Command/TestRemote.hs +++ b/Command/TestRemote.hs @@ -439,7 +439,7 @@ randKey sz = withTmpFile "randkey" $ \f h -> do return k getReadonlyKey :: Remote -> FilePath -> Annex Key -getReadonlyKey r f = lookupFile (toRawFilePath f) >>= \case +getReadonlyKey r f = lookupKey (toRawFilePath f) >>= \case Nothing -> giveup $ f ++ " is not an annexed file" Just k -> do unlessM (inAnnex k) $ diff --git a/Command/Unused.hs b/Command/Unused.hs index ee777e79d3..8fd61e7dfd 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -215,7 +215,7 @@ withKeysReferenced' mdir initial a = do Just dir -> inRepo $ LsFiles.inRepo [] [toRawFilePath dir] go v [] = return v go v (f:fs) = do - mk <- lookupFile f + mk <- lookupKey f case mk of Nothing -> go v fs Just k -> do diff --git a/Limit.hs b/Limit.hs index 2069822711..c5a3b85658 100644 --- a/Limit.hs +++ b/Limit.hs @@ -372,7 +372,7 @@ addAccessedWithin duration = do secs = fromIntegral (durationSeconds duration) lookupFileKey :: FileInfo -> Annex (Maybe Key) -lookupFileKey = lookupFile . currFile +lookupFileKey = lookupKey . currFile checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a diff --git a/Test.hs b/Test.hs index f9c1b97f91..6f0565625e 100644 --- a/Test.hs +++ b/Test.hs @@ -704,7 +704,7 @@ test_lock_force = intmpclonerepo $ do git_annex "get" [annexedfile] @? "get of file failed" git_annex "unlock" [annexedfile] @? "unlock failed" annexeval $ do - Just k <- Annex.WorkTree.lookupFile (toRawFilePath annexedfile) + Just k <- Annex.WorkTree.lookupKey (toRawFilePath annexedfile) Database.Keys.removeInodeCaches k Database.Keys.closeDb liftIO . nukeFile =<< Annex.fromRepo Annex.Locations.gitAnnexKeysDbIndexCache @@ -1680,7 +1680,7 @@ test_crypto = do (c,k) <- annexeval $ do uuid <- Remote.nameToUUID "foo" rs <- Logs.Remote.readRemoteLog - Just k <- Annex.WorkTree.lookupFile (toRawFilePath annexedfile) + Just k <- Annex.WorkTree.lookupKey (toRawFilePath annexedfile) return (fromJust $ M.lookup uuid rs, k) let key = if scheme `elem` ["hybrid","pubkey"] then Just $ Utility.Gpg.KeyIds [Utility.Gpg.testKeyId] diff --git a/Test/Framework.hs b/Test/Framework.hs index 5e95d750d8..ca4d009a5c 100644 --- a/Test/Framework.hs +++ b/Test/Framework.hs @@ -316,7 +316,7 @@ checkdangling f = ifM (annexeval Config.crippledFileSystem) checklocationlog :: FilePath -> Bool -> Assertion checklocationlog f expected = do thisuuid <- annexeval Annex.UUID.getUUID - r <- annexeval $ Annex.WorkTree.lookupFile (toRawFilePath f) + r <- annexeval $ Annex.WorkTree.lookupKey (toRawFilePath f) case r of Just k -> do uuids <- annexeval $ Remote.keyLocations k @@ -327,7 +327,7 @@ checklocationlog f expected = do checkbackend :: FilePath -> Types.Backend -> Assertion checkbackend file expected = do b <- annexeval $ maybe (return Nothing) (Backend.getBackend file) - =<< Annex.WorkTree.lookupFile (toRawFilePath file) + =<< Annex.WorkTree.lookupKey (toRawFilePath file) assertEqual ("backend for " ++ file) (Just expected) b checkispointerfile :: FilePath -> Assertion diff --git a/Upgrade/V0.hs b/Upgrade/V0.hs index 2b5b2d4eba..01f4e0670c 100644 --- a/Upgrade/V0.hs +++ b/Upgrade/V0.hs @@ -31,8 +31,8 @@ keyFile0 :: Key -> FilePath keyFile0 = Upgrade.V1.keyFile1 fileKey0 :: FilePath -> Key fileKey0 = Upgrade.V1.fileKey1 -lookupFile0 :: FilePath -> Annex (Maybe (Key, Backend)) -lookupFile0 = Upgrade.V1.lookupFile1 +lookupKey0 :: FilePath -> Annex (Maybe (Key, Backend)) +lookupKey0 = Upgrade.V1.lookupKey1 getKeysPresent0 :: FilePath -> Annex [Key] getKeysPresent0 dir = ifM (liftIO $ doesDirectoryExist dir) diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs index 4b85bf91b6..5b9620b257 100644 --- a/Upgrade/V1.hs +++ b/Upgrade/V1.hs @@ -90,7 +90,7 @@ updateSymlinks = do void $ liftIO cleanup where fixlink f = do - r <- lookupFile1 f + r <- lookupKey1 f case r of Nothing -> noop Just (k, _) -> do @@ -191,8 +191,8 @@ readLog1 :: FilePath -> IO [LogLine] readLog1 file = catchDefaultIO [] $ parseLog . encodeBL <$> readFileStrict file -lookupFile1 :: FilePath -> Annex (Maybe (Key, Backend)) -lookupFile1 file = do +lookupKey1 :: FilePath -> Annex (Maybe (Key, Backend)) +lookupKey1 file = do tl <- liftIO $ tryIO getsymlink case tl of Left _ -> return Nothing diff --git a/Upgrade/V5.hs b/Upgrade/V5.hs index 45351d4f4d..3e8dedaef9 100644 --- a/Upgrade/V5.hs +++ b/Upgrade/V5.hs @@ -115,7 +115,7 @@ upgradeDirectWorkTree = do void $ liftIO clean where go (f, _sha, mode) | isSymLink mode = do - -- Cannot use lookupFile here, as we're in between direct + -- Cannot use lookupKey here, as we're in between direct -- mode and v6. mk <- catKeyFile f case mk of From 5387b95dcdc4e6cedf5f217165bd63662d938a45 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 10 Jul 2020 14:36:18 -0400 Subject: [PATCH 3/6] add catObjectMetaDataStream --- Git/CatFile.hs | 60 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/Git/CatFile.hs b/Git/CatFile.hs index da21f76500..0b0327bddc 100644 --- a/Git/CatFile.hs +++ b/Git/CatFile.hs @@ -20,8 +20,9 @@ module Git.CatFile ( catObject, catObjectDetails, catObjectMetaData, + catObjectStreamLsTree, catObjectStream, - catObjectStream', + catObjectMetaDataStream, ) where import System.IO @@ -289,18 +290,18 @@ parseCommit b = Commit - While this could be made more polymorhpic, specialization is important - to its performance. -} -catObjectStream +catObjectStreamLsTree :: (MonadMask m, MonadIO m) => [LsTree.TreeItem] -> (LsTree.TreeItem -> Bool) -> Repo -> (IO (Maybe (TopFilePath, Maybe L.ByteString)) -> m ()) -> m () -catObjectStream l want repo reader = withCatObjectStream repo $ +catObjectStreamLsTree l want repo reader = withCatFileStream False repo $ \c hin hout -> bracketIO (async $ feeder c hin) cancel - (const (reader (catObjectReader c hout))) + (const (reader (catObjectReader readObjectContent c hout))) where feeder c h = do forM_ l $ \ti -> @@ -311,7 +312,7 @@ catObjectStream l want repo reader = withCatObjectStream repo $ S8.hPutStrLn h (fromRef' sha) hClose h -catObjectStream' +catObjectStream :: (MonadMask m, MonadIO m) => Repo -> ( @@ -321,41 +322,70 @@ catObjectStream' -> m () ) -> m () -catObjectStream' repo a = withCatObjectStream repo go +catObjectStream repo a = withCatFileStream False repo go where go c hin hout = a (feeder c hin) (hClose hin) - (catObjectReader c hout) + (catObjectReader readObjectContent c hout) feeder c h (v, ref) = do liftIO $ writeChan c (ref, v) S8.hPutStrLn h (fromRef' ref) -catObjectReader :: Chan (Ref, a) -> Handle -> IO (Maybe (a, Maybe L.ByteString)) -catObjectReader c h = ifM (hIsEOF h) +catObjectMetaDataStream + :: (MonadMask m, MonadIO m) + => Repo + -> ( + ((v, Ref) -> IO ()) -- ^ call to feed values in + -> IO () -- call once all values are fed in + -> IO (Maybe (v, Maybe (Sha, FileSize, ObjectType))) -- call to read results + -> m () + ) + -> m () +catObjectMetaDataStream repo a = withCatFileStream True repo go + where + go c hin hout = a + (feeder c hin) + (hClose hin) + (catObjectReader (\_h r -> pure (conv r)) c hout) + + feeder c h (v, ref) = do + liftIO $ writeChan c (ref, v) + S8.hPutStrLn h (fromRef' ref) + + conv (ParsedResp sha ty sz) = (sha, sz, ty) + conv DNE = error "internal" + +catObjectReader + :: (Handle -> ParsedResp -> IO t) + -> Chan (Ref, a) + -> Handle + -> IO (Maybe (a, Maybe t)) +catObjectReader getv c h = ifM (hIsEOF h) ( return Nothing , do (ref, f) <- liftIO $ readChan c resp <- S8.hGetLine h case parseResp ref resp of Just r@(ParsedResp {}) -> do - content <- readObjectContent h r - return (Just (f, Just content)) + v <- getv h r + return (Just (f, Just v)) Just DNE -> return (Just (f, Nothing)) Nothing -> error $ "unknown response from git cat-file " ++ show resp ) -withCatObjectStream +withCatFileStream :: (MonadMask m, MonadIO m) - => Repo + => Bool + -> Repo -> (Chan a -> Handle -> Handle -> m ()) -> m () -withCatObjectStream repo reader = assertLocal repo $ +withCatFileStream check repo reader = assertLocal repo $ bracketIO start stop $ \(c, hin, hout, _) -> reader c hin hout where params = [ Param "cat-file" - , Param ("--batch=" ++ batchFormat) + , Param ("--batch" ++ (if check then "-check" else "") ++ "=" ++ batchFormat) , Param "--buffer" ] From 0f6b1ee048d763205f0b4c198b12b0f391b35e4c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 10 Jul 2020 15:11:14 -0400 Subject: [PATCH 4/6] check pointer file size This is all good, except for one small problem... When a pointer file has to be fed into the metadata cat-file, it's possible for a non-pointer file that comes after it to get fed into the main cat-file first, so the two files will be processed in a different order than the user specified. So, while this is the fast way, I guess I'll have to change it to be slower, but sequential.. --- CmdLine/Seek.hs | 63 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 61093974c5..b59de39bcf 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -19,6 +19,7 @@ import qualified Git import qualified Git.Command import qualified Git.LsFiles as LsFiles import qualified Git.LsTree as LsTree +import qualified Git.Types as Git import Git.FilePath import qualified Limit import CmdLine.GitAnnex.Options @@ -229,7 +230,7 @@ withKeyOptions' ko auto mkkeyaction fallbackaction params = do keyaction (k, mkActionItem k) Nothing -> return () go reader - catObjectStream l (isJust . getk . LsTree.file) g go + catObjectStreamLsTree l (isJust . getk . LsTree.file) g go liftIO $ void cleanup runkeyaction getks = do @@ -264,37 +265,55 @@ seekFiltered a fs = do process matcher f = whenM (matcher $ MatchingFile $ FileInfo f f) $ a f --- This is siginificantly faster than using lookupKey after seekFiltered. +-- This is significantly faster than using lookupKey after seekFiltered. seekFilteredKeys :: (RawFilePath -> Key -> CommandSeek) -> Annex [(RawFilePath, Git.Sha, FileMode)] -> Annex () seekFilteredKeys a fs = do g <- Annex.gitRepo - catObjectStream' g $ \feeder closer reader -> do - tid <- liftIO . async =<< forkState (gofeed feeder closer) - goread reader - join (liftIO (wait tid)) + matcher <- Limit.getMatcher + catObjectMetaDataStream g $ \mdfeeder mdcloser mdreader -> + catObjectStream g $ \feeder closer reader -> do + processertid <- liftIO . async =<< forkState + (gofeed matcher feeder closer mdfeeder mdcloser) + mdprocessertid <- liftIO . async =<< forkState + (mdprocess matcher mdreader feeder) + goread reader + join (liftIO (wait mdprocessertid)) + join (liftIO (wait processertid)) where - gofeed feeder closer = do - matcher <- Limit.getMatcher + gofeed matcher feeder closer mdfeeder mdcloser = do l <- fs - forM_ l $ process matcher feeder - liftIO closer - - process matcher feeder (f, sha, mode) = - -- TODO handle non-symlink separately to avoid - -- catting large files - -- If the matcher needs to look up a key, it should be run - -- in goread, not here, and the key passed in. OTOH, if - -- the matcher does not need to look up a key, it's more - -- efficient to put it here, to avoid catting files that - -- will not be matched. - whenM (matcher $ MatchingFile $ FileInfo f f) $ - liftIO $ feeder (f, sha) + forM_ l $ process matcher feeder mdfeeder + liftIO $ void closer + liftIO $ void mdcloser goread reader = liftIO reader >>= \case Just (f, content) -> do maybe noop (a f) (parseLinkTargetOrPointerLazy =<< content) goread reader - _ -> return () + Nothing -> return () + + feedmatches matcher feeder f sha = + whenM (matcher $ MatchingFile $ FileInfo f f) $ + liftIO $ feeder (f, sha) + + process matcher feeder mdfeeder (f, sha, mode) = case + Git.toTreeItemType mode of + Just Git.TreeSymlink -> + feedmatches matcher feeder f sha + Just Git.TreeSubmodule -> return () + -- Might be a pointer file, might be other + -- file in git, possibly large. Avoid catting + -- large files by first looking up the size. + Just _ -> liftIO $ mdfeeder (f, sha) + Nothing -> return () + + mdprocess matcher mdreader feeder = liftIO mdreader >>= \case + Just (f, Just (sha, size, _type)) + | size < maxPointerSz -> do + feedmatches matcher feeder f sha + mdprocess matcher mdreader feeder + Just _ -> mdprocess matcher mdreader feeder + Nothing -> return () seekHelper :: (a -> RawFilePath) -> WarnUnmatchWhen -> ([LsFiles.Options] -> [RawFilePath] -> Git.Repo -> IO ([a], IO Bool)) -> [WorkTreeItem] -> Annex [a] seekHelper c ww a l = do From b4d0f6dfc2e47fb1a19f22bbe9684ff118669258 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 10 Jul 2020 15:18:42 -0400 Subject: [PATCH 5/6] slower but sequential filtering of large files from pointer files There should still be a speedup seeking over pointer files, just not as large as the one seeking over symlinks. --- CHANGELOG | 1 + CmdLine/Seek.hs | 38 ++++++++++++++------------------------ 2 files changed, 15 insertions(+), 24 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index c26b2c4e0f..271d65f2d9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -29,6 +29,7 @@ git-annex (8.20200618) UNRELEASED; urgency=medium after getting several thousand files. * Sped up the --all option by 2x to 16x by using git cat-file --buffer. Thanks to Lukey for finding this optimisation. + * Sped up seeking for annexed files to operate on by a factor of nearly 2x. * fsck: Detect if WORM keys contain a carriage return, and recommend upgrading the key. (git-annex could have maybe created such keys back in 2013). diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index b59de39bcf..943e391eb2 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -30,7 +30,7 @@ import Logs.Transfer import Remote.List import qualified Remote import Annex.CatFile -import Git.CatFile +import Git.CatFile (catObjectStreamLsTree, catObjectStream) import Annex.CurrentBranch import Annex.Content import Annex.Link @@ -270,21 +270,16 @@ seekFilteredKeys :: (RawFilePath -> Key -> CommandSeek) -> Annex [(RawFilePath, seekFilteredKeys a fs = do g <- Annex.gitRepo matcher <- Limit.getMatcher - catObjectMetaDataStream g $ \mdfeeder mdcloser mdreader -> - catObjectStream g $ \feeder closer reader -> do - processertid <- liftIO . async =<< forkState - (gofeed matcher feeder closer mdfeeder mdcloser) - mdprocessertid <- liftIO . async =<< forkState - (mdprocess matcher mdreader feeder) - goread reader - join (liftIO (wait mdprocessertid)) - join (liftIO (wait processertid)) + catObjectStream g $ \feeder closer reader -> do + processertid <- liftIO . async =<< forkState + (gofeed matcher feeder closer) + goread reader + join (liftIO (wait processertid)) where - gofeed matcher feeder closer mdfeeder mdcloser = do + gofeed matcher feeder closer = do l <- fs - forM_ l $ process matcher feeder mdfeeder - liftIO $ void closer - liftIO $ void mdcloser + forM_ l $ process matcher feeder + liftIO closer goread reader = liftIO reader >>= \case Just (f, content) -> do @@ -296,7 +291,7 @@ seekFilteredKeys a fs = do whenM (matcher $ MatchingFile $ FileInfo f f) $ liftIO $ feeder (f, sha) - process matcher feeder mdfeeder (f, sha, mode) = case + process matcher feeder (f, sha, mode) = case Git.toTreeItemType mode of Just Git.TreeSymlink -> feedmatches matcher feeder f sha @@ -304,17 +299,12 @@ seekFilteredKeys a fs = do -- Might be a pointer file, might be other -- file in git, possibly large. Avoid catting -- large files by first looking up the size. - Just _ -> liftIO $ mdfeeder (f, sha) + Just _ -> catObjectMetaData sha >>= \case + Just (_, sz, _) | sz <= maxPointerSz -> + feedmatches matcher feeder f sha + _ -> return () Nothing -> return () - mdprocess matcher mdreader feeder = liftIO mdreader >>= \case - Just (f, Just (sha, size, _type)) - | size < maxPointerSz -> do - feedmatches matcher feeder f sha - mdprocess matcher mdreader feeder - Just _ -> mdprocess matcher mdreader feeder - Nothing -> return () - seekHelper :: (a -> RawFilePath) -> WarnUnmatchWhen -> ([LsFiles.Options] -> [RawFilePath] -> Git.Repo -> IO ([a], IO Bool)) -> [WorkTreeItem] -> Annex [a] seekHelper c ww a l = do os <- seekOptions ww From 88a7fb5cbb7358f8d395f6c306fb9e94e1f1a724 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 10 Jul 2020 15:40:06 -0400 Subject: [PATCH 6/6] convert all applicable commands to new 2x faster annexed file seeking This removes all calls to inAnnex, except for some involving --batch. It may be that the batch code could get a similar speedup, but I don't know if people habitually pass a huge number of files through --batch that git-annex does not need to do anything to process, so I skipped it for now. A few calls to ifAnnexed remain, and might be worth doing more to convert. In particular, Command.Sync has one that would probably speed it up by a good amount. (also removed some dead code from Command.Lock) --- CmdLine/Action.hs | 3 +++ CmdLine/Seek.hs | 10 +++++----- Command/Copy.hs | 7 ++++--- Command/Drop.hs | 7 ++++--- Command/Find.hs | 7 ++++--- Command/Fix.hs | 5 ++--- Command/Get.hs | 2 +- Command/Inprogress.hs | 4 ++-- Command/List.hs | 2 +- Command/Lock.hs | 31 +++++++++---------------------- Command/Log.hs | 4 ++-- Command/MetaData.hs | 8 ++++---- Command/Migrate.hs | 2 +- Command/Mirror.hs | 2 +- Command/Move.hs | 7 ++++--- Command/Unannex.hs | 2 +- Command/Uninit.hs | 2 +- Command/Unlock.hs | 2 +- Command/Whereis.hs | 7 ++++--- 19 files changed, 54 insertions(+), 60 deletions(-) diff --git a/CmdLine/Action.hs b/CmdLine/Action.hs index f1d9eda298..e42c2a8c7b 100644 --- a/CmdLine/Action.hs +++ b/CmdLine/Action.hs @@ -43,6 +43,9 @@ performCommandAction Command { cmdcheck = c, cmdname = name } seek cont = do commandActions :: [CommandStart] -> Annex () commandActions = mapM_ commandAction +commandAction' :: (a -> b -> CommandStart) -> a -> b -> Annex () +commandAction' start a b = commandAction $ start a b + {- Runs one of the actions needed to perform a command. - Individual actions can fail without stopping the whole command, - including by throwing non-async exceptions. diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 943e391eb2..43912ecc40 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -53,18 +53,18 @@ withFilesInGitAnnex :: WarnUnmatchWhen -> (RawFilePath -> Key -> CommandSeek) -> withFilesInGitAnnex ww a l = seekFilteredKeys a $ seekHelper fst3 ww LsFiles.inRepoDetails l -withFilesInGitNonRecursive :: WarnUnmatchWhen -> String -> (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek -withFilesInGitNonRecursive ww needforce a l = ifM (Annex.getState Annex.force) - ( withFilesInGit ww a l +withFilesInGitAnnexNonRecursive :: WarnUnmatchWhen -> String -> (RawFilePath -> Key -> CommandSeek) -> [WorkTreeItem] -> CommandSeek +withFilesInGitAnnexNonRecursive ww needforce a l = ifM (Annex.getState Annex.force) + ( withFilesInGitAnnex ww a l , if null l then giveup needforce - else seekFiltered a (getfiles [] l) + else seekFilteredKeys a (getfiles [] l) ) where getfiles c [] = return (reverse c) getfiles c ((WorkTreeItem p):ps) = do os <- seekOptions ww - (fs, cleanup) <- inRepo $ LsFiles.inRepo os [toRawFilePath p] + (fs, cleanup) <- inRepo $ LsFiles.inRepoDetails os [toRawFilePath p] case fs of [f] -> do void $ liftIO $ cleanup diff --git a/Command/Copy.hs b/Command/Copy.hs index 13bfc30915..222081600c 100644 --- a/Command/Copy.hs +++ b/Command/Copy.hs @@ -45,13 +45,14 @@ instance DeferredParseClass CopyOptions where seek :: CopyOptions -> CommandSeek seek o = startConcurrency commandStages $ do - let go = whenAnnexed $ start o + let go = start o case batchOption o of - Batch fmt -> batchFilesMatching fmt (go . toRawFilePath) + Batch fmt -> batchFilesMatching fmt + (whenAnnexed go . toRawFilePath) NoBatch -> withKeyOptions (keyOptions o) (autoMode o) (commandAction . Command.Move.startKey (fromToOptions o) Command.Move.RemoveNever) - (withFilesInGit ww $ commandAction . go) + (withFilesInGitAnnex ww (commandAction' go)) =<< workTreeItems ww (copyFiles o) where ww = WarnUnmatchLsFiles diff --git a/Command/Drop.hs b/Command/Drop.hs index ba9ade8c0c..f7c1be0ab5 100644 --- a/Command/Drop.hs +++ b/Command/Drop.hs @@ -54,13 +54,14 @@ parseDropFromOption = parseRemoteOption <$> strOption seek :: DropOptions -> CommandSeek seek o = startConcurrency commandStages $ case batchOption o of - Batch fmt -> batchFilesMatching fmt (go . toRawFilePath) + Batch fmt -> batchFilesMatching fmt + (whenAnnexed go . toRawFilePath) NoBatch -> withKeyOptions (keyOptions o) (autoMode o) (commandAction . startKeys o) - (withFilesInGit ww (commandAction . go)) + (withFilesInGitAnnex ww (commandAction' go)) =<< workTreeItems ww (dropFiles o) where - go = whenAnnexed $ start o + go = start o ww = WarnUnmatchLsFiles start :: DropOptions -> RawFilePath -> Key -> CommandStart diff --git a/Command/Find.hs b/Command/Find.hs index 0e2d35c18b..0e4bc80ab3 100644 --- a/Command/Find.hs +++ b/Command/Find.hs @@ -57,11 +57,12 @@ seek :: FindOptions -> CommandSeek seek o = case batchOption o of NoBatch -> withKeyOptions (keyOptions o) False (commandAction . startKeys o) - (withFilesInGit ww (commandAction . go)) + (withFilesInGitAnnex ww (commandAction' go)) =<< workTreeItems ww (findThese o) - Batch fmt -> batchFilesMatching fmt (go . toRawFilePath) + Batch fmt -> batchFilesMatching fmt + (whenAnnexed go . toRawFilePath) where - go = whenAnnexed $ start o + go = start o ww = WarnUnmatchLsFiles -- only files inAnnex are shown, unless the user has requested diff --git a/Command/Fix.hs b/Command/Fix.hs index 94d40a0eb9..347c538fe5 100644 --- a/Command/Fix.hs +++ b/Command/Fix.hs @@ -31,9 +31,8 @@ cmd = noCommit $ withGlobalOptions [annexedMatchingOptions] $ paramPaths (withParams seek) seek :: CmdParams -> CommandSeek -seek ps = unlessM crippledFileSystem $ do - withFilesInGit ww - (commandAction . (whenAnnexed $ start FixAll)) +seek ps = unlessM crippledFileSystem $ + withFilesInGitAnnex ww (commandAction' (start FixAll)) =<< workTreeItems ww ps where ww = WarnUnmatchLsFiles diff --git a/Command/Get.hs b/Command/Get.hs index 36156a49b4..cf1fcc9fc1 100644 --- a/Command/Get.hs +++ b/Command/Get.hs @@ -46,7 +46,7 @@ seek o = startConcurrency downloadStages $ do (whenAnnexed go . toRawFilePath) NoBatch -> withKeyOptions (keyOptions o) (autoMode o) (commandAction . startKeys from) - (withFilesInGitAnnex ww (\f k -> commandAction (go f k))) + (withFilesInGitAnnex ww (commandAction' go)) =<< workTreeItems ww (getFiles o) where ww = WarnUnmatchLsFiles diff --git a/Command/Inprogress.hs b/Command/Inprogress.hs index d2fb04a2a5..17d4f9239a 100644 --- a/Command/Inprogress.hs +++ b/Command/Inprogress.hs @@ -38,8 +38,8 @@ seek o = do | otherwise -> commandAction stop _ -> do let s = S.fromList ts - withFilesInGit ww - (commandAction . (whenAnnexed (start s))) + withFilesInGitAnnex ww + (commandAction' (start s)) =<< workTreeItems ww (inprogressFiles o) where ww = WarnUnmatchLsFiles diff --git a/Command/List.hs b/Command/List.hs index 92e18b654c..59bafb39ce 100644 --- a/Command/List.hs +++ b/Command/List.hs @@ -44,7 +44,7 @@ seek :: ListOptions -> CommandSeek seek o = do list <- getList o printHeader list - withFilesInGit ww (commandAction . (whenAnnexed $ start list)) + withFilesInGitAnnex ww (commandAction' (start list)) =<< workTreeItems ww (listThese o) where ww = WarnUnmatchLsFiles diff --git a/Command/Lock.hs b/Command/Lock.hs index a32adb56bf..b3e56f0121 100644 --- a/Command/Lock.hs +++ b/Command/Lock.hs @@ -8,7 +8,6 @@ module Command.Lock where import Command -import qualified Annex.Queue import qualified Annex import Annex.Content import Annex.Link @@ -31,12 +30,12 @@ cmd = withGlobalOptions [jsonOptions, annexedMatchingOptions] $ seek :: CmdParams -> CommandSeek seek ps = do l <- workTreeItems ww ps - withFilesInGit ww (commandAction . (whenAnnexed startNew)) l + withFilesInGitAnnex ww (commandAction' start) l where ww = WarnUnmatchLsFiles -startNew :: RawFilePath -> Key -> CommandStart -startNew file key = ifM (isJust <$> isAnnexLink file) +start :: RawFilePath -> Key -> CommandStart +start file key = ifM (isJust <$> isAnnexLink file) ( stop , starting "lock" (mkActionItem (key, file)) $ go =<< liftIO (isPointerFile file) @@ -53,14 +52,14 @@ startNew file key = ifM (isJust <$> isAnnexLink file) , errorModified ) ) - cont = performNew file key + cont = perform file key -performNew :: RawFilePath -> Key -> CommandPerform -performNew file key = do +perform :: RawFilePath -> Key -> CommandPerform +perform file key = do lockdown =<< calcRepo (gitAnnexLocation key) addLink (fromRawFilePath file) key =<< withTSDelta (liftIO . genInodeCache file) - next $ cleanupNew file key + next $ cleanup file key where lockdown obj = do ifM (isUnmodified key obj) @@ -96,22 +95,10 @@ performNew file key = do lostcontent = logStatus key InfoMissing -cleanupNew :: RawFilePath -> Key -> CommandCleanup -cleanupNew file key = do +cleanup :: RawFilePath -> Key -> CommandCleanup +cleanup file key = do Database.Keys.removeAssociatedFile key =<< inRepo (toTopFilePath file) return True -startOld :: RawFilePath -> CommandStart -startOld file = do - unlessM (Annex.getState Annex.force) - errorModified - starting "lock" (ActionItemWorkTreeFile file) $ - performOld file - -performOld :: RawFilePath -> CommandPerform -performOld file = do - Annex.Queue.addCommand "checkout" [Param "--"] [fromRawFilePath file] - next $ return True - errorModified :: a errorModified = giveup "Locking this file would discard any changes you have made to it. Use 'git annex add' to stage your changes. (Or, use --force to override)" diff --git a/Command/Log.hs b/Command/Log.hs index 5ca6160671..48e5ec5bc2 100644 --- a/Command/Log.hs +++ b/Command/Log.hs @@ -86,8 +86,8 @@ seek o = do zone <- liftIO getCurrentTimeZone let outputter = mkOutputter m zone o case (logFiles o, allOption o) of - (fs, False) -> withFilesInGit ww - (commandAction . (whenAnnexed $ start o outputter)) + (fs, False) -> withFilesInGitAnnex ww + (commandAction' (start o outputter)) =<< workTreeItems ww fs ([], True) -> commandAction (startAll o outputter) (_, True) -> giveup "Cannot specify both files and --all" diff --git a/Command/MetaData.hs b/Command/MetaData.hs index 29229ac9c2..9f23da3295 100644 --- a/Command/MetaData.hs +++ b/Command/MetaData.hs @@ -77,13 +77,13 @@ seek o = case batchOption o of c <- liftIO currentVectorClock let ww = WarnUnmatchLsFiles let seeker = case getSet o of - Get _ -> withFilesInGit ww - GetAll -> withFilesInGit ww - Set _ -> withFilesInGitNonRecursive ww + Get _ -> withFilesInGitAnnex ww + GetAll -> withFilesInGitAnnex ww + Set _ -> withFilesInGitAnnexNonRecursive ww "Not recursively setting metadata. Use --force to do that." withKeyOptions (keyOptions o) False (commandAction . startKeys c o) - (seeker (commandAction . (whenAnnexed (start c o)))) + (seeker (commandAction' (start c o))) =<< workTreeItems ww (forFiles o) Batch fmt -> withMessageState $ \s -> case outputType s of JSONOutput _ -> ifM limited diff --git a/Command/Migrate.hs b/Command/Migrate.hs index 33b7d4d2c8..49ae1e7ec9 100644 --- a/Command/Migrate.hs +++ b/Command/Migrate.hs @@ -26,7 +26,7 @@ cmd = withGlobalOptions [annexedMatchingOptions] $ paramPaths (withParams seek) seek :: CmdParams -> CommandSeek -seek = withFilesInGit ww (commandAction . (whenAnnexed start)) +seek = withFilesInGitAnnex ww (commandAction' start) <=< workTreeItems ww where ww = WarnUnmatchLsFiles diff --git a/Command/Mirror.hs b/Command/Mirror.hs index eef184ed81..97295e7a8f 100644 --- a/Command/Mirror.hs +++ b/Command/Mirror.hs @@ -44,7 +44,7 @@ seek :: MirrorOptions -> CommandSeek seek o = startConcurrency stages $ withKeyOptions (keyOptions o) False (commandAction . startKey o (AssociatedFile Nothing)) - (withFilesInGit ww (commandAction . (whenAnnexed $ start o))) + (withFilesInGitAnnex ww (commandAction' (start o))) =<< workTreeItems ww (mirrorFiles o) where stages = case fromToOptions o of diff --git a/Command/Move.hs b/Command/Move.hs index e754372763..d612275a89 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -55,12 +55,13 @@ data RemoveWhen = RemoveSafe | RemoveNever seek :: MoveOptions -> CommandSeek seek o = startConcurrency stages $ do - let go = whenAnnexed $ start (fromToOptions o) (removeWhen o) + let go = start (fromToOptions o) (removeWhen o) case batchOption o of - Batch fmt -> batchFilesMatching fmt (go . toRawFilePath) + Batch fmt -> batchFilesMatching fmt + (whenAnnexed go . toRawFilePath) NoBatch -> withKeyOptions (keyOptions o) False (commandAction . startKey (fromToOptions o) (removeWhen o)) - (withFilesInGit ww (commandAction . go)) + (withFilesInGitAnnex ww (commandAction' go)) =<< workTreeItems ww (moveFiles o) where stages = case fromToOptions o of diff --git a/Command/Unannex.hs b/Command/Unannex.hs index b41a053597..ef26afc1f1 100644 --- a/Command/Unannex.hs +++ b/Command/Unannex.hs @@ -23,7 +23,7 @@ cmd = withGlobalOptions [annexedMatchingOptions] $ paramPaths (withParams seek) seek :: CmdParams -> CommandSeek -seek ps = (withFilesInGit ww $ commandAction . whenAnnexed start) +seek ps = (withFilesInGitAnnex ww (commandAction' start)) =<< workTreeItems ww ps where ww = WarnUnmatchLsFiles diff --git a/Command/Uninit.hs b/Command/Uninit.hs index 925fbc7086..b9eeaaf814 100644 --- a/Command/Uninit.hs +++ b/Command/Uninit.hs @@ -44,7 +44,7 @@ seek ps = do l <- workTreeItems ww ps withFilesNotInGit (commandAction . whenAnnexed (startCheckIncomplete . fromRawFilePath)) l Annex.changeState $ \s -> s { Annex.fast = True } - withFilesInGit ww (commandAction . whenAnnexed Command.Unannex.start) l + withFilesInGitAnnex ww (commandAction' Command.Unannex.start) l finish where ww = WarnUnmatchLsFiles diff --git a/Command/Unlock.hs b/Command/Unlock.hs index a1fa559669..7f2f26df6d 100644 --- a/Command/Unlock.hs +++ b/Command/Unlock.hs @@ -27,7 +27,7 @@ mkcmd n d = withGlobalOptions [jsonOptions, annexedMatchingOptions] $ command n SectionCommon d paramPaths (withParams seek) seek :: CmdParams -> CommandSeek -seek ps = withFilesInGit ww (commandAction . whenAnnexed start) +seek ps = withFilesInGitAnnex ww (commandAction' start) =<< workTreeItems ww ps where ww = WarnUnmatchLsFiles diff --git a/Command/Whereis.hs b/Command/Whereis.hs index fda4825c32..bb28b4af05 100644 --- a/Command/Whereis.hs +++ b/Command/Whereis.hs @@ -51,13 +51,14 @@ parseFormatOption = option (Utility.Format.gen <$> str) seek :: WhereisOptions -> CommandSeek seek o = do m <- remoteMap id - let go = whenAnnexed $ start o m + let go = start o m case batchOption o of - Batch fmt -> batchFilesMatching fmt (go . toRawFilePath) + Batch fmt -> batchFilesMatching fmt + (whenAnnexed go . toRawFilePath) NoBatch -> withKeyOptions (keyOptions o) False (commandAction . startKeys o m) - (withFilesInGit ww (commandAction . go)) + (withFilesInGitAnnex ww (commandAction' go)) =<< workTreeItems ww (whereisFiles o) where ww = WarnUnmatchLsFiles