mostly done with location log precaching

Some nice wins.
This commit is contained in:
Joey Hess 2020-07-13 17:04:02 -04:00
parent df58609804
commit 75aab72d23
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
22 changed files with 217 additions and 68 deletions

View file

@ -30,6 +30,8 @@ git-annex (8.20200618) UNRELEASED; urgency=medium
* Sped up the --all option by 2x to 16x by using git cat-file --buffer. * Sped up the --all option by 2x to 16x by using git cat-file --buffer.
Thanks to Lukey for finding this optimisation. Thanks to Lukey for finding this optimisation.
* Sped up seeking for annexed files to operate on by a factor of nearly 2x. * Sped up seeking for annexed files to operate on by a factor of nearly 2x.
* Sped up sync --content by 100% (without --all).
* Sped up some other commands like fsck --fast and whereis by around 50%.
* fsck: Detect if WORM keys contain a carriage return, and recommend * fsck: Detect if WORM keys contain a carriage return, and recommend
upgrading the key. (git-annex could have maybe created such keys back upgrading the key. (git-annex could have maybe created such keys back
in 2013). in 2013).

View file

@ -20,6 +20,7 @@ import qualified Git.Command
import qualified Git.LsFiles as LsFiles import qualified Git.LsFiles as LsFiles
import qualified Git.LsTree as LsTree import qualified Git.LsTree as LsTree
import qualified Git.Types as Git import qualified Git.Types as Git
import qualified Git.Ref
import Git.FilePath import Git.FilePath
import qualified Limit import qualified Limit
import CmdLine.GitAnnex.Options import CmdLine.GitAnnex.Options
@ -49,11 +50,17 @@ withFilesInGit :: WarnUnmatchWhen -> (RawFilePath -> CommandSeek) -> [WorkTreeIt
withFilesInGit ww a l = seekFiltered a $ withFilesInGit ww a l = seekFiltered a $
seekHelper id ww LsFiles.inRepo l seekHelper id ww LsFiles.inRepo l
withFilesInGitAnnex :: WarnUnmatchWhen -> (RawFilePath -> Key -> CommandSeek) -> [WorkTreeItem] -> CommandSeek data AnnexedFileSeeker = AnnexedFileSeeker
{ seekAction :: RawFilePath -> Key -> CommandSeek
, checkContentPresent :: Maybe Bool
, usesLocationLog :: Bool
}
withFilesInGitAnnex :: WarnUnmatchWhen -> AnnexedFileSeeker -> [WorkTreeItem] -> CommandSeek
withFilesInGitAnnex ww a l = seekFilteredKeys a $ withFilesInGitAnnex ww a l = seekFilteredKeys a $
seekHelper fst3 ww LsFiles.inRepoDetails l seekHelper fst3 ww LsFiles.inRepoDetails l
withFilesInGitAnnexNonRecursive :: WarnUnmatchWhen -> String -> (RawFilePath -> Key -> CommandSeek) -> [WorkTreeItem] -> CommandSeek withFilesInGitAnnexNonRecursive :: WarnUnmatchWhen -> String -> AnnexedFileSeeker -> [WorkTreeItem] -> CommandSeek
withFilesInGitAnnexNonRecursive ww needforce a l = ifM (Annex.getState Annex.force) withFilesInGitAnnexNonRecursive ww needforce a l = ifM (Annex.getState Annex.force)
( withFilesInGitAnnex ww a l ( withFilesInGitAnnex ww a l
, if null l , if null l
@ -265,35 +272,72 @@ seekFiltered a fs = do
process matcher f = process matcher f =
whenM (matcher $ MatchingFile $ FileInfo f f) $ a f whenM (matcher $ MatchingFile $ FileInfo f f) $ a f
-- This is significantly faster than using lookupKey after seekFiltered. -- This is significantly faster than using lookupKey after seekFiltered,
seekFilteredKeys :: (RawFilePath -> Key -> CommandSeek) -> Annex [(RawFilePath, Git.Sha, FileMode)] -> Annex () -- because of the way data is streamed through git cat-file.
seekFilteredKeys a listfs = do --
-- It can also precache location logs using the same efficient streaming.
seekFilteredKeys :: AnnexedFileSeeker -> Annex [(RawFilePath, Git.Sha, FileMode)] -> Annex ()
seekFilteredKeys seeker listfs = do
g <- Annex.gitRepo g <- Annex.gitRepo
matcher <- Limit.getMatcher matcher <- Limit.getMatcher
config <- Annex.getGitConfig
-- Run here, not in the async, because it could throw an exception -- Run here, not in the async, because it could throw an exception
-- The list should be built lazily. -- The list should be built lazily.
l <- listfs l <- listfs
catObjectMetaDataStream g $ \mdfeeder mdcloser mdreader -> catObjectMetaDataStream g $ \mdfeeder mdcloser mdreader ->
catObjectStream g $ \feeder closer reader -> do catObjectStream g $ \ofeeder ocloser oreader -> do
processertid <- liftIO . async =<< forkState processertid <- liftIO . async =<< forkState
(process matcher feeder mdfeeder mdcloser False l) (process matcher ofeeder mdfeeder mdcloser False l)
mdprocessertid <- liftIO . async =<< forkState mdprocessertid <- liftIO . async =<< forkState
(mdprocess matcher mdreader feeder closer) (mdprocess matcher mdreader ofeeder ocloser)
goread reader if usesLocationLog seeker
then catObjectStream g $ \lfeeder lcloser lreader -> do
precachertid <- liftIO . async =<< forkState
(precacher config oreader lfeeder lcloser)
precachefinisher lreader
join (liftIO (wait precachertid))
else finisher oreader
join (liftIO (wait mdprocessertid)) join (liftIO (wait mdprocessertid))
join (liftIO (wait processertid)) join (liftIO (wait processertid))
where where
goread reader = liftIO reader >>= \case checkpresence k cont = case checkContentPresent seeker of
Just v -> do
present <- inAnnex k
when (present == v) cont
Nothing -> cont
finisher oreader = liftIO oreader >>= \case
Just (f, content) -> do Just (f, content) -> do
maybe noop (a f) (parseLinkTargetOrPointerLazy =<< content) case parseLinkTargetOrPointerLazy =<< content of
goread reader Just k -> checkpresence k $
seekAction seeker f k
Nothing -> noop
finisher oreader
Nothing -> return ()
precachefinisher lreader = liftIO lreader >>= \case
Just ((logf, f, k), logcontent) -> do
maybe noop (Annex.BranchState.setCache logf) logcontent
seekAction seeker f k
precachefinisher lreader
Nothing -> return () Nothing -> return ()
feedmatches matcher feeder f sha = precacher config oreader lfeeder lcloser = liftIO oreader >>= \case
Just (f, content) -> do
case parseLinkTargetOrPointerLazy =<< content of
Just k -> checkpresence k $
let logf = locationLogFile config k
ref = Git.Ref.branchFileRef Annex.Branch.fullname logf
in liftIO $ lfeeder ((logf, f, k), ref)
Nothing -> noop
precacher config oreader lfeeder lcloser
Nothing -> liftIO $ void lcloser
feedmatches matcher ofeeder f sha =
whenM (matcher $ MatchingFile $ FileInfo f f) $ whenM (matcher $ MatchingFile $ FileInfo f f) $
liftIO $ feeder (f, sha) liftIO $ ofeeder (f, sha)
process matcher feeder mdfeeder mdcloser seenpointer ((f, sha, mode):rest) = process matcher ofeeder mdfeeder mdcloser seenpointer ((f, sha, mode):rest) =
case Git.toTreeItemType mode of case Git.toTreeItemType mode of
Just Git.TreeSymlink -> do Just Git.TreeSymlink -> do
-- Once a pointer file has been seen, -- Once a pointer file has been seen,
@ -303,27 +347,27 @@ seekFilteredKeys a listfs = do
-- file order. -- file order.
if seenpointer if seenpointer
then liftIO $ mdfeeder (f, sha) then liftIO $ mdfeeder (f, sha)
else feedmatches matcher feeder f sha else feedmatches matcher ofeeder f sha
process matcher feeder mdfeeder mdcloser seenpointer rest process matcher ofeeder mdfeeder mdcloser seenpointer rest
Just Git.TreeSubmodule -> Just Git.TreeSubmodule ->
process matcher feeder mdfeeder mdcloser seenpointer rest process matcher ofeeder mdfeeder mdcloser seenpointer rest
-- Might be a pointer file, might be other -- Might be a pointer file, might be other
-- file in git, possibly large. Avoid catting -- file in git, possibly large. Avoid catting
-- large files by first looking up the size. -- large files by first looking up the size.
Just _ -> do Just _ -> do
liftIO $ mdfeeder (f, sha) liftIO $ mdfeeder (f, sha)
process matcher feeder mdfeeder mdcloser True rest process matcher ofeeder mdfeeder mdcloser True rest
Nothing -> Nothing ->
process matcher feeder mdfeeder mdcloser seenpointer rest process matcher ofeeder mdfeeder mdcloser seenpointer rest
process _ _ _ mdcloser _ [] = liftIO $ void mdcloser process _ _ _ mdcloser _ [] = liftIO $ void mdcloser
mdprocess matcher mdreader feeder closer = liftIO mdreader >>= \case mdprocess matcher mdreader ofeeder ocloser = liftIO mdreader >>= \case
Just (f, Just (sha, size, _type)) Just (f, Just (sha, size, _type))
| size < maxPointerSz -> do | size < maxPointerSz -> do
feedmatches matcher feeder f sha feedmatches matcher ofeeder f sha
mdprocess matcher mdreader feeder closer mdprocess matcher mdreader ofeeder ocloser
Just _ -> mdprocess matcher mdreader feeder closer Just _ -> mdprocess matcher mdreader ofeeder ocloser
Nothing -> liftIO $ void closer Nothing -> liftIO $ void ocloser
seekHelper :: (a -> RawFilePath) -> WarnUnmatchWhen -> ([LsFiles.Options] -> [RawFilePath] -> Git.Repo -> IO ([a], IO Bool)) -> [WorkTreeItem] -> Annex [a] seekHelper :: (a -> RawFilePath) -> WarnUnmatchWhen -> ([LsFiles.Options] -> [RawFilePath] -> Git.Repo -> IO ([a], IO Bool)) -> [WorkTreeItem] -> Annex [a]
seekHelper c ww a l = do seekHelper c ww a l = do

View file

@ -46,13 +46,18 @@ instance DeferredParseClass CopyOptions where
seek :: CopyOptions -> CommandSeek seek :: CopyOptions -> CommandSeek
seek o = startConcurrency commandStages $ do seek o = startConcurrency commandStages $ do
let go = start o let go = start o
let seeker = AnnexedFileSeeker
{ seekAction = commandAction' go
, checkContentPresent = Nothing
, usesLocationLog = False
}
case batchOption o of case batchOption o of
Batch fmt -> batchFilesMatching fmt Batch fmt -> batchFilesMatching fmt
(whenAnnexed go . toRawFilePath) (whenAnnexed go . toRawFilePath)
NoBatch -> withKeyOptions NoBatch -> withKeyOptions
(keyOptions o) (autoMode o) (keyOptions o) (autoMode o)
(commandAction . Command.Move.startKey (fromToOptions o) Command.Move.RemoveNever) (commandAction . Command.Move.startKey (fromToOptions o) Command.Move.RemoveNever)
(withFilesInGitAnnex ww (commandAction' go)) (withFilesInGitAnnex ww seeker)
=<< workTreeItems ww (copyFiles o) =<< workTreeItems ww (copyFiles o)
where where
ww = WarnUnmatchLsFiles ww = WarnUnmatchLsFiles

View file

@ -58,12 +58,18 @@ seek o = startConcurrency commandStages $
(whenAnnexed go . toRawFilePath) (whenAnnexed go . toRawFilePath)
NoBatch -> withKeyOptions (keyOptions o) (autoMode o) NoBatch -> withKeyOptions (keyOptions o) (autoMode o)
(commandAction . startKeys o) (commandAction . startKeys o)
(withFilesInGitAnnex ww (commandAction' go)) (withFilesInGitAnnex ww seeker)
=<< workTreeItems ww (dropFiles o) =<< workTreeItems ww (dropFiles o)
where where
go = start o go = start o
ww = WarnUnmatchLsFiles ww = WarnUnmatchLsFiles
seeker = AnnexedFileSeeker
{ seekAction = commandAction' go
, checkContentPresent = Nothing
, usesLocationLog = False
}
start :: DropOptions -> RawFilePath -> Key -> CommandStart start :: DropOptions -> RawFilePath -> Key -> CommandStart
start o file key = start' o key afile ai start o file key = start' o key afile ai
where where

View file

@ -13,7 +13,6 @@ import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8 import qualified Data.ByteString.Char8 as S8
import Command import Command
import Annex.Content
import Limit import Limit
import Types.Key import Types.Key
import Git.FilePath import Git.FilePath
@ -55,24 +54,31 @@ parseFormatOption =
seek :: FindOptions -> CommandSeek seek :: FindOptions -> CommandSeek
seek o = case batchOption o of seek o = case batchOption o of
NoBatch -> withKeyOptions (keyOptions o) False NoBatch -> do
(commandAction . startKeys o) islimited <- limited
(withFilesInGitAnnex ww (commandAction' go)) let seeker = AnnexedFileSeeker
=<< workTreeItems ww (findThese o) { seekAction = commandAction' go
-- only files with content present are shown, unless
-- the user has requested others via a limit
, checkContentPresent = if islimited
then Nothing
else Just True
, usesLocationLog = False
}
withKeyOptions (keyOptions o) False
(commandAction . startKeys o)
(withFilesInGitAnnex ww seeker)
=<< workTreeItems ww (findThese o)
Batch fmt -> batchFilesMatching fmt Batch fmt -> batchFilesMatching fmt
(whenAnnexed go . toRawFilePath) (whenAnnexed go . toRawFilePath)
where where
go = start o go = start o
ww = WarnUnmatchLsFiles ww = WarnUnmatchLsFiles
-- only files inAnnex are shown, unless the user has requested
-- others via a limit
start :: FindOptions -> RawFilePath -> Key -> CommandStart start :: FindOptions -> RawFilePath -> Key -> CommandStart
start o file key = start o file key = startingCustomOutput key $ do
stopUnless (limited <||> inAnnex key) $ showFormatted (formatOption o) file $ ("file", fromRawFilePath file) : keyVars key
startingCustomOutput key $ do next $ return True
showFormatted (formatOption o) file $ ("file", fromRawFilePath file) : keyVars key
next $ return True
startKeys :: FindOptions -> (Key, ActionItem) -> CommandStart startKeys :: FindOptions -> (Key, ActionItem) -> CommandStart
startKeys o (key, ActionItemBranchFilePath (BranchFilePath _ topf) _) = startKeys o (key, ActionItemBranchFilePath (BranchFilePath _ topf) _) =

View file

@ -32,10 +32,14 @@ cmd = noCommit $ withGlobalOptions [annexedMatchingOptions] $
seek :: CmdParams -> CommandSeek seek :: CmdParams -> CommandSeek
seek ps = unlessM crippledFileSystem $ seek ps = unlessM crippledFileSystem $
withFilesInGitAnnex ww (commandAction' (start FixAll)) withFilesInGitAnnex ww seeker =<< workTreeItems ww ps
=<< workTreeItems ww ps
where where
ww = WarnUnmatchLsFiles ww = WarnUnmatchLsFiles
seeker = AnnexedFileSeeker
{ seekAction = commandAction' (start FixAll)
, checkContentPresent = Nothing
, usesLocationLog = False
}
data FixWhat = FixSymlinks | FixAll data FixWhat = FixSymlinks | FixAll

View file

@ -92,9 +92,14 @@ seek o = startConcurrency commandStages $ do
u <- maybe getUUID (pure . Remote.uuid) from u <- maybe getUUID (pure . Remote.uuid) from
checkDeadRepo u checkDeadRepo u
i <- prepIncremental u (incrementalOpt o) i <- prepIncremental u (incrementalOpt o)
let seeker = AnnexedFileSeeker
{ seekAction = commandAction' (start from i)
, checkContentPresent = Just True
, usesLocationLog = True
}
withKeyOptions (keyOptions o) False withKeyOptions (keyOptions o) False
(\kai -> commandAction . startKey from i kai =<< getNumCopies) (\kai -> commandAction . startKey from i kai =<< getNumCopies)
(withFilesInGit ww $ commandAction . (whenAnnexed (start from i))) (withFilesInGitAnnex ww seeker)
=<< workTreeItems ww (fsckFiles o) =<< workTreeItems ww (fsckFiles o)
cleanupIncremental i cleanupIncremental i
void $ tryIO $ recordActivity Fsck u void $ tryIO $ recordActivity Fsck u

View file

@ -41,12 +41,17 @@ seek :: GetOptions -> CommandSeek
seek o = startConcurrency downloadStages $ do seek o = startConcurrency downloadStages $ do
from <- maybe (pure Nothing) (Just <$$> getParsed) (getFrom o) from <- maybe (pure Nothing) (Just <$$> getParsed) (getFrom o)
let go = start o from let go = start o from
let seeker = AnnexedFileSeeker
{ seekAction = commandAction' go
, checkContentPresent = Just False
, usesLocationLog = True
}
case batchOption o of case batchOption o of
Batch fmt -> batchFilesMatching fmt Batch fmt -> batchFilesMatching fmt
(whenAnnexed go . toRawFilePath) (whenAnnexed go . toRawFilePath)
NoBatch -> withKeyOptions (keyOptions o) (autoMode o) NoBatch -> withKeyOptions (keyOptions o) (autoMode o)
(commandAction . startKeys from) (commandAction . startKeys from)
(withFilesInGitAnnex ww (commandAction' go)) (withFilesInGitAnnex ww seeker)
=<< workTreeItems ww (getFiles o) =<< workTreeItems ww (getFiles o)
where where
ww = WarnUnmatchLsFiles ww = WarnUnmatchLsFiles

View file

@ -38,8 +38,12 @@ seek o = do
| otherwise -> commandAction stop | otherwise -> commandAction stop
_ -> do _ -> do
let s = S.fromList ts let s = S.fromList ts
withFilesInGitAnnex ww let seeker = AnnexedFileSeeker
(commandAction' (start s)) { seekAction = commandAction' (start s)
, checkContentPresent = Nothing
, usesLocationLog = False
}
withFilesInGitAnnex ww seeker
=<< workTreeItems ww (inprogressFiles o) =<< workTreeItems ww (inprogressFiles o)
where where
ww = WarnUnmatchLsFiles ww = WarnUnmatchLsFiles

View file

@ -44,8 +44,12 @@ seek :: ListOptions -> CommandSeek
seek o = do seek o = do
list <- getList o list <- getList o
printHeader list printHeader list
withFilesInGitAnnex ww (commandAction' (start list)) let seeker = AnnexedFileSeeker
=<< workTreeItems ww (listThese o) { seekAction = commandAction' (start list)
, checkContentPresent = Nothing
, usesLocationLog = True
}
withFilesInGitAnnex ww seeker =<< workTreeItems ww (listThese o)
where where
ww = WarnUnmatchLsFiles ww = WarnUnmatchLsFiles

View file

@ -28,11 +28,14 @@ cmd = withGlobalOptions [jsonOptions, annexedMatchingOptions] $
paramPaths (withParams seek) paramPaths (withParams seek)
seek :: CmdParams -> CommandSeek seek :: CmdParams -> CommandSeek
seek ps = do seek ps = withFilesInGitAnnex ww seeker =<< workTreeItems ww ps
l <- workTreeItems ww ps
withFilesInGitAnnex ww (commandAction' start) l
where where
ww = WarnUnmatchLsFiles ww = WarnUnmatchLsFiles
seeker = AnnexedFileSeeker
{ seekAction = commandAction' start
, checkContentPresent = Nothing
, usesLocationLog = False
}
start :: RawFilePath -> Key -> CommandStart start :: RawFilePath -> Key -> CommandStart
start file key = ifM (isJust <$> isAnnexLink file) start file key = ifM (isJust <$> isAnnexLink file)

View file

@ -85,9 +85,15 @@ seek o = do
m <- Remote.uuidDescriptions m <- Remote.uuidDescriptions
zone <- liftIO getCurrentTimeZone zone <- liftIO getCurrentTimeZone
let outputter = mkOutputter m zone o let outputter = mkOutputter m zone o
let seeker = AnnexedFileSeeker
{ seekAction = commandAction' (start o outputter)
, checkContentPresent = Nothing
-- the way this uses the location log would not be helped
-- by precaching the current value
, usesLocationLog = False
}
case (logFiles o, allOption o) of case (logFiles o, allOption o) of
(fs, False) -> withFilesInGitAnnex ww (fs, False) -> withFilesInGitAnnex ww seeker
(commandAction' (start o outputter))
=<< workTreeItems ww fs =<< workTreeItems ww fs
([], True) -> commandAction (startAll o outputter) ([], True) -> commandAction (startAll o outputter)
(_, True) -> giveup "Cannot specify both files and --all" (_, True) -> giveup "Cannot specify both files and --all"

View file

@ -76,14 +76,19 @@ seek o = case batchOption o of
NoBatch -> do NoBatch -> do
c <- liftIO currentVectorClock c <- liftIO currentVectorClock
let ww = WarnUnmatchLsFiles let ww = WarnUnmatchLsFiles
let seeker = case getSet o of let seeker = AnnexedFileSeeker
{ seekAction = commandAction' (start c o)
, checkContentPresent = Nothing
, usesLocationLog = False
}
let seekaction = case getSet o of
Get _ -> withFilesInGitAnnex ww Get _ -> withFilesInGitAnnex ww
GetAll -> withFilesInGitAnnex ww GetAll -> withFilesInGitAnnex ww
Set _ -> withFilesInGitAnnexNonRecursive ww Set _ -> withFilesInGitAnnexNonRecursive ww
"Not recursively setting metadata. Use --force to do that." "Not recursively setting metadata. Use --force to do that."
withKeyOptions (keyOptions o) False withKeyOptions (keyOptions o) False
(commandAction . startKeys c o) (commandAction . startKeys c o)
(seeker (commandAction' (start c o))) (seekaction seeker)
=<< workTreeItems ww (forFiles o) =<< workTreeItems ww (forFiles o)
Batch fmt -> withMessageState $ \s -> case outputType s of Batch fmt -> withMessageState $ \s -> case outputType s of
JSONOutput _ -> ifM limited JSONOutput _ -> ifM limited

View file

@ -26,10 +26,14 @@ cmd = withGlobalOptions [annexedMatchingOptions] $
paramPaths (withParams seek) paramPaths (withParams seek)
seek :: CmdParams -> CommandSeek seek :: CmdParams -> CommandSeek
seek = withFilesInGitAnnex ww (commandAction' start) seek = withFilesInGitAnnex ww seeker <=< workTreeItems ww
<=< workTreeItems ww
where where
ww = WarnUnmatchLsFiles ww = WarnUnmatchLsFiles
seeker = AnnexedFileSeeker
{ seekAction = commandAction' start
, checkContentPresent = Nothing
, usesLocationLog = False
}
start :: RawFilePath -> Key -> CommandStart start :: RawFilePath -> Key -> CommandStart
start file key = do start file key = do

View file

@ -44,13 +44,18 @@ seek :: MirrorOptions -> CommandSeek
seek o = startConcurrency stages $ seek o = startConcurrency stages $
withKeyOptions (keyOptions o) False withKeyOptions (keyOptions o) False
(commandAction . startKey o (AssociatedFile Nothing)) (commandAction . startKey o (AssociatedFile Nothing))
(withFilesInGitAnnex ww (commandAction' (start o))) (withFilesInGitAnnex ww seeker)
=<< workTreeItems ww (mirrorFiles o) =<< workTreeItems ww (mirrorFiles o)
where where
stages = case fromToOptions o of stages = case fromToOptions o of
FromRemote _ -> downloadStages FromRemote _ -> downloadStages
ToRemote _ -> commandStages ToRemote _ -> commandStages
ww = WarnUnmatchLsFiles ww = WarnUnmatchLsFiles
seeker = AnnexedFileSeeker
{ seekAction = commandAction' (start o)
, checkContentPresent = Nothing
, usesLocationLog = False
}
start :: MirrorOptions -> RawFilePath -> Key -> CommandStart start :: MirrorOptions -> RawFilePath -> Key -> CommandStart
start o file k = startKey o afile (k, ai) start o file k = startKey o afile (k, ai)

View file

@ -56,12 +56,17 @@ data RemoveWhen = RemoveSafe | RemoveNever
seek :: MoveOptions -> CommandSeek seek :: MoveOptions -> CommandSeek
seek o = startConcurrency stages $ do seek o = startConcurrency stages $ do
let go = start (fromToOptions o) (removeWhen o) let go = start (fromToOptions o) (removeWhen o)
let seeker = AnnexedFileSeeker
{ seekAction = commandAction' go
, checkContentPresent = Nothing
, usesLocationLog = False
}
case batchOption o of case batchOption o of
Batch fmt -> batchFilesMatching fmt Batch fmt -> batchFilesMatching fmt
(whenAnnexed go . toRawFilePath) (whenAnnexed go . toRawFilePath)
NoBatch -> withKeyOptions (keyOptions o) False NoBatch -> withKeyOptions (keyOptions o) False
(commandAction . startKey (fromToOptions o) (removeWhen o)) (commandAction . startKey (fromToOptions o) (removeWhen o))
(withFilesInGitAnnex ww (commandAction' go)) (withFilesInGitAnnex ww seeker)
=<< workTreeItems ww (moveFiles o) =<< workTreeItems ww (moveFiles o)
where where
stages = case fromToOptions o of stages = case fromToOptions o of

View file

@ -652,8 +652,13 @@ seekSyncContent o rs currbranch = do
waitForAllRunningCommandActions waitForAllRunningCommandActions
liftIO $ not <$> isEmptyMVar mvar liftIO $ not <$> isEmptyMVar mvar
where where
seekworktree mvar l bloomfeeder = seekworktree mvar l bloomfeeder = do
seekFilteredKeys (gofile bloomfeeder mvar) $ let seeker = AnnexedFileSeeker
{ seekAction = gofile bloomfeeder mvar
, checkContentPresent = Nothing
, usesLocationLog = True
}
seekFilteredKeys seeker $
seekHelper fst3 ww LsFiles.inRepoDetails l seekHelper fst3 ww LsFiles.inRepoDetails l
seekincludinghidden origbranch mvar l bloomfeeder = seekincludinghidden origbranch mvar l bloomfeeder =

View file

@ -23,13 +23,19 @@ cmd = withGlobalOptions [annexedMatchingOptions] $
paramPaths (withParams seek) paramPaths (withParams seek)
seek :: CmdParams -> CommandSeek seek :: CmdParams -> CommandSeek
seek ps = (withFilesInGitAnnex ww (commandAction' start)) seek ps = withFilesInGitAnnex ww seeker =<< workTreeItems ww ps
=<< workTreeItems ww ps
where where
ww = WarnUnmatchLsFiles ww = WarnUnmatchLsFiles
seeker :: AnnexedFileSeeker
seeker = AnnexedFileSeeker
{ seekAction = commandAction' start
, checkContentPresent = Nothing
, usesLocationLog = False
}
start :: RawFilePath -> Key -> CommandStart start :: RawFilePath -> Key -> CommandStart
start file key = stopUnless (inAnnex key) $ start file key =
starting "unannex" (mkActionItem (key, file)) $ starting "unannex" (mkActionItem (key, file)) $
perform file key perform file key

View file

@ -44,7 +44,7 @@ seek ps = do
l <- workTreeItems ww ps l <- workTreeItems ww ps
withFilesNotInGit (commandAction . whenAnnexed (startCheckIncomplete . fromRawFilePath)) l withFilesNotInGit (commandAction . whenAnnexed (startCheckIncomplete . fromRawFilePath)) l
Annex.changeState $ \s -> s { Annex.fast = True } Annex.changeState $ \s -> s { Annex.fast = True }
withFilesInGitAnnex ww (commandAction' Command.Unannex.start) l withFilesInGitAnnex ww Command.Unannex.seeker l
finish finish
where where
ww = WarnUnmatchLsFiles ww = WarnUnmatchLsFiles

View file

@ -27,10 +27,14 @@ mkcmd n d = withGlobalOptions [jsonOptions, annexedMatchingOptions] $
command n SectionCommon d paramPaths (withParams seek) command n SectionCommon d paramPaths (withParams seek)
seek :: CmdParams -> CommandSeek seek :: CmdParams -> CommandSeek
seek ps = withFilesInGitAnnex ww (commandAction' start) seek ps = withFilesInGitAnnex ww seeker =<< workTreeItems ww ps
=<< workTreeItems ww ps
where where
ww = WarnUnmatchLsFiles ww = WarnUnmatchLsFiles
seeker = AnnexedFileSeeker
{ seekAction = commandAction' start
, checkContentPresent = Nothing
, usesLocationLog = False
}
start :: RawFilePath -> Key -> CommandStart start :: RawFilePath -> Key -> CommandStart
start file key = ifM (isJust <$> isAnnexLink file) start file key = ifM (isJust <$> isAnnexLink file)

View file

@ -55,10 +55,15 @@ seek o = do
case batchOption o of case batchOption o of
Batch fmt -> batchFilesMatching fmt Batch fmt -> batchFilesMatching fmt
(whenAnnexed go . toRawFilePath) (whenAnnexed go . toRawFilePath)
NoBatch -> NoBatch -> do
let seeker = AnnexedFileSeeker
{ seekAction = commandAction' go
, checkContentPresent = Nothing
, usesLocationLog = True
}
withKeyOptions (keyOptions o) False withKeyOptions (keyOptions o) False
(commandAction . startKeys o m) (commandAction . startKeys o m)
(withFilesInGitAnnex ww (commandAction' go)) (withFilesInGitAnnex ww seeker)
=<< workTreeItems ww (whereisFiles o) =<< workTreeItems ww (whereisFiles o)
where where
ww = WarnUnmatchLsFiles ww = WarnUnmatchLsFiles

View file

@ -25,6 +25,22 @@ and precache them.
> > > >
> > So, this needs some more work, but is promising. > > So, this needs some more work, but is promising.
> > > Second try at this, results:
> > >
> > > * `get` in a full repo is not any slower. And presumably in an
> > > empty repo, `get` is faster, but I didn't try it and the transfers
> > > will dominate that anyway
> > > * `sync --content` 2x speedup!
> > > * `fsck --fast` 1.5x speedup
> > > * `whereis` 1.5x speedup
> > >
> > > Still todo:
> > >
> > > * move, copy, drop, and mirror were left not using the location log caching yet
> > > * get is left with an unncessary inAnnex check so could be sped up
> > > a little bit more
> > >
Another thing that the same cat-file --buffer approach could be used with Another thing that the same cat-file --buffer approach could be used with
is to cat the annex links. Git.LsFiles.inRepoDetails provides the Sha is to cat the annex links. Git.LsFiles.inRepoDetails provides the Sha
of file contents, which can be fed through cat-file --buffer to get keys. of file contents, which can be fed through cat-file --buffer to get keys.