wip RawFilePath 2x git-annex find speedup

Finally builds (oh the agoncy of making it build), but still very
unmergable, only Command.Find is included and lots of stuff is badly
hacked to make it compile.

Benchmarking vs master, this git-annex find is significantly faster!
Specifically:

	num files	old	new	speedup
	48500		4.77	3.73	28%
	12500		1.36	1.02	66%
	20		0.075	0.074	0% (so startup time is unchanged)

That's without really finishing the optimization. Things still to do:

* Eliminate all the fromRawFilePath, toRawFilePath, encodeBS,
  decodeBS conversions.
* Use versions of IO actions like getFileStatus that take a RawFilePath.
* Eliminate some Data.ByteString.Lazy.toStrict, which is a slow copy.
* Use ByteString for parsing git config to speed up startup.

It's likely several of those will speed up git-annex find further.
And other commands will certianly benefit even more.
This commit is contained in:
Joey Hess 2019-11-26 15:27:22 -04:00
parent 6a97ff6b3a
commit 067aabdd48
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
61 changed files with 380 additions and 296 deletions

View file

@ -33,35 +33,35 @@ import Config
- When in an adjusted branch that may have hidden the file, looks for a
- pointer to a key in the original branch.
-}
lookupFile :: FilePath -> Annex (Maybe Key)
lookupFile :: RawFilePath -> Annex (Maybe Key)
lookupFile = lookupFile' catkeyfile
where
catkeyfile file =
ifM (liftIO $ doesFileExist file)
ifM (liftIO $ doesFileExist $ fromRawFilePath file)
( catKeyFile file
, catKeyFileHidden file =<< getCurrentBranch
)
lookupFileNotHidden :: FilePath -> Annex (Maybe Key)
lookupFileNotHidden :: RawFilePath -> Annex (Maybe Key)
lookupFileNotHidden = lookupFile' catkeyfile
where
catkeyfile file =
ifM (liftIO $ doesFileExist file)
ifM (liftIO $ doesFileExist $ fromRawFilePath file)
( catKeyFile file
, return Nothing
)
lookupFile' :: (FilePath -> Annex (Maybe Key)) -> FilePath -> Annex (Maybe Key)
lookupFile' :: (RawFilePath -> Annex (Maybe Key)) -> RawFilePath -> Annex (Maybe Key)
lookupFile' catkeyfile file = isAnnexLink file >>= \case
Just key -> return (Just key)
Nothing -> catkeyfile file
{- Modifies an action to only act on files that are already annexed,
- and passes the key on to it. -}
whenAnnexed :: (FilePath -> Key -> Annex (Maybe a)) -> FilePath -> Annex (Maybe a)
whenAnnexed :: (RawFilePath -> Key -> Annex (Maybe a)) -> RawFilePath -> Annex (Maybe a)
whenAnnexed a file = ifAnnexed file (a file) (return Nothing)
ifAnnexed :: FilePath -> (Key -> Annex a) -> Annex a -> Annex a
ifAnnexed :: RawFilePath -> (Key -> Annex a) -> Annex a -> Annex a
ifAnnexed file yes no = maybe no yes =<< lookupFile file
{- Find all unlocked files and update the keys database for them.
@ -96,7 +96,7 @@ scanUnlockedFiles = whenM (inRepo Git.Ref.headExists <&&> not <$> isBareRepo) $
liftIO . Database.Keys.SQL.addAssociatedFileFast (toIKey k) tf
whenM (inAnnex k) $ do
f <- fromRepo $ fromTopFilePath tf
liftIO (isPointerFile f) >>= \case
liftIO (isPointerFile (toRawFilePath f)) >>= \case
Just k' | k' == k -> do
destmode <- liftIO $ catchMaybeIO $ fileMode <$> getFileStatus f
ic <- replaceFile f $ \tmp ->
@ -105,7 +105,7 @@ scanUnlockedFiles = whenM (inRepo Git.Ref.headExists <&&> not <$> isBareRepo) $
withTSDelta (liftIO . genInodeCache tmp)
LinkAnnexNoop -> return Nothing
LinkAnnexFailed -> liftIO $ do
writePointerFile tmp k destmode
writePointerFile (toRawFilePath tmp) k destmode
return Nothing
maybe noop (restagePointerFile (Restage True) f) ic
maybe noop (restagePointerFile (Restage True) (toRawFilePath f)) ic
_ -> noop