wip RawFilePath 2x git-annex find speedup

Finally builds (oh the agony of making it build), but still very
unmergeable; only Command.Find is included and lots of stuff is badly
hacked to make it compile.

Benchmarking vs master, this git-annex find is significantly faster!
Specifically:

	num files	old	new	speedup
	48500		4.77	3.73	28%
	12500		1.36	1.02	33%
	20		0.075	0.074	0% (so startup time is unchanged)

That's without really finishing the optimization. Things still to do:

* Eliminate all the fromRawFilePath, toRawFilePath, encodeBS,
  decodeBS conversions.
* Use versions of IO actions like getFileStatus that take a RawFilePath.
* Eliminate some Data.ByteString.Lazy.toStrict, which is a slow copy.
* Use ByteString for parsing git config to speed up startup.

It's likely several of those will speed up git-annex find further.
And other commands will certainly benefit even more.
This commit is contained in:
Joey Hess 2019-11-26 15:27:22 -04:00
parent 6a97ff6b3a
commit 067aabdd48
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
61 changed files with 380 additions and 296 deletions

View file

@ -97,7 +97,7 @@ matchGlobFile glob = go
go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
go (MatchingInfo p) = matchGlob cglob <$> getInfo (providedFilePath p)
go (MatchingKey _ (AssociatedFile Nothing)) = pure False
go (MatchingKey _ (AssociatedFile (Just af))) = pure $ matchGlob cglob af
go (MatchingKey _ (AssociatedFile (Just af))) = pure $ matchGlob cglob (fromRawFilePath af)
addMimeType :: String -> Annex ()
addMimeType = addMagicLimit "mimetype" getMagicMimeType providedMimeType
@ -110,13 +110,13 @@ addMagicLimit limitname querymagic selectprovidedinfo glob = do
magic <- liftIO initMagicMime
addLimit $ matchMagic limitname querymagic' selectprovidedinfo magic glob
where
querymagic' magic f = liftIO (isPointerFile f) >>= \case
querymagic' magic f = liftIO (isPointerFile (toRawFilePath f)) >>= \case
-- Avoid getting magic of a pointer file, which would
-- wrongly be detected as text.
Just _ -> return Nothing
-- When the file is an annex symlink, get magic of the
-- object file.
Nothing -> isAnnexLink f >>= \case
Nothing -> isAnnexLink (toRawFilePath f) >>= \case
Just k -> withObjectLoc k $ querymagic magic
Nothing -> querymagic magic f
@ -143,7 +143,7 @@ matchLockStatus :: Bool -> MatchInfo -> Annex Bool
matchLockStatus _ (MatchingKey _ _) = pure False
matchLockStatus _ (MatchingInfo _) = pure False
matchLockStatus wantlocked (MatchingFile fi) = liftIO $ do
islocked <- isPointerFile (currFile fi) >>= \case
islocked <- isPointerFile (toRawFilePath (currFile fi)) >>= \case
Just _key -> return False
Nothing -> isSymbolicLink
<$> getSymbolicLinkStatus (currFile fi)
@ -192,7 +192,7 @@ limitInDir dir = const go
where
go (MatchingFile fi) = checkf $ matchFile fi
go (MatchingKey _ (AssociatedFile Nothing)) = return False
go (MatchingKey _ (AssociatedFile (Just af))) = checkf af
go (MatchingKey _ (AssociatedFile (Just af))) = checkf (fromRawFilePath af)
go (MatchingInfo p) = checkf =<< getInfo (providedFilePath p)
checkf = return . elem dir . splitPath . takeDirectory
@ -368,7 +368,7 @@ addAccessedWithin duration = do
secs = fromIntegral (durationSeconds duration)
lookupFileKey :: FileInfo -> Annex (Maybe Key)
lookupFileKey = lookupFile . currFile
lookupFileKey = lookupFile . toRawFilePath . currFile
checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool
checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a