wip RawFilePath 2x git-annex find speedup

Finally builds (oh the agoncy of making it build), but still very
unmergable, only Command.Find is included and lots of stuff is badly
hacked to make it compile.

Benchmarking vs master, this git-annex find is significantly faster!
Specifically:

	num files	old	new	speedup
	48500		4.77	3.73	28%
	12500		1.36	1.02	66%
	20		0.075	0.074	0% (so startup time is unchanged)

That's without really finishing the optimization. Things still to do:

* Eliminate all the fromRawFilePath, toRawFilePath, encodeBS,
  decodeBS conversions.
* Use versions of IO actions like getFileStatus that take a RawFilePath.
* Eliminate some Data.ByteString.Lazy.toStrict, which is a slow copy.
* Use ByteString for parsing git config to speed up startup.

It's likely several of those will speed up git-annex find further.
And other commands will certianly benefit even more.
This commit is contained in:
Joey Hess 2019-11-26 15:27:22 -04:00
parent 6a97ff6b3a
commit 067aabdd48
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
61 changed files with 380 additions and 296 deletions

View file

@ -295,18 +295,18 @@ renameExportM d _k oldloc newloc = liftIO $ Just <$> go
dest = exportPath d newloc
exportPath :: FilePath -> ExportLocation -> FilePath
exportPath d loc = d </> fromExportLocation loc
exportPath d loc = d </> fromRawFilePath (fromExportLocation loc)
{- Removes the ExportLocation's parent directory and its parents, so long as
- they're empty, up to but not including the topdir. -}
removeExportLocation :: FilePath -> ExportLocation -> IO ()
removeExportLocation topdir loc =
go (Just $ takeDirectory $ fromExportLocation loc) (Right ())
go (Just $ takeDirectory $ fromRawFilePath $ fromExportLocation loc) (Right ())
where
go _ (Left _e) = return ()
go Nothing _ = return ()
go (Just loc') _ = go (upFrom loc')
=<< tryIO (removeDirectory $ exportPath topdir (mkExportLocation loc'))
=<< tryIO (removeDirectory $ exportPath topdir (mkExportLocation (toRawFilePath loc')))
listImportableContentsM :: FilePath -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
listImportableContentsM dir = catchMaybeIO $ liftIO $ do
@ -319,7 +319,7 @@ listImportableContentsM dir = catchMaybeIO $ liftIO $ do
mkContentIdentifier f st >>= \case
Nothing -> return Nothing
Just cid -> do
relf <- relPathDirToFile dir f
relf <- toRawFilePath <$> relPathDirToFile dir f
sz <- getFileSize' f st
return $ Just (mkImportLocation relf, (cid, sz))

View file

@ -549,7 +549,7 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _) key file dest meter
u <- getUUID
let AssociatedFile afile = file
let fields = (Fields.remoteUUID, fromUUID u)
: maybe [] (\f -> [(Fields.associatedFile, f)]) afile
: maybe [] (\f -> [(Fields.associatedFile, fromRawFilePath f)]) afile
Just (cmd, params) <- Ssh.git_annex_shell ConsumeStdin
repo "transferinfo"
[Param $ serializeKey key] fields

View file

@ -137,7 +137,7 @@ rsyncParamsRemote unlocked r direction key file (AssociatedFile afile) = do
-- Send direct field for unlocked content, for backwards
-- compatability.
: (Fields.direct, if unlocked then "1" else "")
: maybe [] (\f -> [(Fields.associatedFile, f)]) afile
: maybe [] (\f -> [(Fields.associatedFile, fromRawFilePath f)]) afile
repo <- getRepo r
Just (shellcmd, shellparams) <- git_annex_shell ConsumeStdin repo
(if direction == Download then "sendkey" else "recvkey")

View file

@ -24,6 +24,7 @@ import qualified Git
import qualified Git.Config
import qualified Remote.Git
{-
import qualified Remote.GCrypt
import qualified Remote.P2P
#ifdef WITH_S3
@ -44,10 +45,12 @@ import qualified Remote.Ddar
import qualified Remote.GitLFS
import qualified Remote.Hook
import qualified Remote.External
-}
remoteTypes :: [RemoteType]
remoteTypes = map adjustExportImportRemoteType
[ Remote.Git.remote
{-
, Remote.GCrypt.remote
, Remote.P2P.remote
#ifdef WITH_S3
@ -68,6 +71,7 @@ remoteTypes = map adjustExportImportRemoteType
, Remote.GitLFS.remote
, Remote.Hook.remote
, Remote.External.remote
-}
]
{- Builds a list of all available Remotes.
@ -129,7 +133,9 @@ updateRemote remote = do
gitSyncableRemote :: Remote -> Bool
gitSyncableRemote r = remotetype r `elem`
[ Remote.Git.remote
{-
, Remote.GCrypt.remote
, Remote.P2P.remote
, Remote.GitLFS.remote
-}
]

View file

@ -268,22 +268,22 @@ storeExportM :: RsyncOpts -> FilePath -> Key -> ExportLocation -> MeterUpdate ->
storeExportM o src _k loc meterupdate =
storeGeneric o meterupdate basedest populatedest
where
basedest = fromExportLocation loc
basedest = fromRawFilePath (fromExportLocation loc)
populatedest = liftIO . createLinkOrCopy src
retrieveExportM :: RsyncOpts -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Bool
retrieveExportM o _k loc dest p = rsyncRetrieve o [rsyncurl] dest (Just p)
where
rsyncurl = mkRsyncUrl o (fromExportLocation loc)
rsyncurl = mkRsyncUrl o (fromRawFilePath (fromExportLocation loc))
checkPresentExportM :: RsyncOpts -> Key -> ExportLocation -> Annex Bool
checkPresentExportM o _k loc = checkPresentGeneric o [rsyncurl]
where
rsyncurl = mkRsyncUrl o (fromExportLocation loc)
rsyncurl = mkRsyncUrl o (fromRawFilePath (fromExportLocation loc))
removeExportM :: RsyncOpts -> Key -> ExportLocation -> Annex Bool
removeExportM o _k loc =
removeGeneric o (includes (fromExportLocation loc))
removeGeneric o $ includes $ fromRawFilePath $ fromExportLocation loc
where
includes f = f : case upFrom f of
Nothing -> []
@ -292,7 +292,7 @@ removeExportM o _k loc =
removeExportDirectoryM :: RsyncOpts -> ExportDirectory -> Annex Bool
removeExportDirectoryM o ed = removeGeneric o (allbelow d : includes d)
where
d = fromExportDirectory ed
d = fromRawFilePath $ fromExportDirectory ed
allbelow f = f </> "***"
includes f = f : case upFrom f of
Nothing -> []