wip RawFilePath 2x git-annex find speedup
Finally builds (oh, the agony of making it build), but still very unmergeable: only Command.Find is included, and lots of stuff is badly hacked to make it compile.

Benchmarking vs master, this git-annex find is significantly faster! Specifically:

	num files	old	new	speedup
	48500		4.77	3.73	28%
	12500		1.36	1.02	66%
	20		0.075	0.074	0%	(so startup time is unchanged)

That's without really finishing the optimization. Things still to do:

* Eliminate all the fromRawFilePath, toRawFilePath, encodeBS, decodeBS conversions.
* Use versions of IO actions, like getFileStatus, that take a RawFilePath.
* Eliminate some Data.ByteString.Lazy.toStrict, which is a slow copy.
* Use ByteString for parsing git config, to speed up startup.

It's likely several of those will speed up git-annex find further, and other commands will certainly benefit even more.
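As a rough illustration of where that todo list points (a sketch relying only on the standard unix and bytestring packages, not code from this branch): RawFilePath is the unix package's ByteString-based path type, and that package already provides ByteString variants of the stat calls, so a filename read from git output never has to be decoded to a String just to reach a syscall.

import qualified Data.ByteString.Char8 as B8
import qualified System.Posix.Files as F             -- String-based FilePath API
import qualified System.Posix.Files.ByteString as FB -- RawFilePath (ByteString) API
import System.Posix.ByteString.FilePath (RawFilePath)

-- Old style: FilePath is String, a lazy list of Char; every syscall has
-- to marshal it, and filenames read from git output must be decoded first.
isSymlinkOld :: FilePath -> IO Bool
isSymlinkOld p = F.isSymbolicLink <$> F.getSymbolicLinkStatus p

-- New style: the path stays a strict ByteString from the git pipe all
-- the way down to the syscall.
isSymlinkNew :: RawFilePath -> IO Bool
isSymlinkNew p = FB.isSymbolicLink <$> FB.getSymbolicLinkStatus p

main :: IO ()
main = do
	print =<< isSymlinkOld "."
	print =<< isSymlinkNew (B8.pack ".")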
parent 6a97ff6b3a
commit 067aabdd48
61 changed files with 380 additions and 296 deletions
@@ -145,7 +145,7 @@ updateFromLog db (oldtree, currtree) = do
 	recordAnnexBranchTree db currtree
 	flushDbQueue db
   where
-	go ti = case extLogFileKey remoteContentIdentifierExt (getTopFilePath (DiffTree.file ti)) of
+	go ti = case extLogFileKey remoteContentIdentifierExt (toRawFilePath (getTopFilePath (DiffTree.file ti))) of
 		Nothing -> return ()
 		Just k -> do
 			l <- Log.getContentIdentifiers k
@@ -128,28 +128,28 @@ addExportedLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
 addExportedLocation h k el = queueDb h $ do
 	void $ insertUnique $ Exported ik ef
 	let edirs = map
-		(\ed -> ExportedDirectory (toSFilePath (fromExportDirectory ed)) ef)
+		(\ed -> ExportedDirectory (toSFilePath (fromRawFilePath (fromExportDirectory ed))) ef)
 		(exportDirectories el)
 	putMany edirs
   where
 	ik = toIKey k
-	ef = toSFilePath (fromExportLocation el)
+	ef = toSFilePath $ fromRawFilePath $ fromExportLocation el

 removeExportedLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
 removeExportedLocation h k el = queueDb h $ do
 	deleteWhere [ExportedKey ==. ik, ExportedFile ==. ef]
-	let subdirs = map (toSFilePath . fromExportDirectory)
+	let subdirs = map (toSFilePath . fromRawFilePath . fromExportDirectory)
 		(exportDirectories el)
 	deleteWhere [ExportedDirectoryFile ==. ef, ExportedDirectorySubdir <-. subdirs]
   where
 	ik = toIKey k
-	ef = toSFilePath (fromExportLocation el)
+	ef = toSFilePath $ fromRawFilePath $ fromExportLocation el

 {- Note that this does not see recently queued changes. -}
 getExportedLocation :: ExportHandle -> Key -> IO [ExportLocation]
 getExportedLocation (ExportHandle h _) k = H.queryDbQueue h $ do
 	l <- selectList [ExportedKey ==. ik] []
-	return $ map (mkExportLocation . fromSFilePath . exportedFile . entityVal) l
+	return $ map (mkExportLocation . toRawFilePath . fromSFilePath . exportedFile . entityVal) l
   where
 	ik = toIKey k
@@ -159,13 +159,13 @@ isExportDirectoryEmpty (ExportHandle h _) d = H.queryDbQueue h $ do
 	l <- selectList [ExportedDirectorySubdir ==. ed] []
 	return $ null l
   where
-	ed = toSFilePath $ fromExportDirectory d
+	ed = toSFilePath $ fromRawFilePath $ fromExportDirectory d

 {- Get locations in the export that might contain a key. -}
 getExportTree :: ExportHandle -> Key -> IO [ExportLocation]
 getExportTree (ExportHandle h _) k = H.queryDbQueue h $ do
 	l <- selectList [ExportTreeKey ==. ik] []
-	return $ map (mkExportLocation . fromSFilePath . exportTreeFile . entityVal) l
+	return $ map (mkExportLocation . toRawFilePath . fromSFilePath . exportTreeFile . entityVal) l
   where
 	ik = toIKey k
@@ -181,21 +181,21 @@ getExportTreeKey (ExportHandle h _) el = H.queryDbQueue h $ do
 	map (fromIKey . exportTreeKey . entityVal)
 		<$> selectList [ExportTreeFile ==. ef] []
   where
-	ef = toSFilePath (fromExportLocation el)
+	ef = toSFilePath (fromRawFilePath $ fromExportLocation el)

 addExportTree :: ExportHandle -> Key -> ExportLocation -> IO ()
 addExportTree h k loc = queueDb h $
 	void $ insertUnique $ ExportTree ik ef
   where
 	ik = toIKey k
-	ef = toSFilePath (fromExportLocation loc)
+	ef = toSFilePath (fromRawFilePath $ fromExportLocation loc)

 removeExportTree :: ExportHandle -> Key -> ExportLocation -> IO ()
 removeExportTree h k loc = queueDb h $
 	deleteWhere [ExportTreeKey ==. ik, ExportTreeFile ==. ef]
   where
 	ik = toIKey k
-	ef = toSFilePath (fromExportLocation loc)
+	ef = toSFilePath (fromRawFilePath $ fromExportLocation loc)

 -- An action that is passed the old and new values that were exported,
 -- and updates state.
@@ -220,7 +220,7 @@ mkExportDiffUpdater removeold addnew h srcek dstek i = do
 		Nothing -> return ()
 		Just k -> liftIO $ addnew h (asKey k) loc
   where
-	loc = mkExportLocation $ getTopFilePath $ Git.DiffTree.file i
+	loc = mkExportLocation $ toRawFilePath $ getTopFilePath $ Git.DiffTree.file i

 runExportDiffUpdater :: ExportDiffUpdater -> ExportHandle -> Sha -> Sha -> Annex ()
 runExportDiffUpdater updater h old new = do
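The export-database hunks above keep wrapping the same round trip: fromExportLocation now yields a RawFilePath, but toSFilePath still wants a String, so every add or lookup goes ByteString -> String -> storage, and back out through toRawFilePath on reads. That is exactly the conversion churn the first todo item wants to eliminate. A tiny sketch of the cost, with plain Char8 pack/unpack standing in for git-annex's encoding-aware helpers (an assumption for illustration, not its real API):

import qualified Data.ByteString.Char8 as B8

-- Roughly what each database call does today: decode the ByteString
-- into a lazy [Char], only for it to be re-encoded when stored. Two
-- O(n) passes and a cons cell per character, on a per-file hot path.
storeRoundTrip :: B8.ByteString -> B8.ByteString
storeRoundTrip = B8.pack . B8.unpack

-- What eliminating the conversions aims for: pass the bytes through.
storeDirect :: B8.ByteString -> B8.ByteString
storeDirect = id

main :: IO ()
main = do
	let p = B8.pack "foo/bar.txt"
	-- Same result either way; the round-trip version just allocates more.
	print (storeRoundTrip p == storeDirect p)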
@@ -235,7 +235,7 @@ reconcileStaged qh = do
   where
 	go cur indexcache = do
 		(l, cleanup) <- inRepo $ pipeNullSplit diff
-		changed <- procdiff l False
+		changed <- procdiff (map decodeBL' l) False
 		void $ liftIO cleanup
 		-- Flush database changes immediately
 		-- so other processes can see them.
@@ -262,7 +262,8 @@ reconcileStaged qh = do
 		-- perfect. A file could start with this and not be a
 		-- pointer file. And a pointer file that is replaced with
 		-- a non-pointer file will match this.
-		, Param $ "-G^" ++ toInternalGitPath (pathSeparator:objectDir)
+		, Param $ "-G^" ++ fromRawFilePath (toInternalGitPath $
+			toRawFilePath (pathSeparator:objectDir))
 		-- Don't include files that were deleted, because this only
 		-- wants to update information for files that are present
 		-- in the index.
@@ -277,7 +278,7 @@ reconcileStaged qh = do
 	procdiff (info:file:rest) changed = case words info of
 		((':':_srcmode):dstmode:_srcsha:dstsha:_change:[])
 			-- Only want files, not symlinks
-			| dstmode /= fmtTreeItemType TreeSymlink -> do
+			| dstmode /= decodeBS' (fmtTreeItemType TreeSymlink) -> do
 				maybe noop (reconcile (asTopFilePath file))
 					=<< catKey (Ref dstsha)
 				procdiff rest True
@@ -293,11 +294,11 @@ reconcileStaged qh = do
 		caches <- liftIO $ SQL.getInodeCaches ikey (SQL.ReadHandle qh)
 		keyloc <- calcRepo (gitAnnexLocation key)
 		keypopulated <- sameInodeCache keyloc caches
-		p <- fromRepo $ fromTopFilePath file
-		filepopulated <- sameInodeCache p caches
+		p <- fromRepo $ toRawFilePath . fromTopFilePath file
+		filepopulated <- sameInodeCache (fromRawFilePath p) caches
 		case (keypopulated, filepopulated) of
 			(True, False) ->
-				populatePointerFile (Restage True) key keyloc p >>= \case
+				populatePointerFile (Restage True) key (toRawFilePath keyloc) p >>= \case
 					Nothing -> return ()
 					Just ic -> liftIO $
 						SQL.addInodeCaches ikey [ic] (SQL.WriteHandle qh)
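The decodeBL' added in the reconcileStaged hunks converts each lazy ByteString that pipeNullSplit produces into a String, and the todo list also singles out Data.ByteString.Lazy.toStrict. A short sketch of why toStrict is on that list, using only the bytestring package (the chunk-folding consumer is a hypothetical stand-in for a parser of null-delimited git output):

import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as L

-- toStrict copies every chunk of the lazy ByteString into one freshly
-- allocated strict buffer: an extra O(n) copy per item.
flatten :: L.ByteString -> S.ByteString
flatten = L.toStrict

-- A consumer that can work a chunk at a time never needs that copy;
-- counting the total length chunk by chunk stands in for one here.
chunkedLength :: L.ByteString -> Int
chunkedLength = L.foldrChunks (\c acc -> S.length c + acc) 0

main :: IO ()
main = do
	let l = L.fromChunks [B8.pack "xxx", B8.pack "yyyy"]
	print (S.length (flatten l))	-- 7, after the copy
	print (chunkedLength l)		-- 7, without it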