wip RawFilePath 2x git-annex find speedup
Finally builds (oh the agoncy of making it build), but still very unmergable, only Command.Find is included and lots of stuff is badly hacked to make it compile. Benchmarking vs master, this git-annex find is significantly faster! Specifically: num files old new speedup 48500 4.77 3.73 28% 12500 1.36 1.02 66% 20 0.075 0.074 0% (so startup time is unchanged) That's without really finishing the optimization. Things still to do: * Eliminate all the fromRawFilePath, toRawFilePath, encodeBS, decodeBS conversions. * Use versions of IO actions like getFileStatus that take a RawFilePath. * Eliminate some Data.ByteString.Lazy.toStrict, which is a slow copy. * Use ByteString for parsing git config to speed up startup. It's likely several of those will speed up git-annex find further. And other commands will certianly benefit even more.
This commit is contained in:
parent
6a97ff6b3a
commit
067aabdd48
61 changed files with 380 additions and 296 deletions
|
@ -37,6 +37,8 @@ import qualified Data.ByteString.Lazy as L
|
|||
import qualified Data.Attoparsec.ByteString.Lazy as A
|
||||
import qualified Data.Attoparsec.ByteString.Char8 as A8
|
||||
import Data.ByteString.Builder
|
||||
import Data.Either
|
||||
import Data.Char
|
||||
|
||||
-- This constuctor is not itself exported to other modules, to enforce
|
||||
-- consistent use of exportedTreeishes.
|
||||
|
@ -176,8 +178,9 @@ logExportExcluded u a = do
|
|||
getExportExcluded :: UUID -> Annex [Git.Tree.TreeItem]
|
||||
getExportExcluded u = do
|
||||
logf <- fromRepo $ gitAnnexExportExcludeLog u
|
||||
liftIO $ catchDefaultIO [] $
|
||||
(map parser . lines)
|
||||
<$> readFile logf
|
||||
liftIO $ catchDefaultIO [] $ parser <$> L.readFile logf
|
||||
where
|
||||
parser = Git.Tree.lsTreeItemToTreeItem . Git.LsTree.parseLsTree
|
||||
parser = map Git.Tree.lsTreeItemToTreeItem
|
||||
. rights
|
||||
. map Git.LsTree.parseLsTree
|
||||
. L.split (fromIntegral $ ord '\n')
|
||||
|
|
|
@ -71,7 +71,7 @@ loggedLocationsHistorical = getLoggedLocations . historicalLogInfo
|
|||
loggedLocationsRef :: Ref -> Annex [UUID]
|
||||
loggedLocationsRef ref = map (toUUID . fromLogInfo) . getLog <$> catObject ref
|
||||
|
||||
getLoggedLocations :: (FilePath -> Annex [LogInfo]) -> Key -> Annex [UUID]
|
||||
getLoggedLocations :: (RawFilePath -> Annex [LogInfo]) -> Key -> Annex [UUID]
|
||||
getLoggedLocations getter key = do
|
||||
config <- Annex.getGitConfig
|
||||
map (toUUID . fromLogInfo) <$> getter (locationLogFile config key)
|
||||
|
|
|
@ -57,7 +57,7 @@ import qualified Data.Map as M
|
|||
getCurrentMetaData :: Key -> Annex MetaData
|
||||
getCurrentMetaData = getCurrentMetaData' metaDataLogFile
|
||||
|
||||
getCurrentMetaData' :: (GitConfig -> Key -> FilePath) -> Key -> Annex MetaData
|
||||
getCurrentMetaData' :: (GitConfig -> Key -> RawFilePath) -> Key -> Annex MetaData
|
||||
getCurrentMetaData' getlogfile k = do
|
||||
config <- Annex.getGitConfig
|
||||
ls <- S.toAscList <$> readLog (getlogfile config k)
|
||||
|
@ -95,7 +95,7 @@ getCurrentRemoteMetaData (RemoteStateHandle u) k = extractRemoteMetaData u <$>
|
|||
addMetaData :: Key -> MetaData -> Annex ()
|
||||
addMetaData = addMetaData' metaDataLogFile
|
||||
|
||||
addMetaData' :: (GitConfig -> Key -> FilePath) -> Key -> MetaData -> Annex ()
|
||||
addMetaData' :: (GitConfig -> Key -> RawFilePath) -> Key -> MetaData -> Annex ()
|
||||
addMetaData' getlogfile k metadata =
|
||||
addMetaDataClocked' getlogfile k metadata =<< liftIO currentVectorClock
|
||||
|
||||
|
@ -106,7 +106,7 @@ addMetaData' getlogfile k metadata =
|
|||
addMetaDataClocked :: Key -> MetaData -> VectorClock -> Annex ()
|
||||
addMetaDataClocked = addMetaDataClocked' metaDataLogFile
|
||||
|
||||
addMetaDataClocked' :: (GitConfig -> Key -> FilePath) -> Key -> MetaData -> VectorClock -> Annex ()
|
||||
addMetaDataClocked' :: (GitConfig -> Key -> RawFilePath) -> Key -> MetaData -> VectorClock -> Annex ()
|
||||
addMetaDataClocked' getlogfile k d@(MetaData m) c
|
||||
| d == emptyMetaData = noop
|
||||
| otherwise = do
|
||||
|
@ -151,5 +151,5 @@ copyMetaData oldkey newkey
|
|||
const $ buildLog l
|
||||
return True
|
||||
|
||||
readLog :: FilePath -> Annex (Log MetaData)
|
||||
readLog :: RawFilePath -> Annex (Log MetaData)
|
||||
readLog = parseLog <$$> Annex.Branch.get
|
||||
|
|
|
@ -28,7 +28,7 @@ preferredContentSet = setLog preferredContentLog
|
|||
requiredContentSet :: UUID -> PreferredContentExpression -> Annex ()
|
||||
requiredContentSet = setLog requiredContentLog
|
||||
|
||||
setLog :: FilePath -> UUID -> PreferredContentExpression -> Annex ()
|
||||
setLog :: RawFilePath -> UUID -> PreferredContentExpression -> Annex ()
|
||||
setLog logfile uuid@(UUID _) val = do
|
||||
c <- liftIO currentVectorClock
|
||||
Annex.Branch.change logfile $
|
||||
|
|
|
@ -30,7 +30,7 @@ import Git.Types (RefDate)
|
|||
|
||||
{- Adds a LogLine to the log, removing any LogLines that are obsoleted by
|
||||
- adding it. -}
|
||||
addLog :: FilePath -> LogLine -> Annex ()
|
||||
addLog :: RawFilePath -> LogLine -> Annex ()
|
||||
addLog file line = Annex.Branch.change file $ \b ->
|
||||
buildLog $ compactLog (line : parseLog b)
|
||||
|
||||
|
@ -38,14 +38,14 @@ addLog file line = Annex.Branch.change file $ \b ->
|
|||
- older timestamp, that LogLine is preserved, rather than updating the log
|
||||
- with a newer timestamp.
|
||||
-}
|
||||
maybeAddLog :: FilePath -> LogLine -> Annex ()
|
||||
maybeAddLog :: RawFilePath -> LogLine -> Annex ()
|
||||
maybeAddLog file line = Annex.Branch.maybeChange file $ \s -> do
|
||||
m <- insertNewStatus line $ logMap $ parseLog s
|
||||
return $ buildLog $ mapLog m
|
||||
|
||||
{- Reads a log file.
|
||||
- Note that the LogLines returned may be in any order. -}
|
||||
readLog :: FilePath -> Annex [LogLine]
|
||||
readLog :: RawFilePath -> Annex [LogLine]
|
||||
readLog = parseLog <$$> Annex.Branch.get
|
||||
|
||||
{- Generates a new LogLine with the current time. -}
|
||||
|
@ -55,10 +55,10 @@ logNow s i = do
|
|||
return $ LogLine c s i
|
||||
|
||||
{- Reads a log and returns only the info that is still in effect. -}
|
||||
currentLogInfo :: FilePath -> Annex [LogInfo]
|
||||
currentLogInfo :: RawFilePath -> Annex [LogInfo]
|
||||
currentLogInfo file = map info <$> currentLog file
|
||||
|
||||
currentLog :: FilePath -> Annex [LogLine]
|
||||
currentLog :: RawFilePath -> Annex [LogLine]
|
||||
currentLog file = filterPresent <$> readLog file
|
||||
|
||||
{- Reads a historical version of a log and returns the info that was in
|
||||
|
@ -66,6 +66,6 @@ currentLog file = filterPresent <$> readLog file
|
|||
-
|
||||
- The date is formatted as shown in gitrevisions man page.
|
||||
-}
|
||||
historicalLogInfo :: RefDate -> FilePath -> Annex [LogInfo]
|
||||
historicalLogInfo :: RefDate -> RawFilePath -> Annex [LogInfo]
|
||||
historicalLogInfo refdate file = map info . filterPresent . parseLog
|
||||
<$> Annex.Branch.getHistorical refdate file
|
||||
|
|
|
@ -25,13 +25,13 @@ import Annex.VectorClock
|
|||
|
||||
import qualified Data.Set as S
|
||||
|
||||
readLog :: (Ord v, SingleValueSerializable v) => FilePath -> Annex (Log v)
|
||||
readLog :: (Ord v, SingleValueSerializable v) => RawFilePath -> Annex (Log v)
|
||||
readLog = parseLog <$$> Annex.Branch.get
|
||||
|
||||
getLog :: (Ord v, SingleValueSerializable v) => FilePath -> Annex (Maybe v)
|
||||
getLog :: (Ord v, SingleValueSerializable v) => RawFilePath -> Annex (Maybe v)
|
||||
getLog = newestValue <$$> readLog
|
||||
|
||||
setLog :: (SingleValueSerializable v) => FilePath -> v -> Annex ()
|
||||
setLog :: (SingleValueSerializable v) => RawFilePath -> v -> Annex ()
|
||||
setLog f v = do
|
||||
c <- liftIO currentVectorClock
|
||||
let ent = LogEntry c v
|
||||
|
|
|
@ -31,7 +31,7 @@ describeTransfer :: Transfer -> TransferInfo -> String
|
|||
describeTransfer t info = unwords
|
||||
[ show $ transferDirection t
|
||||
, show $ transferUUID t
|
||||
, actionItemDesc $ ActionItemAssociatedFile
|
||||
, decodeBS' $ actionItemDesc $ ActionItemAssociatedFile
|
||||
(associatedFile info)
|
||||
(transferKey t)
|
||||
, show $ bytesComplete info
|
||||
|
@ -245,7 +245,7 @@ writeTransferInfo info = unlines
|
|||
#endif
|
||||
-- comes last; arbitrary content
|
||||
, let AssociatedFile afile = associatedFile info
|
||||
in fromMaybe "" afile
|
||||
in maybe "" fromRawFilePath afile
|
||||
]
|
||||
|
||||
readTransferInfoFile :: Maybe PID -> FilePath -> IO (Maybe TransferInfo)
|
||||
|
@ -263,7 +263,7 @@ readTransferInfo mpid s = TransferInfo
|
|||
<*> pure Nothing
|
||||
<*> pure Nothing
|
||||
<*> bytes
|
||||
<*> pure (AssociatedFile (if null filename then Nothing else Just filename))
|
||||
<*> pure (AssociatedFile (if null filename then Nothing else Just (toRawFilePath filename)))
|
||||
<*> pure False
|
||||
where
|
||||
#ifdef mingw32_HOST_OS
|
||||
|
|
|
@ -12,6 +12,8 @@
|
|||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Logs.Transitions where
|
||||
|
||||
import Annex.Common
|
||||
|
@ -26,7 +28,7 @@ import qualified Data.ByteString.Lazy as L
|
|||
import qualified Data.Attoparsec.ByteString.Lazy as A
|
||||
import qualified Data.Attoparsec.ByteString.Char8 as A8
|
||||
|
||||
transitionsLog :: FilePath
|
||||
transitionsLog :: RawFilePath
|
||||
transitionsLog = "transitions.log"
|
||||
|
||||
data Transition
|
||||
|
@ -94,6 +96,6 @@ knownTransitionList = nub . rights . map transition . S.elems
|
|||
|
||||
{- Typically ran with Annex.Branch.change, but we can't import Annex.Branch
|
||||
- here since it depends on this module. -}
|
||||
recordTransitions :: (FilePath -> (L.ByteString -> Builder) -> Annex ()) -> Transitions -> Annex ()
|
||||
recordTransitions :: (RawFilePath -> (L.ByteString -> Builder) -> Annex ()) -> Transitions -> Annex ()
|
||||
recordTransitions changer t = changer transitionsLog $
|
||||
buildTransitions . S.union t . parseTransitionsStrictly "local"
|
||||
|
|
|
@ -93,7 +93,7 @@ knownUrls = do
|
|||
Annex.Branch.update
|
||||
Annex.Branch.commit =<< Annex.Branch.commitMessage
|
||||
Annex.Branch.withIndex $ do
|
||||
top <- fromRepo Git.repoPath
|
||||
top <- toRawFilePath <$> fromRepo Git.repoPath
|
||||
(l, cleanup) <- inRepo $ Git.LsFiles.stagedDetails [top]
|
||||
r <- mapM getkeyurls l
|
||||
void $ liftIO cleanup
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue