Sped back up fsck, copy --from etc
All commands that often have to read a lot of information from the git-annex branch should now be nearly as fast as before the branch was introduced. Before fsck was taking approximatly 3 hours, now it's running in 8 minutes. The code is very nasty. It should be rewritten to read the header line from git cat-file, and then read the specified number of bytes of content.
This commit is contained in:
parent
8725fde5c6
commit
e1c18ddec4
4 changed files with 54 additions and 36 deletions
74
Branch.hs
74
Branch.hs
|
@ -26,9 +26,6 @@ import System.Cmd.Utils
|
||||||
import Data.Maybe
|
import Data.Maybe
|
||||||
import Data.List
|
import Data.List
|
||||||
import System.IO
|
import System.IO
|
||||||
import System.Posix.IO
|
|
||||||
import System.Posix.Process
|
|
||||||
import System.Log.Logger
|
|
||||||
|
|
||||||
import Types.BranchState
|
import Types.BranchState
|
||||||
import qualified GitRepo as Git
|
import qualified GitRepo as Git
|
||||||
|
@ -142,7 +139,7 @@ commit message = whenM stageJournalFiles $ do
|
||||||
- data is read from it. Runs only once per git-annex run. -}
|
- data is read from it. Runs only once per git-annex run. -}
|
||||||
update :: Annex ()
|
update :: Annex ()
|
||||||
update = do
|
update = do
|
||||||
state <- Annex.getState Annex.branchstate
|
state <- getState
|
||||||
unless (branchUpdated state) $ withIndex $ do
|
unless (branchUpdated state) $ withIndex $ do
|
||||||
{- Since branches get merged into the index, it's important to
|
{- Since branches get merged into the index, it's important to
|
||||||
- first stage the journal into the index. Otherwise, any
|
- first stage the journal into the index. Otherwise, any
|
||||||
|
@ -226,39 +223,48 @@ get file = do
|
||||||
setCache file content
|
setCache file content
|
||||||
return content
|
return content
|
||||||
Nothing -> withIndexUpdate $ do
|
Nothing -> withIndexUpdate $ do
|
||||||
g <- Annex.gitRepo
|
content <- catFile file
|
||||||
content <- liftIO $ catch (cat g) (const $ return "")
|
|
||||||
setCache file content
|
setCache file content
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
{- Uses git cat-file in batch mode to read the content of a file.
|
||||||
|
-
|
||||||
|
- Only one process is run, and it persists and is used for all accesses. -}
|
||||||
|
catFile :: FilePath -> Annex String
|
||||||
|
catFile file = do
|
||||||
|
state <- getState
|
||||||
|
maybe (startup state) ask (catFileHandles state)
|
||||||
where
|
where
|
||||||
cat g = cmdOutput "git" $ toCommand $ Git.gitCommandLine g
|
startup state = do
|
||||||
[Param "cat-file", Param "blob", Param $ ':':file]
|
g <- Annex.gitRepo
|
||||||
|
let cmd = Git.gitCommandLine g
|
||||||
{- Runs a command, returning its output, ignoring nonzero exit
|
[Param "cat-file", Param "--batch"]
|
||||||
- status, and discarding stderr. -}
|
let gitcmd = join " " $ "git" : toCommand cmd
|
||||||
cmdOutput :: FilePath -> [String] -> IO String
|
(_, from, to) <- liftIO $ hPipeBoth "sh"
|
||||||
cmdOutput cmd params = do
|
-- want stderr on stdin for sentinal, and
|
||||||
pipepair <- createPipe
|
-- to ignore other error messages
|
||||||
let callfunc _ = do
|
["-c", gitcmd ++ " 2>&1"]
|
||||||
closeFd (snd pipepair)
|
setState state { catFileHandles = Just (from, to) }
|
||||||
h <- fdToHandle (fst pipepair)
|
ask (from, to)
|
||||||
x <- hGetContentsStrict h
|
ask (from, to) = do
|
||||||
hClose h
|
_ <- liftIO $ do
|
||||||
return $! x
|
hPutStr to $
|
||||||
let child = do
|
fullname ++ ":" ++ file ++ "\n" ++
|
||||||
closeFd (fst pipepair)
|
sentinal ++ "\n"
|
||||||
-- disable stderr output by this child,
|
hFlush to
|
||||||
-- and since the logger uses it, also disable it
|
return . unlines =<< readContent from []
|
||||||
liftIO $ updateGlobalLogger rootLoggerName $ setLevel EMERGENCY
|
readContent from ls = do
|
||||||
closeFd stdError
|
l <- liftIO $ hGetLine from
|
||||||
|
if l == sentinal_line
|
||||||
debugM "Utility.executeFile" $ cmd ++ " " ++ show params
|
-- first line is blob info,
|
||||||
|
-- or maybe an error message
|
||||||
pid <- pOpen3Raw Nothing (Just (snd pipepair)) Nothing cmd params child
|
then return $ drop 1 $ reverse ls
|
||||||
retval <- callfunc $! pid
|
else readContent from (l:ls)
|
||||||
let rv = seq retval retval
|
-- To find the end of a catted file, ask for a sentinal
|
||||||
_ <- getProcessStatus True False pid
|
-- value that is always missing, and look for the error
|
||||||
return rv
|
-- message. Utterly nasty, probably will break one day.
|
||||||
|
sentinal = ":"
|
||||||
|
sentinal_line = sentinal ++ " missing"
|
||||||
|
|
||||||
{- Lists all files on the branch. There may be duplicates in the list. -}
|
{- Lists all files on the branch. There may be duplicates in the list. -}
|
||||||
files :: Annex [FilePath]
|
files :: Annex [FilePath]
|
||||||
|
|
|
@ -7,11 +7,18 @@
|
||||||
|
|
||||||
module Types.BranchState where
|
module Types.BranchState where
|
||||||
|
|
||||||
|
import System.IO
|
||||||
|
|
||||||
data BranchState = BranchState {
|
data BranchState = BranchState {
|
||||||
branchUpdated :: Bool,
|
branchUpdated :: Bool, -- has the branch been updated this run?
|
||||||
|
|
||||||
|
-- (from, to) handles used to talk to a git-cat-file process
|
||||||
|
catFileHandles :: Maybe (Handle, Handle),
|
||||||
|
|
||||||
|
-- the content of one file is cached
|
||||||
cachedFile :: Maybe FilePath,
|
cachedFile :: Maybe FilePath,
|
||||||
cachedContent :: String
|
cachedContent :: String
|
||||||
}
|
}
|
||||||
|
|
||||||
startBranchState :: BranchState
|
startBranchState :: BranchState
|
||||||
startBranchState = BranchState False Nothing ""
|
startBranchState = BranchState False Nothing Nothing ""
|
||||||
|
|
3
debian/changelog
vendored
3
debian/changelog
vendored
|
@ -3,6 +3,9 @@ git-annex (3.20110625) UNRELEASED; urgency=low
|
||||||
* Always ensure git-annex branch exists.
|
* Always ensure git-annex branch exists.
|
||||||
* Modify location log parser to allow future expansion.
|
* Modify location log parser to allow future expansion.
|
||||||
* --force will cause add, etc, to operate on ignored files.
|
* --force will cause add, etc, to operate on ignored files.
|
||||||
|
* Sped back up fsck, copy --from, and other commands that often
|
||||||
|
have to read a lot of information from the git-annex branch. Should
|
||||||
|
now be nearly as fast as before the branch was introduced.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Sun, 26 Jun 2011 21:01:06 -0400
|
-- Joey Hess <joeyh@debian.org> Sun, 26 Jun 2011 21:01:06 -0400
|
||||||
|
|
||||||
|
|
|
@ -36,3 +36,5 @@ commands like whereis and add. --[[Joey]]
|
||||||
> Hmm, except that's actually an error message sent to stderr. Unless
|
> Hmm, except that's actually an error message sent to stderr. Unless
|
||||||
> stderr is connected to stdout, it might be better to look for a known,
|
> stderr is connected to stdout, it might be better to look for a known,
|
||||||
> empty object. Could just add a git-annex:empty file to that end.
|
> empty object. Could just add a git-annex:empty file to that end.
|
||||||
|
|
||||||
|
[[done]] --[[Joey]]
|
||||||
|
|
Loading…
Reference in a new issue