Sped back up fsck, copy --from etc
All commands that often have to read a lot of information from the git-annex branch should now be nearly as fast as before the branch was introduced. Before fsck was taking approximatly 3 hours, now it's running in 8 minutes. The code is very nasty. It should be rewritten to read the header line from git cat-file, and then read the specified number of bytes of content.
This commit is contained in:
parent
8725fde5c6
commit
e1c18ddec4
4 changed files with 54 additions and 36 deletions
74
Branch.hs
74
Branch.hs
|
@ -26,9 +26,6 @@ import System.Cmd.Utils
|
|||
import Data.Maybe
|
||||
import Data.List
|
||||
import System.IO
|
||||
import System.Posix.IO
|
||||
import System.Posix.Process
|
||||
import System.Log.Logger
|
||||
|
||||
import Types.BranchState
|
||||
import qualified GitRepo as Git
|
||||
|
@ -142,7 +139,7 @@ commit message = whenM stageJournalFiles $ do
|
|||
- data is read from it. Runs only once per git-annex run. -}
|
||||
update :: Annex ()
|
||||
update = do
|
||||
state <- Annex.getState Annex.branchstate
|
||||
state <- getState
|
||||
unless (branchUpdated state) $ withIndex $ do
|
||||
{- Since branches get merged into the index, it's important to
|
||||
- first stage the journal into the index. Otherwise, any
|
||||
|
@ -226,39 +223,48 @@ get file = do
|
|||
setCache file content
|
||||
return content
|
||||
Nothing -> withIndexUpdate $ do
|
||||
g <- Annex.gitRepo
|
||||
content <- liftIO $ catch (cat g) (const $ return "")
|
||||
content <- catFile file
|
||||
setCache file content
|
||||
return content
|
||||
|
||||
{- Uses git cat-file in batch mode to read the content of a file.
|
||||
-
|
||||
- Only one process is run, and it persists and is used for all accesses. -}
|
||||
catFile :: FilePath -> Annex String
|
||||
catFile file = do
|
||||
state <- getState
|
||||
maybe (startup state) ask (catFileHandles state)
|
||||
where
|
||||
cat g = cmdOutput "git" $ toCommand $ Git.gitCommandLine g
|
||||
[Param "cat-file", Param "blob", Param $ ':':file]
|
||||
|
||||
{- Runs a command, returning its output, ignoring nonzero exit
|
||||
- status, and discarding stderr. -}
|
||||
cmdOutput :: FilePath -> [String] -> IO String
|
||||
cmdOutput cmd params = do
|
||||
pipepair <- createPipe
|
||||
let callfunc _ = do
|
||||
closeFd (snd pipepair)
|
||||
h <- fdToHandle (fst pipepair)
|
||||
x <- hGetContentsStrict h
|
||||
hClose h
|
||||
return $! x
|
||||
let child = do
|
||||
closeFd (fst pipepair)
|
||||
-- disable stderr output by this child,
|
||||
-- and since the logger uses it, also disable it
|
||||
liftIO $ updateGlobalLogger rootLoggerName $ setLevel EMERGENCY
|
||||
closeFd stdError
|
||||
|
||||
debugM "Utility.executeFile" $ cmd ++ " " ++ show params
|
||||
|
||||
pid <- pOpen3Raw Nothing (Just (snd pipepair)) Nothing cmd params child
|
||||
retval <- callfunc $! pid
|
||||
let rv = seq retval retval
|
||||
_ <- getProcessStatus True False pid
|
||||
return rv
|
||||
startup state = do
|
||||
g <- Annex.gitRepo
|
||||
let cmd = Git.gitCommandLine g
|
||||
[Param "cat-file", Param "--batch"]
|
||||
let gitcmd = join " " $ "git" : toCommand cmd
|
||||
(_, from, to) <- liftIO $ hPipeBoth "sh"
|
||||
-- want stderr on stdin for sentinal, and
|
||||
-- to ignore other error messages
|
||||
["-c", gitcmd ++ " 2>&1"]
|
||||
setState state { catFileHandles = Just (from, to) }
|
||||
ask (from, to)
|
||||
ask (from, to) = do
|
||||
_ <- liftIO $ do
|
||||
hPutStr to $
|
||||
fullname ++ ":" ++ file ++ "\n" ++
|
||||
sentinal ++ "\n"
|
||||
hFlush to
|
||||
return . unlines =<< readContent from []
|
||||
readContent from ls = do
|
||||
l <- liftIO $ hGetLine from
|
||||
if l == sentinal_line
|
||||
-- first line is blob info,
|
||||
-- or maybe an error message
|
||||
then return $ drop 1 $ reverse ls
|
||||
else readContent from (l:ls)
|
||||
-- To find the end of a catted file, ask for a sentinal
|
||||
-- value that is always missing, and look for the error
|
||||
-- message. Utterly nasty, probably will break one day.
|
||||
sentinal = ":"
|
||||
sentinal_line = sentinal ++ " missing"
|
||||
|
||||
{- Lists all files on the branch. There may be duplicates in the list. -}
|
||||
files :: Annex [FilePath]
|
||||
|
|
|
@ -7,11 +7,18 @@
|
|||
|
||||
module Types.BranchState where
|
||||
|
||||
import System.IO
|
||||
|
||||
data BranchState = BranchState {
|
||||
branchUpdated :: Bool,
|
||||
branchUpdated :: Bool, -- has the branch been updated this run?
|
||||
|
||||
-- (from, to) handles used to talk to a git-cat-file process
|
||||
catFileHandles :: Maybe (Handle, Handle),
|
||||
|
||||
-- the content of one file is cached
|
||||
cachedFile :: Maybe FilePath,
|
||||
cachedContent :: String
|
||||
}
|
||||
|
||||
startBranchState :: BranchState
|
||||
startBranchState = BranchState False Nothing ""
|
||||
startBranchState = BranchState False Nothing Nothing ""
|
||||
|
|
3
debian/changelog
vendored
3
debian/changelog
vendored
|
@ -3,6 +3,9 @@ git-annex (3.20110625) UNRELEASED; urgency=low
|
|||
* Always ensure git-annex branch exists.
|
||||
* Modify location log parser to allow future expansion.
|
||||
* --force will cause add, etc, to operate on ignored files.
|
||||
* Sped back up fsck, copy --from, and other commands that often
|
||||
have to read a lot of information from the git-annex branch. Should
|
||||
now be nearly as fast as before the branch was introduced.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Sun, 26 Jun 2011 21:01:06 -0400
|
||||
|
||||
|
|
|
@ -36,3 +36,5 @@ commands like whereis and add. --[[Joey]]
|
|||
> Hmm, except that's actually an error message sent to stderr. Unless
|
||||
> stderr is connected to stdout, it might be better to look for a known,
|
||||
> empty object. Could just add a git-annex:empty file to that end.
|
||||
|
||||
[[done]] --[[Joey]]
|
||||
|
|
Loading…
Reference in a new issue