crazy optimisation

Crazy like a fox..
This commit is contained in:
Joey Hess 2012-06-10 19:58:34 -04:00
parent c1b432ee54
commit ca9ee21bd7
4 changed files with 54 additions and 22 deletions

View file

@ -8,6 +8,7 @@
module Annex.CatFile ( module Annex.CatFile (
catFile, catFile,
catObject, catObject,
catObjectDetails,
catFileHandle catFileHandle
) where ) where
@ -17,6 +18,7 @@ import Common.Annex
import qualified Git import qualified Git
import qualified Git.CatFile import qualified Git.CatFile
import qualified Annex import qualified Annex
import Git.Types
catFile :: Git.Branch -> FilePath -> Annex L.ByteString catFile :: Git.Branch -> FilePath -> Annex L.ByteString
catFile branch file = do catFile branch file = do
@ -28,6 +30,11 @@ catObject ref = do
h <- catFileHandle h <- catFileHandle
liftIO $ Git.CatFile.catObject h ref liftIO $ Git.CatFile.catObject h ref
catObjectDetails :: Git.Ref -> Annex (Maybe (L.ByteString, Sha))
catObjectDetails ref = do
h <- catFileHandle
liftIO $ Git.CatFile.catObjectDetails h ref
catFileHandle :: Annex Git.CatFile.CatFileHandle catFileHandle :: Annex Git.CatFile.CatFileHandle
catFileHandle = maybe startup return =<< Annex.getState Annex.catfilehandle catFileHandle = maybe startup return =<< Annex.getState Annex.catfilehandle
where where

View file

@ -18,12 +18,17 @@ import qualified Annex.Queue
import qualified Command.Add import qualified Command.Add
import qualified Git.Command import qualified Git.Command
import qualified Git.UpdateIndex import qualified Git.UpdateIndex
import qualified Git.HashObject
import qualified Backend import qualified Backend
import Annex.Content import Annex.Content
import Annex.CatFile
import Git.Types
import Control.Concurrent import Control.Concurrent
import Control.Concurrent.STM import Control.Concurrent.STM
import Data.Time.Clock import Data.Time.Clock
import Data.Bits.Utils
import qualified Data.ByteString.Lazy as L
#if defined linux_HOST_OS #if defined linux_HOST_OS
import Utility.Inotify import Utility.Inotify
@ -127,6 +132,9 @@ madeChange :: FilePath -> String -> Annex (Maybe Change)
madeChange file desc = liftIO $ madeChange file desc = liftIO $
Just <$> (Change <$> getCurrentTime <*> pure file <*> pure desc) Just <$> (Change <$> getCurrentTime <*> pure file <*> pure desc)
noChange :: Annex (Maybe Change)
noChange = return Nothing
{- Adding a file is tricky; the file has to be replaced with a symlink {- Adding a file is tricky; the file has to be replaced with a symlink
- but this is race prone, as the symlink could be changed immediately - but this is race prone, as the symlink could be changed immediately
- after creation. To avoid that race, git add is not used to stage the - after creation. To avoid that race, git add is not used to stage the
@ -139,7 +147,7 @@ onAdd :: Handler
onAdd file = do onAdd file = do
showStart "add" file showStart "add" file
handle =<< Command.Add.ingest file handle =<< Command.Add.ingest file
return Nothing noChange
where where
handle Nothing = showEndFail handle Nothing = showEndFail
handle (Just key) = do handle (Just key) = do
@ -153,22 +161,35 @@ onAdd file = do
onAddSymlink :: Handler onAddSymlink :: Handler
onAddSymlink file = go =<< Backend.lookupFile file onAddSymlink file = go =<< Backend.lookupFile file
where where
go Nothing = do go Nothing = addlink =<< liftIO (readSymbolicLink file)
addlink =<< liftIO (readSymbolicLink file)
madeChange file "add"
go (Just (key, _)) = do go (Just (key, _)) = do
link <- calcGitLink file key link <- calcGitLink file key
ifM ((==) link <$> liftIO (readSymbolicLink file)) ifM ((==) link <$> liftIO (readSymbolicLink file))
( do ( addlink link
addlink link
madeChange file "add"
, do , do
liftIO $ removeFile file liftIO $ removeFile file
liftIO $ createSymbolicLink link file liftIO $ createSymbolicLink link file
addlink link addlink link
madeChange file "fix"
) )
addlink link = stageSymlink file link {- This is often called on symlinks that are already staged
- correctly, especially during the startup scan. A symlink
- may have been deleted and re-added, or added when
- the watcher was not running; so it always stages
- even symlinks that already exist.
-
- So for speed, tries to reuse the existing blob for
- the symlink target. -}
addlink link = do
v <- catObjectDetails $ Ref $ ":" ++ file
case v of
Just (currlink, sha)
| s2w8 link == L.unpack currlink ->
stageSymlink file sha
_ -> do
sha <- inRepo $
Git.HashObject.hashObject BlobObject link
stageSymlink file sha
madeChange file "link"
onDel :: Handler onDel :: Handler
onDel file = do onDel file = do
@ -197,10 +218,10 @@ onErr msg = do
{- Adds a symlink to the index, without ever accessing the actual symlink {- Adds a symlink to the index, without ever accessing the actual symlink
- on disk. -} - on disk. -}
stageSymlink :: FilePath -> String -> Annex () stageSymlink :: FilePath -> Sha -> Annex ()
stageSymlink file linktext = stageSymlink file sha =
Annex.Queue.addUpdateIndex =<< Annex.Queue.addUpdateIndex =<<
inRepo (Git.UpdateIndex.stageSymlink file linktext) inRepo (Git.UpdateIndex.stageSymlink file sha)
{- Signals that a change has been made, that needs to get committed. -} {- Signals that a change has been made, that needs to get committed. -}
signalChange :: ChangeChan -> Change -> Annex () signalChange :: ChangeChan -> Change -> Annex ()

View file

@ -10,7 +10,8 @@ module Git.CatFile (
catFileStart, catFileStart,
catFileStop, catFileStop,
catFile, catFile,
catObject catObject,
catObjectDetails,
) where ) where
import System.IO import System.IO
@ -42,7 +43,11 @@ catFile h branch file = catObject h $ Ref $ show branch ++ ":" ++ file
{- Uses a running git cat-file read the content of an object. {- Uses a running git cat-file read the content of an object.
- Objects that do not exist will have "" returned. -} - Objects that do not exist will have "" returned. -}
catObject :: CatFileHandle -> Ref -> IO L.ByteString catObject :: CatFileHandle -> Ref -> IO L.ByteString
catObject h object = CoProcess.query h send receive catObject h object = maybe L.empty fst <$> catObjectDetails h object
{- Gets both the content of an object, and its Sha. -}
catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha))
catObjectDetails h object = CoProcess.query h send receive
where where
send to = do send to = do
fileEncoding to fileEncoding to
@ -55,16 +60,16 @@ catObject h object = CoProcess.query h send receive
| length sha == shaSize && | length sha == shaSize &&
isJust (readObjectType objtype) -> isJust (readObjectType objtype) ->
case reads size of case reads size of
[(bytes, "")] -> readcontent bytes from [(bytes, "")] -> readcontent bytes from sha
_ -> dne _ -> dne
| otherwise -> dne | otherwise -> dne
_ _
| header == show object ++ " missing" -> dne | header == show object ++ " missing" -> dne
| otherwise -> error $ "unknown response from git cat-file " ++ show (header, object) | otherwise -> error $ "unknown response from git cat-file " ++ show (header, object)
readcontent bytes from = do readcontent bytes from sha = do
content <- S.hGet from bytes content <- S.hGet from bytes
c <- hGetChar from c <- hGetChar from
when (c /= '\n') $ when (c /= '\n') $
error "missing newline from git cat-file" error "missing newline from git cat-file"
return $ L.fromChunks [content] return $ Just (L.fromChunks [content], Ref sha)
dne = return L.empty dne = return Nothing

View file

@ -24,7 +24,6 @@ import Git
import Git.Types import Git.Types
import Git.Command import Git.Command
import Git.FilePath import Git.FilePath
import Git.HashObject
import Git.Sha import Git.Sha
{- Streamers are passed a callback and should feed it lines in the form {- Streamers are passed a callback and should feed it lines in the form
@ -70,10 +69,10 @@ unstageFile file repo = do
return $ pureStreamer $ "0 " ++ show nullSha ++ "\t" ++ getTopFilePath p return $ pureStreamer $ "0 " ++ show nullSha ++ "\t" ++ getTopFilePath p
{- A streamer that adds a symlink to the index. -} {- A streamer that adds a symlink to the index. -}
stageSymlink :: FilePath -> String -> Repo -> IO Streamer stageSymlink :: FilePath -> Sha -> Repo -> IO Streamer
stageSymlink file linktext repo = do stageSymlink file sha repo = do
line <- updateIndexLine line <- updateIndexLine
<$> hashObject BlobObject linktext repo <$> pure sha
<*> pure SymlinkBlob <*> pure SymlinkBlob
<*> toTopFilePath file repo <*> toTopFilePath file repo
return $ pureStreamer line return $ pureStreamer line