Merge branch 'master' into watch

commit 27cfeca4ea
Joey Hess, 2012-06-06 02:16:21 -04:00
5 changed files with 113 additions and 48 deletions


@@ -33,6 +33,7 @@ import qualified Git.Command
 import qualified Git.Ref
 import qualified Git.Branch
 import qualified Git.UnionMerge
+import qualified Git.UpdateIndex
 import Git.HashObject
 import qualified Git.Index
 import Annex.CatFile
@@ -258,8 +259,8 @@ files = withIndexUpdate $ do
  - in changes from other branches.
  -}
 genIndex :: Git.Repo -> IO ()
-genIndex g = Git.UnionMerge.stream_update_index g
-    [Git.UnionMerge.ls_tree fullname g]
+genIndex g = Git.UpdateIndex.stream_update_index g
+    [Git.UpdateIndex.ls_tree fullname g]

 {- Merges the specified refs into the index.
  - Any changes staged in the index will be preserved. -}
@@ -335,13 +336,13 @@ stageJournal = do
     g <- gitRepo
     withIndex $ liftIO $ do
         h <- hashObjectStart g
-        Git.UnionMerge.stream_update_index g
+        Git.UpdateIndex.stream_update_index g
             [genstream (gitAnnexJournalDir g) h fs]
         hashObjectStop h
     where
         genstream dir h fs streamer = forM_ fs $ \file -> do
             let path = dir </> file
             sha <- hashFile h path
-            _ <- streamer $ Git.UnionMerge.update_index_line
+            _ <- streamer $ Git.UpdateIndex.update_index_line
                 sha (fileJournal file)
             removeFile path

Git/UnionMerge.hs

@@ -7,11 +7,7 @@
 module Git.UnionMerge (
     merge,
-    merge_index,
-    update_index,
-    stream_update_index,
-    update_index_line,
-    ls_tree
+    merge_index
 ) where

 import System.Cmd.Utils
@@ -24,8 +20,7 @@ import Git
 import Git.Sha
 import Git.CatFile
 import Git.Command
+import Git.UpdateIndex

-type Streamer = (String -> IO ()) -> IO ()
-
 {- Performs a union merge between two branches, staging it in the index.
  - Any previously staged changes in the index will be lost.
@@ -47,38 +42,6 @@ merge_index :: CatFileHandle -> Repo -> [Ref] -> IO ()
 merge_index h repo bs =
     stream_update_index repo $ map (\b -> merge_tree_index b h repo) bs

-{- Feeds content into update-index. Later items in the list can override
- - earlier ones, so the list can be generated from any combination of
- - ls_tree, merge_trees, and merge_tree_index. -}
-update_index :: Repo -> [String] -> IO ()
-update_index repo ls = stream_update_index repo [(`mapM_` ls)]
-
-{- Streams content into update-index. -}
-stream_update_index :: Repo -> [Streamer] -> IO ()
-stream_update_index repo as = do
-    (p, h) <- hPipeTo "git" (toCommand $ gitCommandLine params repo)
-    fileEncoding h
-    forM_ as (stream h)
-    hClose h
-    forceSuccess p
-    where
-        params = map Param ["update-index", "-z", "--index-info"]
-        stream h a = a (streamer h)
-        streamer h s = do
-            hPutStr h s
-            hPutStr h "\0"
-
-{- Generates a line suitable to be fed into update-index, to add
- - a given file with a given sha. -}
-update_index_line :: Sha -> FilePath -> String
-update_index_line sha file = "100644 blob " ++ show sha ++ "\t" ++ file
-
-{- Gets the current tree for a ref. -}
-ls_tree :: Ref -> Repo -> Streamer
-ls_tree (Ref x) repo streamer = mapM_ streamer =<< pipeNullSplit params repo
-    where
-        params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
-
 {- For merging two trees. -}
 merge_trees :: Ref -> Ref -> CatFileHandle -> Repo -> Streamer
 merge_trees (Ref x) (Ref y) h = calc_merge h $ "diff-tree":diff_opts ++ [x, y]

Git/UpdateIndex.hs (new file)

@@ -0,0 +1,49 @@
{- git-update-index library
 -
 - Copyright 2011, 2012 Joey Hess <joey@kitenet.net>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

module Git.UpdateIndex (
    Streamer,
    stream_update_index,
    update_index_line,
    ls_tree
) where

import System.Cmd.Utils

import Common
import Git
import Git.Command

{- Streamers are passed a callback and should feed it lines in the form
 - read by update-index, and generated by ls-tree. -}
type Streamer = (String -> IO ()) -> IO ()

{- Streams content into update-index from a list of Streamers. -}
stream_update_index :: Repo -> [Streamer] -> IO ()
stream_update_index repo as = do
    (p, h) <- hPipeTo "git" (toCommand $ gitCommandLine params repo)
    fileEncoding h
    forM_ as (stream h)
    hClose h
    forceSuccess p
    where
        params = map Param ["update-index", "-z", "--index-info"]
        stream h a = a (streamer h)
        streamer h s = do
            hPutStr h s
            hPutStr h "\0"

{- Generates a line suitable to be fed into update-index, to add
 - a given file with a given sha. -}
update_index_line :: Sha -> FilePath -> String
update_index_line sha file = "100644 blob " ++ show sha ++ "\t" ++ file

{- Gets the current tree for a ref. -}
ls_tree :: Ref -> Repo -> Streamer
ls_tree (Ref x) repo streamer = mapM_ streamer =<< pipeNullSplit params repo
    where
        params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]


@@ -0,0 +1,45 @@
Last night I got `git annex watch` to also handle deletion of files.
This was not as tricky as feared; the key is using `git rm --ignore-unmatch`,
which avoids most problematic situations (such as a just-deleted file
being added back before git is run).
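
For illustration, here is a minimal sketch of that kind of deletion handler (not the real Watcher code; `onDelete` is a made-up name):

    -- Hypothetical deletion handler: stage the removal with
    -- `git rm --ignore-unmatch`, which stays quiet if the file was
    -- never tracked or has already been added back.
    import System.Process (callProcess)

    onDelete :: FilePath -> IO ()
    onDelete file = callProcess "git"
        ["rm", "--quiet", "--ignore-unmatch", "--", file]
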
Also fixed some races when `git annex watch` is doing its startup scan of
the tree, which might be changed as it's being traversed. Now only one
thread performs actions at a time, so inotify events are queued up during
the scan, and dealt with once it completes. It's worth noting that inotify
can only buffer so many events, which might have been a problem except
for a very nice feature of Haskell's inotify interface: It has a thread
that drains the limited inotify buffer and does its own buffering.
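
Roughly, the arrangement looks like this (a simplified sketch, not the actual git-annex code; `watch`, `register`, and `scan` are placeholder names):

    import Control.Concurrent.Chan
    import Control.Monad (forever)

    -- The caller supplies the inotify hookup and the startup scan;
    -- both are only given a way to enqueue events, never to act directly.
    watch :: ((event -> IO ()) -> IO ())  -- register inotify callbacks
          -> ((event -> IO ()) -> IO ())  -- startup scan of the tree
          -> (event -> IO ())             -- action to perform per event
          -> IO ()
    watch register scan handle = do
        queue <- newChan
        register (writeChan queue)
        -- events delivered while the scan runs simply pile up in the queue
        scan (writeChan queue)
        -- a single loop then performs all the actions, one at a time, in order
        forever $ readChan queue >>= handle
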
----
Right now, `git annex watch` is not as fast as it could be when doing
something like adding a lot of files, or deleting a lot of files.
For each file, it currently runs a git command that updates the index.
I did some work toward coalescing these into one command (which `git annex`
already does normally). It's not quite ready to be turned on yet,
because of some races involving `git add` that become much worse
if it's delayed by event coalescing.
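
The coalescing idea, sketched with the Git.UpdateIndex module that this commit splits out (a sketch only; `flushQueued` and its queue of sha/file pairs are invented for illustration):

    import Common
    import Git
    import Git.UpdateIndex

    -- Flush a batch of queued additions through a single
    -- `git update-index -z --index-info` process, instead of
    -- running one git command per file.
    flushQueued :: Repo -> [(Sha, FilePath)] -> IO ()
    flushQueued repo queued = stream_update_index repo [feed]
        where
            -- one Streamer that emits an index line per queued file
            feed streamer = forM_ queued $ \(sha, file) ->
                streamer $ update_index_line sha file

The point is that `stream_update_index` starts one `git update-index -z --index-info` process and feeds every queued entry to it.
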
----
And races were the theme of today. Spent most of the day really
getting to grips with all the fun races that can occur between
modifications happening to files and `git annex watch`. The [[inotify]]
page now has a long list of known races, some benign, and several,
all involving adding files, that are quite nasty.
I fixed one of those races this evening. The rest will probably involve
moving away from using `git add`, which necessarily examines the file
on disk, to directly shoving the symlink into git's index.
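
Staging a symlink directly, without `git add`, could look roughly like this (a standalone sketch using git plumbing commands; `stageSymlink` is not an existing git-annex function):

    import System.Posix.Files (readSymbolicLink)
    import System.Process (readProcess)

    -- Stage a symlink without ever running `git add` on it: hash the link
    -- target as a blob, then feed a symlink (mode 120000) entry straight
    -- into the index.
    stageSymlink :: FilePath -> IO ()
    stageSymlink file = do
        target <- readSymbolicLink file
        -- `-w` writes the blob to the object database and prints its sha
        out <- readProcess "git" ["hash-object", "-w", "--stdin"] target
        let sha = takeWhile (/= '\n') out
        -- same line format as update_index_line above, but with the
        -- symlink mode; `-z` means entries are NUL-terminated
        _ <- readProcess "git" ["update-index", "-z", "--index-info"]
            ("120000 blob " ++ sha ++ "\t" ++ file ++ "\0")
        return ()

In git-annex itself this would presumably go through its own Git.UpdateIndex and Git.HashObject plumbing rather than raw readProcess calls.
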
BTW, it turns out that `dvcs-autosync` has grappled with some of these same
races: <http://comments.gmane.org/gmane.comp.version-control.home-dir/665>
I hope that `git annex watch` will be in a better place to deal with them,
since it's only dealing with git, and with a restricted portion of it
relevant to git-annex.
It's important that `git annex watch` be rock solid. It's the foundation
of the git annex assistant. Users should not need to worry about races
when using it. Most users won't know what race conditions are. If only I
could be so lucky!


@@ -58,12 +58,19 @@ Many races need to be dealt with by this code. Here are some of them.
 * File is added and then replaced with another file before the annex add
   moves its content into the annex.

-  **Currently unfixed**; The new content will be moved to the annex under the
-  old checksum, and fsck will later catch this inconsistency.
-
-  Possible fix: Move content someplace before doing checksumming. Perhaps
-  using a hard link and removing the write bit to prevent modification
-  while checksumming.
+  Fixed this problem; Now it hard links the file to a temp directory and
+  operates on the hard link, which is also made unwritable.
+
+* A process has a file open for write, another one closes it, and so it's
+  added. Then the first process modifies it.
+
+  **Currently unfixed**; This changes content in the annex, and fsck will
+  later catch the inconsistency.
+
+  Possible fixes: Somehow track or detect if a file is open for write
+  by any processes. Or, when possible, making a copy on write copy
+  before adding the file would avoid this. Or, as a last resort, make
+  an expensive copy of the file and add that.

 * File is added and then replaced with another file before the annex add
   makes its symlink.