Merge branch 'master' into watch
This commit is contained in:
commit
27cfeca4ea
5 changed files with 113 additions and 48 deletions
|
@ -33,6 +33,7 @@ import qualified Git.Command
|
||||||
import qualified Git.Ref
|
import qualified Git.Ref
|
||||||
import qualified Git.Branch
|
import qualified Git.Branch
|
||||||
import qualified Git.UnionMerge
|
import qualified Git.UnionMerge
|
||||||
|
import qualified Git.UpdateIndex
|
||||||
import Git.HashObject
|
import Git.HashObject
|
||||||
import qualified Git.Index
|
import qualified Git.Index
|
||||||
import Annex.CatFile
|
import Annex.CatFile
|
||||||
|
@ -258,8 +259,8 @@ files = withIndexUpdate $ do
|
||||||
- in changes from other branches.
|
- in changes from other branches.
|
||||||
-}
|
-}
|
||||||
genIndex :: Git.Repo -> IO ()
|
genIndex :: Git.Repo -> IO ()
|
||||||
genIndex g = Git.UnionMerge.stream_update_index g
|
genIndex g = Git.UpdateIndex.stream_update_index g
|
||||||
[Git.UnionMerge.ls_tree fullname g]
|
[Git.UpdateIndex.ls_tree fullname g]
|
||||||
|
|
||||||
{- Merges the specified refs into the index.
|
{- Merges the specified refs into the index.
|
||||||
- Any changes staged in the index will be preserved. -}
|
- Any changes staged in the index will be preserved. -}
|
||||||
|
@ -335,13 +336,13 @@ stageJournal = do
|
||||||
g <- gitRepo
|
g <- gitRepo
|
||||||
withIndex $ liftIO $ do
|
withIndex $ liftIO $ do
|
||||||
h <- hashObjectStart g
|
h <- hashObjectStart g
|
||||||
Git.UnionMerge.stream_update_index g
|
Git.UpdateIndex.stream_update_index g
|
||||||
[genstream (gitAnnexJournalDir g) h fs]
|
[genstream (gitAnnexJournalDir g) h fs]
|
||||||
hashObjectStop h
|
hashObjectStop h
|
||||||
where
|
where
|
||||||
genstream dir h fs streamer = forM_ fs $ \file -> do
|
genstream dir h fs streamer = forM_ fs $ \file -> do
|
||||||
let path = dir </> file
|
let path = dir </> file
|
||||||
sha <- hashFile h path
|
sha <- hashFile h path
|
||||||
_ <- streamer $ Git.UnionMerge.update_index_line
|
_ <- streamer $ Git.UpdateIndex.update_index_line
|
||||||
sha (fileJournal file)
|
sha (fileJournal file)
|
||||||
removeFile path
|
removeFile path
|
||||||
|
|
|
@ -7,11 +7,7 @@
|
||||||
|
|
||||||
module Git.UnionMerge (
|
module Git.UnionMerge (
|
||||||
merge,
|
merge,
|
||||||
merge_index,
|
merge_index
|
||||||
update_index,
|
|
||||||
stream_update_index,
|
|
||||||
update_index_line,
|
|
||||||
ls_tree
|
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import System.Cmd.Utils
|
import System.Cmd.Utils
|
||||||
|
@ -24,8 +20,7 @@ import Git
|
||||||
import Git.Sha
|
import Git.Sha
|
||||||
import Git.CatFile
|
import Git.CatFile
|
||||||
import Git.Command
|
import Git.Command
|
||||||
|
import Git.UpdateIndex
|
||||||
type Streamer = (String -> IO ()) -> IO ()
|
|
||||||
|
|
||||||
{- Performs a union merge between two branches, staging it in the index.
|
{- Performs a union merge between two branches, staging it in the index.
|
||||||
- Any previously staged changes in the index will be lost.
|
- Any previously staged changes in the index will be lost.
|
||||||
|
@ -47,38 +42,6 @@ merge_index :: CatFileHandle -> Repo -> [Ref] -> IO ()
|
||||||
merge_index h repo bs =
|
merge_index h repo bs =
|
||||||
stream_update_index repo $ map (\b -> merge_tree_index b h repo) bs
|
stream_update_index repo $ map (\b -> merge_tree_index b h repo) bs
|
||||||
|
|
||||||
{- Feeds content into update-index. Later items in the list can override
|
|
||||||
- earlier ones, so the list can be generated from any combination of
|
|
||||||
- ls_tree, merge_trees, and merge_tree_index. -}
|
|
||||||
update_index :: Repo -> [String] -> IO ()
|
|
||||||
update_index repo ls = stream_update_index repo [(`mapM_` ls)]
|
|
||||||
|
|
||||||
{- Streams content into update-index. -}
|
|
||||||
stream_update_index :: Repo -> [Streamer] -> IO ()
|
|
||||||
stream_update_index repo as = do
|
|
||||||
(p, h) <- hPipeTo "git" (toCommand $ gitCommandLine params repo)
|
|
||||||
fileEncoding h
|
|
||||||
forM_ as (stream h)
|
|
||||||
hClose h
|
|
||||||
forceSuccess p
|
|
||||||
where
|
|
||||||
params = map Param ["update-index", "-z", "--index-info"]
|
|
||||||
stream h a = a (streamer h)
|
|
||||||
streamer h s = do
|
|
||||||
hPutStr h s
|
|
||||||
hPutStr h "\0"
|
|
||||||
|
|
||||||
{- Generates a line suitable to be fed into update-index, to add
|
|
||||||
- a given file with a given sha. -}
|
|
||||||
update_index_line :: Sha -> FilePath -> String
|
|
||||||
update_index_line sha file = "100644 blob " ++ show sha ++ "\t" ++ file
|
|
||||||
|
|
||||||
{- Gets the current tree for a ref. -}
|
|
||||||
ls_tree :: Ref -> Repo -> Streamer
|
|
||||||
ls_tree (Ref x) repo streamer = mapM_ streamer =<< pipeNullSplit params repo
|
|
||||||
where
|
|
||||||
params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
|
|
||||||
|
|
||||||
{- For merging two trees. -}
|
{- For merging two trees. -}
|
||||||
merge_trees :: Ref -> Ref -> CatFileHandle -> Repo -> Streamer
|
merge_trees :: Ref -> Ref -> CatFileHandle -> Repo -> Streamer
|
||||||
merge_trees (Ref x) (Ref y) h = calc_merge h $ "diff-tree":diff_opts ++ [x, y]
|
merge_trees (Ref x) (Ref y) h = calc_merge h $ "diff-tree":diff_opts ++ [x, y]
|
||||||
|
|
49
Git/UpdateIndex.hs
Normal file
49
Git/UpdateIndex.hs
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
{- git-update-index library
|
||||||
|
-
|
||||||
|
- Copyright 2011, 2012 Joey Hess <joey@kitenet.net>
|
||||||
|
-
|
||||||
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
|
-}
|
||||||
|
|
||||||
|
module Git.UpdateIndex (
|
||||||
|
Streamer,
|
||||||
|
stream_update_index,
|
||||||
|
update_index_line,
|
||||||
|
ls_tree
|
||||||
|
) where
|
||||||
|
|
||||||
|
import System.Cmd.Utils
|
||||||
|
|
||||||
|
import Common
|
||||||
|
import Git
|
||||||
|
import Git.Command
|
||||||
|
|
||||||
|
{- Streamers are passed a callback and should feed it lines in the form
|
||||||
|
- read by update-index, and generated by ls-tree. -}
|
||||||
|
type Streamer = (String -> IO ()) -> IO ()
|
||||||
|
|
||||||
|
{- Streams content into update-index from a list of Streamers. -}
|
||||||
|
stream_update_index :: Repo -> [Streamer] -> IO ()
|
||||||
|
stream_update_index repo as = do
|
||||||
|
(p, h) <- hPipeTo "git" (toCommand $ gitCommandLine params repo)
|
||||||
|
fileEncoding h
|
||||||
|
forM_ as (stream h)
|
||||||
|
hClose h
|
||||||
|
forceSuccess p
|
||||||
|
where
|
||||||
|
params = map Param ["update-index", "-z", "--index-info"]
|
||||||
|
stream h a = a (streamer h)
|
||||||
|
streamer h s = do
|
||||||
|
hPutStr h s
|
||||||
|
hPutStr h "\0"
|
||||||
|
|
||||||
|
{- Generates a line suitable to be fed into update-index, to add
|
||||||
|
- a given file with a given sha. -}
|
||||||
|
update_index_line :: Sha -> FilePath -> String
|
||||||
|
update_index_line sha file = "100644 blob " ++ show sha ++ "\t" ++ file
|
||||||
|
|
||||||
|
{- Gets the current tree for a ref. -}
|
||||||
|
ls_tree :: Ref -> Repo -> Streamer
|
||||||
|
ls_tree (Ref x) repo streamer = mapM_ streamer =<< pipeNullSplit params repo
|
||||||
|
where
|
||||||
|
params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
|
45
doc/design/assistant/blog/day_2__races.mdwn
Normal file
45
doc/design/assistant/blog/day_2__races.mdwn
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
Last night I got `git annex watch` to also handle deletion of files.
|
||||||
|
This was not as tricky as feared; the key is using `git rm --ignore-unmatch`,
|
||||||
|
which avoids most problematic situations (such as a just-deleted file
|
||||||
|
being added back before git is run).
|
||||||
|
|
||||||
|
Also fixed some races when `git annex watch` is doing its startup scan of
|
||||||
|
the tree, which might be changed as it's being traversed. Now only one
|
||||||
|
thread performs actions at a time, so inotify events are queued up during
|
||||||
|
the scan, and dealt with once it completes. It's worth noting that inotify
|
||||||
|
can only buffer so many events, which might have been a problem except
|
||||||
|
for a very nice feature of Haskell's inotify interface: It has a thread
|
||||||
|
that drains the limited inotify buffer and does its own buffering.
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
|
Right now, `git annex watch` is not as fast as it could be when doing
|
||||||
|
something like adding a lot of files, or deleting a lot of files.
|
||||||
|
For each file, it currently runs a git command that updates the index.
|
||||||
|
I did some work toward coalescing these into one command (which `git annex`
|
||||||
|
already does normally). It's not quite ready to be turned on yet,
|
||||||
|
because of some races involving `git add` that become much worse
|
||||||
|
if it's delayed by event coalescing.
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
|
And races were the theme of today. Spent most of the day really
|
||||||
|
getting to grips with all the fun races that can occur between
|
||||||
|
modifications happening to files, and `git annex watch`. The [[inotify]]
|
||||||
|
page now has a long list of known races, some benign, and several,
|
||||||
|
all involving adding files, that are quite nasty.
|
||||||
|
|
||||||
|
I fixed one of those races this evening. The rest will probably involve
|
||||||
|
moving away from using `git add`, which necessarily examines the file
|
||||||
|
on disk, to directly shoving the symlink into git's index.
|
||||||
|
|
||||||
|
BTW, it turns out that `dvcs-autosync` has grappled with some of these same
|
||||||
|
races: <http://comments.gmane.org/gmane.comp.version-control.home-dir/665>
|
||||||
|
I hope that `git annex watch` will be in a better place to deal with them,
|
||||||
|
since it's only dealing with git, and with a restricted portion of it
|
||||||
|
relevant to git-annex.
|
||||||
|
|
||||||
|
It's important that `git annex watch` be rock solid. It's the foundation
|
||||||
|
of the git annex assistant. Users should not need to worry about races
|
||||||
|
when using it. Most users won't know what race conditions are. If only I
|
||||||
|
could be so lucky!
|
|
@ -58,12 +58,19 @@ Many races need to be dealt with by this code. Here are some of them.
|
||||||
* File is added and then replaced with another file before the annex add
|
* File is added and then replaced with another file before the annex add
|
||||||
moves its content into the annex.
|
moves its content into the annex.
|
||||||
|
|
||||||
**Currently unfixed**; The new content will be moved to the annex under the
|
Fixed this problem; now it hard links the file to a temp directory and
|
||||||
old checksum, and fsck will later catch this inconsistency.
|
operates on the hard link, which is also made unwritable.
|
||||||
|
|
||||||
Possible fix: Move content someplace before doing checksumming. Perhaps
|
* A process has a file open for write, another one closes it, and so it's
|
||||||
using a hard link and removing the write bit to prevent modification
|
added. Then the first process modifies it.
|
||||||
while checksumming.
|
|
||||||
|
**Currently unfixed**; This changes content in the annex, and fsck will
|
||||||
|
later catch the inconsistency.
|
||||||
|
|
||||||
|
Possible fixes: Somehow track or detect if a file is open for write
|
||||||
|
by any processes. Or, when possible, making a copy-on-write copy
|
||||||
|
before adding the file would avoid this. Or, as a last resort, make
|
||||||
|
an expensive copy of the file and add that.
|
||||||
|
|
||||||
* File is added and then replaced with another file before the annex add
|
* File is added and then replaced with another file before the annex add
|
||||||
makes its symlink.
|
makes its symlink.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue