Merge branch 'master' into watch
This commit is contained in:
commit
27cfeca4ea
5 changed files with 113 additions and 48 deletions
|
@ -33,6 +33,7 @@ import qualified Git.Command
|
|||
import qualified Git.Ref
|
||||
import qualified Git.Branch
|
||||
import qualified Git.UnionMerge
|
||||
import qualified Git.UpdateIndex
|
||||
import Git.HashObject
|
||||
import qualified Git.Index
|
||||
import Annex.CatFile
|
||||
|
@ -258,8 +259,8 @@ files = withIndexUpdate $ do
|
|||
- in changes from other branches.
|
||||
-}
|
||||
genIndex :: Git.Repo -> IO ()
|
||||
genIndex g = Git.UnionMerge.stream_update_index g
|
||||
[Git.UnionMerge.ls_tree fullname g]
|
||||
genIndex g = Git.UpdateIndex.stream_update_index g
|
||||
[Git.UpdateIndex.ls_tree fullname g]
|
||||
|
||||
{- Merges the specified refs into the index.
|
||||
- Any changes staged in the index will be preserved. -}
|
||||
|
@ -335,13 +336,13 @@ stageJournal = do
|
|||
g <- gitRepo
|
||||
withIndex $ liftIO $ do
|
||||
h <- hashObjectStart g
|
||||
Git.UnionMerge.stream_update_index g
|
||||
Git.UpdateIndex.stream_update_index g
|
||||
[genstream (gitAnnexJournalDir g) h fs]
|
||||
hashObjectStop h
|
||||
where
|
||||
genstream dir h fs streamer = forM_ fs $ \file -> do
|
||||
let path = dir </> file
|
||||
sha <- hashFile h path
|
||||
_ <- streamer $ Git.UnionMerge.update_index_line
|
||||
_ <- streamer $ Git.UpdateIndex.update_index_line
|
||||
sha (fileJournal file)
|
||||
removeFile path
|
||||
|
|
|
@ -7,11 +7,7 @@
|
|||
|
||||
module Git.UnionMerge (
|
||||
merge,
|
||||
merge_index,
|
||||
update_index,
|
||||
stream_update_index,
|
||||
update_index_line,
|
||||
ls_tree
|
||||
merge_index
|
||||
) where
|
||||
|
||||
import System.Cmd.Utils
|
||||
|
@ -24,8 +20,7 @@ import Git
|
|||
import Git.Sha
|
||||
import Git.CatFile
|
||||
import Git.Command
|
||||
|
||||
type Streamer = (String -> IO ()) -> IO ()
|
||||
import Git.UpdateIndex
|
||||
|
||||
{- Performs a union merge between two branches, staging it in the index.
|
||||
- Any previously staged changes in the index will be lost.
|
||||
|
@ -47,38 +42,6 @@ merge_index :: CatFileHandle -> Repo -> [Ref] -> IO ()
|
|||
merge_index h repo bs =
|
||||
stream_update_index repo $ map (\b -> merge_tree_index b h repo) bs
|
||||
|
||||
{- Feeds content into update-index. Later items in the list can override
|
||||
- earlier ones, so the list can be generated from any combination of
|
||||
- ls_tree, merge_trees, and merge_tree_index. -}
|
||||
update_index :: Repo -> [String] -> IO ()
|
||||
update_index repo ls = stream_update_index repo [(`mapM_` ls)]
|
||||
|
||||
{- Streams content into update-index. -}
|
||||
stream_update_index :: Repo -> [Streamer] -> IO ()
|
||||
stream_update_index repo as = do
|
||||
(p, h) <- hPipeTo "git" (toCommand $ gitCommandLine params repo)
|
||||
fileEncoding h
|
||||
forM_ as (stream h)
|
||||
hClose h
|
||||
forceSuccess p
|
||||
where
|
||||
params = map Param ["update-index", "-z", "--index-info"]
|
||||
stream h a = a (streamer h)
|
||||
streamer h s = do
|
||||
hPutStr h s
|
||||
hPutStr h "\0"
|
||||
|
||||
{- Generates a line suitable to be fed into update-index, to add
|
||||
- a given file with a given sha. -}
|
||||
update_index_line :: Sha -> FilePath -> String
|
||||
update_index_line sha file = "100644 blob " ++ show sha ++ "\t" ++ file
|
||||
|
||||
{- Gets the current tree for a ref. -}
|
||||
ls_tree :: Ref -> Repo -> Streamer
|
||||
ls_tree (Ref x) repo streamer = mapM_ streamer =<< pipeNullSplit params repo
|
||||
where
|
||||
params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
|
||||
|
||||
{- For merging two trees. -}
|
||||
merge_trees :: Ref -> Ref -> CatFileHandle -> Repo -> Streamer
|
||||
merge_trees (Ref x) (Ref y) h = calc_merge h $ "diff-tree":diff_opts ++ [x, y]
|
||||
|
|
49
Git/UpdateIndex.hs
Normal file
49
Git/UpdateIndex.hs
Normal file
|
@ -0,0 +1,49 @@
|
|||
{- git-update-index library
|
||||
-
|
||||
- Copyright 2011, 2012 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Git.UpdateIndex (
|
||||
Streamer,
|
||||
stream_update_index,
|
||||
update_index_line,
|
||||
ls_tree
|
||||
) where
|
||||
|
||||
import System.Cmd.Utils
|
||||
|
||||
import Common
|
||||
import Git
|
||||
import Git.Command
|
||||
|
||||
{- Streamers are passed a callback and should feed it lines in the form
|
||||
- read by update-index, and generated by ls-tree. -}
|
||||
type Streamer = (String -> IO ()) -> IO ()
|
||||
|
||||
{- Streams content into update-index from a list of Streamers. -}
|
||||
stream_update_index :: Repo -> [Streamer] -> IO ()
|
||||
stream_update_index repo as = do
|
||||
(p, h) <- hPipeTo "git" (toCommand $ gitCommandLine params repo)
|
||||
fileEncoding h
|
||||
forM_ as (stream h)
|
||||
hClose h
|
||||
forceSuccess p
|
||||
where
|
||||
params = map Param ["update-index", "-z", "--index-info"]
|
||||
stream h a = a (streamer h)
|
||||
streamer h s = do
|
||||
hPutStr h s
|
||||
hPutStr h "\0"
|
||||
|
||||
{- Generates a line suitable to be fed into update-index, to add
|
||||
- a given file with a given sha. -}
|
||||
update_index_line :: Sha -> FilePath -> String
|
||||
update_index_line sha file = "100644 blob " ++ show sha ++ "\t" ++ file
|
||||
|
||||
{- Gets the current tree for a ref. -}
|
||||
ls_tree :: Ref -> Repo -> Streamer
|
||||
ls_tree (Ref x) repo streamer = mapM_ streamer =<< pipeNullSplit params repo
|
||||
where
|
||||
params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
|
45
doc/design/assistant/blog/day_2__races.mdwn
Normal file
45
doc/design/assistant/blog/day_2__races.mdwn
Normal file
|
@ -0,0 +1,45 @@
|
|||
Last night I got `git annex watch` to also handle deletion of files.
|
||||
This was not as tricky as feared; the key is using `git rm --ignore-unmatch`,
|
||||
which avoids most problematic situations (such as a just-deleted file
|
||||
being added back before git is run).
|
||||
|
||||
Also fixed some races when `git annex watch` is doing its startup scan of
|
||||
the tree, which might be changed as it's being traversed. Now only one
|
||||
thread performs actions at a time, so inotify events are queued up during
|
||||
the scan, and dealt with once it completes. It's worth noting that inotify
|
||||
can only buffer so many events, which might have been a problem except
|
||||
for a very nice feature of Haskell's inotify interface: It has a thread
|
||||
that drains the limited inotify buffer and does its own buffering.
|
||||
|
||||
----
|
||||
|
||||
Right now, `git annex watch` is not as fast as it could be when doing
|
||||
something like adding a lot of files, or deleting a lot of files.
|
||||
For each file, it currently runs a git command that updates the index.
|
||||
I did some work toward coalescing these into one command (which `git annex`
|
||||
already does normally). It's not quite ready to be turned on yet,
|
||||
because of some races involving `git add` that become much worse
|
||||
if it's delayed by event coalescing.
|
||||
|
||||
----
|
||||
|
||||
And races were the theme of today. Spent most of the day really
|
||||
getting to grips with all the fun races that can occur between
|
||||
modifications happening to files and `git annex watch`. The [[inotify]]
|
||||
page now has a long list of known races, some benign, and several,
|
||||
all involving adding files, that are quite nasty.
|
||||
|
||||
I fixed one of those races this evening. The rest will probably involve
|
||||
moving away from using `git add`, which necessarily examines the file
|
||||
on disk, to directly shoving the symlink into git's index.
|
||||
|
||||
BTW, it turns out that `dvcs-autosync` has grappled with some of these same
|
||||
races: <http://comments.gmane.org/gmane.comp.version-control.home-dir/665>
|
||||
I hope that `git annex watch` will be in a better place to deal with them,
|
||||
since it's only dealing with git, and with a restricted portion of it
|
||||
relevant to git-annex.
|
||||
|
||||
It's important that `git annex watch` be rock solid. It's the foundation
|
||||
of the git annex assistant. Users should not need to worry about races
|
||||
when using it. Most users won't know what race conditions are. If only I
|
||||
could be so lucky!
|
|
@ -58,12 +58,19 @@ Many races need to be dealt with by this code. Here are some of them.
|
|||
* File is added and then replaced with another file before the annex add
|
||||
moves its content into the annex.
|
||||
|
||||
**Currently unfixed**; The new content will be moved to the annex under the
|
||||
old checksum, and fsck will later catch this inconsistency.
|
||||
Fixed this problem; now it hard links the file to a temp directory and
|
||||
operates on the hard link, which is also made unwritable.
|
||||
|
||||
Possible fix: Move content someplace before doing checksumming. Perhaps
|
||||
using a hard link and removing the write bit to prevent modification
|
||||
while checksumming.
|
||||
* A process has a file open for write, another one closes it, and so it's
|
||||
added. Then the first process modifies it.
|
||||
|
||||
**Currently unfixed**; This changes content in the annex, and fsck will
|
||||
later catch the inconsistency.
|
||||
|
||||
Possible fixes: Somehow track or detect if a file is open for write
|
||||
by any process. Or, when possible, making a copy-on-write copy
|
||||
before adding the file would avoid this. Or, as a last resort, make
|
||||
an expensive copy of the file and add that.
|
||||
|
||||
* File is added and then replaced with another file before the annex add
|
||||
makes its symlink.
|
||||
|
|
Loading…
Add table
Reference in a new issue