Merge branch 'master' into assistant

This commit is contained in:
Joey Hess 2012-06-27 16:14:33 -04:00
commit 8baff14054
14 changed files with 307 additions and 7 deletions

View file

@ -15,15 +15,22 @@ import Command
import qualified Remote import qualified Remote
import qualified Annex import qualified Annex
import qualified Annex.Branch import qualified Annex.Branch
import qualified Annex.Queue
import Annex.Content
import Annex.CatFile
import qualified Git.Command import qualified Git.Command
import qualified Git.LsFiles as LsFiles
import qualified Git.Merge import qualified Git.Merge
import qualified Git.Branch import qualified Git.Branch
import qualified Git.Ref import qualified Git.Ref
import qualified Git import qualified Git
import Git.Types (BlobType(..))
import qualified Types.Remote import qualified Types.Remote
import qualified Remote.Git import qualified Remote.Git
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.ByteString.Lazy as L
import Data.Hash.MD5
def :: [Command] def :: [Command]
def = [command "sync" (paramOptional (paramRepeating paramRemote)) def = [command "sync" (paramOptional (paramRepeating paramRemote))
@ -168,10 +175,104 @@ mergeAnnex = do
Annex.Branch.forceUpdate Annex.Branch.forceUpdate
stop stop
mergeFrom :: Git.Ref -> CommandCleanup mergeFrom :: Git.Ref -> Annex Bool
mergeFrom branch = do mergeFrom branch = do
showOutput showOutput
inRepo $ Git.Merge.mergeNonInteractive branch ok <- inRepo $ Git.Merge.mergeNonInteractive branch
if ok
then return ok
else resolveMerge
{- Resolves a conflicted merge. It's important that any conflicts be
 - resolved in a way that itself avoids later merge conflicts, since
 - multiple repositories may be doing this concurrently.
 -
 - Only annexed files are resolved; other files are left for the user to
 - handle.
 -
 - This uses the Keys pointed to by the files to construct new
 - filenames (see mergeFile). So when both sides modified file foo,
 - it will be deleted, and replaced with one file per key.
 -
 - On the other hand, when one side deleted foo, and the other modified it,
 - it will be deleted, and the modified version stored under the filename
 - derived from its key.
 -
 - Returns True only when there was at least one unmerged file and every
 - unmerged file was resolved. In that case the queued git commands are
 - flushed and the result is committed. A failed merge that left no
 - unmerged files behind (so it failed for some reason other than a
 - conflict) is reported as unresolved, rather than vacuously succeeding.
 -}
resolveMerge :: Annex Bool
resolveMerge = do
    top <- fromRepo Git.repoPath
    conflicts <- inRepo $ LsFiles.unmerged [top]
    -- Every conflicted file is attempted, even after an earlier one
    -- could not be resolved.
    results <- mapM resolveMerge' conflicts
    let merged = not (null conflicts) && and results
    when merged $ do
        Annex.Queue.flush
        -- The commit's own exit status is deliberately ignored.
        void $ inRepo $ Git.Command.runBool "commit"
            [Param "-m", Param "git-annex automatic merge conflict fix"]
    return merged
{- Tries to resolve the conflict on a single unmerged file.
 -
 - A side is only handled when its blob is a symlink, or when that side
 - deleted the file; otherwise False is returned and nothing is changed.
 - False is also returned when a side's symlink target does not parse
 - as a key.
 -}
resolveMerge' :: LsFiles.Unmerged -> Annex Bool
resolveMerge' u
    | bothhandled =
        withKey LsFiles.valUs $ \ours ->
            withKey LsFiles.valThem $ \theirs ->
                resolve ours theirs
    | otherwise = return False
  where
    file = LsFiles.unmergedFile u

    bothhandled = handled LsFiles.valUs && handled LsFiles.valThem

    -- A side qualifies when it is a symlink or was deleted.
    handled side = side (LsFiles.unmergedBlobType u)
        `elem` [Just SymlinkBlob, Nothing]

    resolve ours theirs
        | ours /= theirs = do
            -- Different keys: drop the conflicted file and keep
            -- one variant per side.
            liftIO $ nukeFile file
            Annex.Queue.addCommand "rm" [Params "--quiet -f --"] [file]
            mapM_ makelink [ours, theirs]
            return True
        | otherwise = do
            makelink ours
            return True

    -- Creates the variant symlink for a key and queues it for git add.
    makelink Nothing = noop
    makelink (Just key) = do
        target <- calcGitLink dest key
        liftIO $ do
            nukeFile dest
            createSymbolicLink target dest
        Annex.Queue.addCommand "add" [Param "--force", Param "--"] [dest]
      where
        dest = mergeFile file key

    -- Looks up the key a side's blob points to, and passes it on.
    -- A deleted side is passed on as Nothing.
    withKey side a = case side (LsFiles.unmergedSha u) of
        Nothing -> a Nothing
        Just sha -> do
            content <- catObject sha
            let linktarget = encodeW8 (L.unpack content)
            maybe (return False) (a . Just) $
                fileKey (takeFileName linktarget)
{- The filename to use when resolving a conflicted merge of a file,
 - that points to a key.
 -
 - Something derived from the key needs to be included in the filename,
 - but rather than exposing the whole key to the user, a very weak hash
 - is used. There is a very real, although still unlikely, chance of
 - conflicts using this hash.
 -
 - In the event that there is a conflict with the filename generated
 - for some other key, that conflict will itself be handled by the
 - conflicted merge resolution code. That case is detected, and the full
 - key is used in the filename.
 -
 - The generated name has the form  dir/base.variant-V.ext  where V is
 - either the short hash or the full key.
 -}
mergeFile :: FilePath -> Key -> FilePath
mergeFile file key
    | doubleconflict = go $ show key
    | otherwise = go $ shortHash $ show key
  where
    varmarker = ".variant-"
    -- A file name that already contains the marker was itself produced
    -- by an earlier conflict resolution. The marker is followed by the
    -- hash (and possibly an extension), so it has to be looked for
    -- anywhere in the name, not only at its end.
    doubleconflict = varmarker `isInfixOf` takeFileName file
    go v = takeDirectory file
        </> dropExtension (takeFileName file)
        ++ varmarker ++ v
        ++ takeExtension file
{- The first 4 characters of the md5s digest of a string. -}
shortHash :: String -> String
shortHash s = take 4 $ md5s $ encodeFilePath s
changed :: Remote -> Git.Ref -> Annex Bool changed :: Remote -> Git.Ref -> Annex Bool
changed remote b = do changed remote b = do

View file

@ -54,6 +54,10 @@ read' repo = go repo
{- Reads git config from a handle and populates a repo with it. -} {- Reads git config from a handle and populates a repo with it. -}
hRead :: Repo -> Handle -> IO Repo hRead :: Repo -> Handle -> IO Repo
hRead repo h = do hRead repo h = do
-- We use the FileSystemEncoding when reading from git-config,
-- because it can contain arbitrary filepaths (and other strings)
-- in any encoding.
fileEncoding h
val <- hGetContentsStrict h val <- hGetContentsStrict h
store val repo store val repo

View file

@ -1,6 +1,6 @@
{- git ls-files interface {- git ls-files interface
- -
- Copyright 2010 Joey Hess <joey@kitenet.net> - Copyright 2010,2012 Joey Hess <joey@kitenet.net>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -13,11 +13,16 @@ module Git.LsFiles (
changedUnstaged, changedUnstaged,
typeChanged, typeChanged,
typeChangedStaged, typeChangedStaged,
Conflicting(..),
Unmerged(..),
unmerged,
) where ) where
import Common import Common
import Git import Git
import Git.Command import Git.Command
import Git.Types
import Git.Sha
{- Scans for files that are checked into git at the specified locations. -} {- Scans for files that are checked into git at the specified locations. -}
inRepo :: [FilePath] -> Repo -> IO [FilePath] inRepo :: [FilePath] -> Repo -> IO [FilePath]
@ -75,3 +80,76 @@ typeChanged' ps l repo = do
where where
prefix = [Params "diff --name-only --diff-filter=T -z"] prefix = [Params "diff --name-only --diff-filter=T -z"]
suffix = Param "--" : map File l suffix = Param "--" : map File l
{- An item in conflict has two possible values.
 - Either can be Nothing, when that side deleted the file.
 -
 - valUs is the value from our side of the merge (stage 2), and
 - valThem is the value from their side (stage 3). -}
data Conflicting v = Conflicting
    { valUs :: Maybe v
    , valThem :: Maybe v
    } deriving (Show)
{- A file with an unresolved merge conflict, along with the blob type
 - and sha recorded for each side of the conflict. -}
data Unmerged = Unmerged
    { unmergedFile :: FilePath
    , unmergedBlobType :: Conflicting BlobType
    , unmergedSha :: Conflicting Sha
    } deriving (Show)
{- Lists the files in the specified locations that have unresolved
 - merge conflicts.
 -
 - For each conflicting file, ls-files outputs one line per stage that
 - is present:
 -   1 = old version, can be ignored
 -   2 = us
 -   3 = them
 - A missing line for stage 2 or 3 means that side deleted the file.
 -}
unmerged :: [FilePath] -> Repo -> IO [Unmerged]
unmerged l repo = do
    entries <- pipeNullSplit params repo
    return $ reduceUnmerged [] $ catMaybes $ map parseUnmerged entries
  where
    params = Params "ls-files --unmerged -z --" : map File l
{- One side of a conflict, as parsed from a single line of
 - ls-files --unmerged output. isus is True for the "us" side (stage 2).
 - The blob type and sha are Nothing in the placeholder that
 - reduceUnmerged makes for a side that deleted the file. -}
data InternalUnmerged = InternalUnmerged
    { isus :: Bool
    , ifile :: FilePath
    , iblobtype :: Maybe BlobType
    , isha :: Maybe Sha
    } deriving (Show)
{- Parses one entry of ls-files --unmerged output, which has the form
 - "blobtype sha stage\tfile".
 -
 - Yields Nothing for entries that are malformed, that have an
 - unrecognised blob type or sha, or that are not stage 2 or 3. -}
parseUnmerged :: String -> Maybe InternalUnmerged
parseUnmerged s = case words metadata of
    (rawblobtype : rawsha : rawstage : _)
        | not (null file) -> do
            stage <- readish rawstage :: Maybe Int
            if stage == 2 || stage == 3
                then do
                    blobtype <- readBlobType rawblobtype
                    sha <- extractSha rawsha
                    return $ InternalUnmerged (stage == 2) file
                        (Just blobtype) (Just sha)
                else Nothing -- skip stage 1
    _ -> Nothing
  where
    (metadata, file) = separate (== '\t') s
{- Pairs up the parsed entries for each file into a single Unmerged.
 -
 - Only the entry immediately following is considered as the other side
 - of the same file. When it is for a different file (or there is none),
 - the other side is taken to have deleted the file. Results are
 - accumulated onto the front of the first parameter. -}
reduceUnmerged :: [Unmerged] -> [InternalUnmerged] -> [Unmerged]
reduceUnmerged acc [] = acc
reduceUnmerged acc (entry:others) = reduceUnmerged (combined:acc) remaining
  where
    -- Stand-in for a side that deleted the file.
    missing = entry
        { isus = not (isus entry)
        , iblobtype = Nothing
        , isha = Nothing
        }
    (remaining, sibling) = case others of
        (next:later) | ifile next == ifile entry -> (later, next)
        _ -> (others, missing)
    (ours, theirs)
        | isus entry = (entry, sibling)
        | otherwise = (sibling, entry)
    combined = Unmerged
        { unmergedFile = ifile entry
        , unmergedBlobType = Conflicting (iblobtype ours) (iblobtype theirs)
        , unmergedSha = Conflicting (isha ours) (isha theirs)
        }

View file

@ -51,6 +51,7 @@ type Tag = Ref
{- Types of objects that can be stored in git. -} {- Types of objects that can be stored in git. -}
data ObjectType = BlobObject | CommitObject | TreeObject data ObjectType = BlobObject | CommitObject | TreeObject
deriving (Eq)
instance Show ObjectType where instance Show ObjectType where
show BlobObject = "blob" show BlobObject = "blob"
@ -65,9 +66,16 @@ readObjectType _ = Nothing
{- Types of blobs. -} {- Types of blobs. -}
data BlobType = FileBlob | ExecutableBlob | SymlinkBlob data BlobType = FileBlob | ExecutableBlob | SymlinkBlob
deriving (Eq)
{- Git uses magic numbers to denote the type of a blob. -} {- Git uses magic numbers to denote the type of a blob. -}
instance Show BlobType where instance Show BlobType where
show FileBlob = "100644" show FileBlob = "100644"
show ExecutableBlob = "100755" show ExecutableBlob = "100755"
show SymlinkBlob = "120000" show SymlinkBlob = "120000"
{- Parses one of git's magic blob type numbers; Nothing for any
 - other string. -}
readBlobType :: String -> Maybe BlobType
readBlobType mode = case mode of
    "100644" -> Just FileBlob
    "100755" -> Just ExecutableBlob
    "120000" -> Just SymlinkBlob
    _ -> Nothing

View file

@ -10,8 +10,7 @@ module Git.UnionMerge (
mergeIndex mergeIndex
) where ) where
import qualified Data.Text.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified Data.Text.Lazy.Encoding as L
import qualified Data.Set as S import qualified Data.Set as S
import Common import Common
@ -79,10 +78,14 @@ mergeFile info file h repo = case filter (/= nullSha) [Ref asha, Ref bsha] of
=<< calcMerge . zip shas <$> mapM getcontents shas =<< calcMerge . zip shas <$> mapM getcontents shas
where where
[_colonmode, _bmode, asha, bsha, _status] = words info [_colonmode, _bmode, asha, bsha, _status] = words info
getcontents s = map L.unpack . L.lines .
L.decodeUtf8 <$> catObject h s
use sha = return $ Just $ use sha = return $ Just $
updateIndexLine sha FileBlob $ asTopFilePath file updateIndexLine sha FileBlob $ asTopFilePath file
-- We don't know how the file is encoded, but need to
-- split it into lines to union merge. Using the
-- FileSystemEncoding for this is a hack, but ensures there
-- are no decoding errors. Note that this works because
-- streamUpdateIndex sets fileEncoding on its write handle.
getcontents s = lines . encodeW8 . L.unpack <$> catObject h s
{- Calculates a union merge between a list of refs, with contents. {- Calculates a union merge between a list of refs, with contents.
- -

5
debian/changelog vendored
View file

@ -5,6 +5,11 @@ git-annex (3.20120625) UNRELEASED; urgency=low
which disables the watch command. which disables the watch command.
* Avoid ugly failure mode when moving content from a local repository * Avoid ugly failure mode when moving content from a local repository
that is not available. that is not available.
* Got rid of the last place that did utf8 decoding.
* Accept arbitrarily encoded repository filepaths etc when reading
git config output. This fixes support for remotes with unusual characters
in their names.
* sync: Automatically resolves merge conflicts.
-- Joey Hess <joeyh@debian.org> Mon, 25 Jun 2012 11:38:12 -0400 -- Joey Hess <joeyh@debian.org> Mon, 25 Jun 2012 11:38:12 -0400

View file

@ -0,0 +1,34 @@
What steps will reproduce the problem?
alip@hayalet /tmp/aaa (git)-[master] % git annex init aaa
init aaa ok
(Recording state in git...)
alip@hayalet /tmp/aaa (git)-[master] % git remote add çüş /tmp/çüş
alip@hayalet /tmp/aaa (git)-[master] % git annex sync --debug
git ["--git-dir=/tmp/aaa/.git","--work-tree=/tmp/aaa","symbolic-ref","HEAD"]
git ["--git-dir=/tmp/aaa/.git","--work-tree=/tmp/aaa","show-ref","git-annex"]
git ["--git-dir=/tmp/aaa/.git","--work-tree=/tmp/aaa","show-ref","--hash","refs/heads/git-annex"]
git ["--git-dir=/tmp/aaa/.git","--work-tree=/tmp/aaa","log","refs/heads/git-annex..bc45cd9c2cb7c9b0c7a12a4c0210fe6a262abac9","--oneline","-n1"]
git ["--git-dir=/tmp/aaa/.git","--work-tree=/tmp/aaa","log","refs/heads/git-annex..9220bfedd1e13b2d791c918e2d59901af353825f","--oneline","-n1"]
(merging origin/git-annex into git-annex...)
git ["--git-dir=/tmp/aaa/.git","--work-tree=/tmp/aaa","cat-file","--batch"]
git ["--git-dir=/tmp/aaa/.git","--work-tree=/tmp/aaa","update-index","-z","--index-info"]
git ["--git-dir=/tmp/aaa/.git","--work-tree=/tmp/aaa","diff-index","--raw","-z","-r","--no-renames","-l0","--cached","9220bfedd1e13b2d791c918e2d59901af353825f"]
git-annex: Cannot decode byte '\xfc': Data.Text.Encoding.decodeUtf8: Invalid UTF-8 stream
1 alip@hayalet /tmp/aaa (git)-[master] %
What is the expected output? What do you see instead?
Syncing a repository under a path with utf-8 characters in its name fails.
What version of git-annex are you using? On what operating system?
git-annex version: 3.20120624
On Exherbo, linux-3.4
Please provide any additional information below.
'\xfc' is valid UTF-8: 'LATIN SMALL LETTER U WITH DIAERESIS'
> closing as non-reproducible and presumably fixed. [[done]] --[[Joey]]

View file

@ -0,0 +1,12 @@
[[!comment format=mdwn
username="http://joeyh.name/"
ip="4.153.2.25"
subject="comment 1"
date="2012-06-27T02:48:31Z"
content="""
I don't think this has to do with the path name of the repository containing utf-8 at all.
Your recipe for reproducing this depends on some pre-existing repository that I don't know how to set up to reproduce this bug. All I can guess is that, based on the \"decodeUtf8\" in the error message, it's coming from the one part of the code that still uses that, the union merger.
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="http://joeyh.name/"
ip="4.153.2.25"
subject="comment 2"
date="2012-06-27T03:08:13Z"
content="""
Since I can't reproduce it I am not sure, but it may be fixed by the commits I've just made.
"""]]

View file

@ -0,0 +1,13 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawkWzAq6TusMi9zI3FLkDOETRIAUTtmGZVg"
nickname="Ali"
subject="comment 3"
date="2012-06-27T12:56:37Z"
content="""
Yes, the problem is fixed.
The repository was a normal git repository with path /tmp/çüş (git init)
and with annex description \"çüş\" (git annex init çüş)
afaict, i can't reproduce the problem anymore either :-)
"""]]

View file

@ -0,0 +1,10 @@
I'm not sure if this _feature_ exists already wrapped or provided as a recipe for users or not yet. But it would be nice to be able to do a
git annex du [PATH]
Such that the output that git annex would return is the total disk used locally in the PATH and the theoretical disk used by the PATH if it was fully populated locally. e.g.
$ git annex du FSL0001_ANALYSIS
$ Local: 1000kb, Annex: 2000kb
or something along the lines of that?

View file

@ -0,0 +1,9 @@
[[!comment format=mdwn
username="http://joeyh.name/"
ip="4.153.2.25"
subject="comment 1"
date="2012-06-27T12:36:08Z"
content="""
Use `du -L` for the disk space used locally. The other number is not currently available, but it would be nice to have. I also sometimes would like to have data on which backends are used how much, so making this `git annex status --subdir` is tempting. Unfortunately, its current implementation scans `.git/annex/objects`
and not the disk tree (better for accurate numbers due to copies), so it would not be a very easy thing to add. Not massively hard, but not something I can pound out before I start work today..
"""]]

View file

@ -135,6 +135,11 @@ subdirectories).
commands to do each of those steps by hand, or if you don't want to commands to do each of those steps by hand, or if you don't want to
worry about the details, you can use sync. worry about the details, you can use sync.
Merge conflicts are automatically resolved by sync. When two conflicting
versions of a file have been committed, both will be added to the tree,
under different filenames. For example, file "foo" would be replaced
with "foo.somekey" and "foo.otherkey".
Note that syncing with a remote will not update the remote's working Note that syncing with a remote will not update the remote's working
tree with changes made to the local repository. However, those changes tree with changes made to the local repository. However, those changes
are pushed to the remote, so can be merged into its working tree are pushed to the remote, so can be merged into its working tree

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawkwR9uOA38yi5kEUvcEWNtRiZwpxXskayE"
nickname="Agustin"
subject="comment 11"
date="2012-06-27T08:54:52Z"
content="""
Hi @joey! Perfect!... I'll do that then!
Thanks for your time man!
"""]]