Merge branch 'master' into new-monad-control

Conflicts:
	git-annex.cabal
Joey Hess 2011-12-11 16:55:36 -04:00
commit e04852c8af
66 changed files with 997 additions and 246 deletions

View file

@ -37,6 +37,7 @@ import Types.BranchState
import Types.TrustLevel
import Types.UUID
import qualified Utility.Matcher
import qualified Data.Map as M
-- git-annex's monad
newtype Annex a = Annex { runAnnex :: StateT AnnexState IO a }
@ -81,7 +82,7 @@ data AnnexState = AnnexState
, limit :: Either [Utility.Matcher.Token (FilePath -> Annex Bool)] (Utility.Matcher.Matcher (FilePath -> Annex Bool))
, forcetrust :: [(UUID, TrustLevel)]
, trustmap :: Maybe TrustMap
, cipher :: Maybe Cipher
, ciphers :: M.Map EncryptedCipher Cipher
}
newState :: Git.Repo -> AnnexState
@ -104,7 +105,7 @@ newState gitrepo = AnnexState
, limit = Left []
, forcetrust = []
, trustmap = Nothing
, cipher = Nothing
, ciphers = M.empty
}
{- Create and returns an Annex state object for the specified git repo. -}

View file

@ -43,26 +43,51 @@ fullname = Git.Ref $ "refs/heads/" ++ show name
originname :: Git.Ref
originname = Git.Ref $ "origin/" ++ show name
{- A separate index file for the branch. -}
index :: Git.Repo -> FilePath
index g = gitAnnexDir g </> "index"
{- Populates the branch's index file with the current branch contents.
-
- Usually, this is only done when the index doesn't yet exist, and
- the index is used to build up changes to be committed to the branch,
- and merge in changes from other branches.
- This is only done when the index doesn't yet exist, and the index
- is used to build up changes to be committed to the branch, and merge
- in changes from other branches.
-}
genIndex :: Git.Repo -> IO ()
genIndex g = Git.UnionMerge.stream_update_index g
[Git.UnionMerge.ls_tree fullname g]
{- Merges the specified branches into the index.
- Any changes staged in the index will be preserved. -}
mergeIndex :: [Git.Ref] -> Annex ()
mergeIndex branches = do
h <- catFileHandle
inRepo $ \g -> Git.UnionMerge.merge_index h g branches
{- Updates the branch's index to reflect the current contents of the branch.
- Any changes staged in the index will be preserved.
-
- Compares the ref stored in the lock file with the current
- ref of the branch to see if an update is needed.
-}
updateIndex :: Annex ()
updateIndex = do
lock <- fromRepo gitAnnexIndexLock
lockref <- firstRef <$> liftIO (catchDefaultIO (readFileStrict lock) "")
branchref <- getRef fullname
when (lockref /= branchref) $ do
withIndex $ mergeIndex [fullname]
setIndexRef branchref
{- Record that the branch's index has been updated to correspond to a
- given ref of the branch. -}
setIndexRef :: Git.Ref -> Annex ()
setIndexRef ref = do
lock <- fromRepo gitAnnexIndexLock
liftIO $ writeFile lock $ show ref ++ "\n"
{- Runs an action using the branch's index file. -}
withIndex :: Annex a -> Annex a
withIndex = withIndex' False
withIndex' :: Bool -> Annex a -> Annex a
withIndex' bootstrapping a = do
f <- fromRepo index
f <- fromRepo gitAnnexIndex
bracketIO (Git.useIndex f) id $ do
unlessM (liftIO $ doesFileExist f) $ do
unless bootstrapping create
@ -70,6 +95,8 @@ withIndex' bootstrapping a = do
unless bootstrapping $ inRepo genIndex
a
{- Runs an action using the branch's index file, first making sure that
- the branch and index are up-to-date. -}
withIndexUpdate :: Annex a -> Annex a
withIndexUpdate a = update >> withIndex a
@ -99,22 +126,25 @@ getCache file = getState >>= go
{- Creates the branch, if it does not already exist. -}
create :: Annex ()
create = unlessM hasBranch $ do
e <- hasOrigin
if e
then inRepo $ Git.run "branch"
create = unlessM hasBranch $ hasOrigin >>= go >>= setIndexRef
where
go True = do
inRepo $ Git.run "branch"
[Param $ show name, Param $ show originname]
else withIndex' True $
getRef fullname
go False = withIndex' True $
inRepo $ Git.commit "branch created" fullname []
{- Stages the journal, and commits staged changes to the branch. -}
commit :: String -> Annex ()
commit message = whenM journalDirty $ lockJournal $ do
updateIndex
stageJournalFiles
withIndex $ inRepo $ Git.commit message fullname [fullname]
withIndex $
setIndexRef =<< inRepo (Git.commit message fullname [fullname])
{- Ensures that the branch is up-to-date; should be called before data is
- read from it. Runs only once per git-annex run.
{- Ensures that the branch and index are up-to-date; should be
- called before data is read from it. Runs only once per git-annex run.
-
- Before refs are merged into the index, it's important to first stage the
- journal into the index. Otherwise, any changes in the journal would
@ -130,8 +160,9 @@ commit message = whenM journalDirty $ lockJournal $ do
-}
update :: Annex ()
update = onceonly $ do
-- ensure branch exists
-- ensure branch exists, and index is up-to-date
create
updateIndex
-- check what needs updating before taking the lock
dirty <- journalDirty
c <- filterM (changedBranch name . snd) =<< siblingBranches
@ -141,21 +172,15 @@ update = onceonly $ do
let merge_desc = if null branches
then "update"
else "merging " ++
(unwords $ map (show . Git.refDescribe) branches) ++
unwords (map Git.refDescribe branches) ++
" into " ++ show name
unless (null branches) $ do
showSideAction merge_desc
{- Note: This merges the branches into the index.
- Any unstaged changes in the git-annex branch
- (if it's checked out) will be removed. So,
- documentation advises users not to directly
- modify the branch.
-}
h <- catFileHandle
inRepo $ \g -> Git.UnionMerge.merge_index h g branches
mergeIndex branches
ff <- if dirty then return False else tryFastForwardTo refs
unless ff $ inRepo $
Git.commit merge_desc fullname (nub $ fullname:refs)
unless ff $
setIndexRef =<<
inRepo (Git.commit merge_desc fullname (nub $ fullname:refs))
invalidateCache
where
onceonly a = unlessM (branchUpdated <$> getState) $ do
@ -248,6 +273,18 @@ siblingBranches = do
gen l = (Git.Ref $ head l, Git.Ref $ last l)
uref (a, _) (b, _) = a == b
{- Get the ref of a branch. -}
getRef :: Git.Ref -> Annex Git.Ref
getRef branch = firstRef . L.unpack <$> showref
where
showref = inRepo $ Git.pipeRead [Param "show-ref",
Param "--hash", -- get the hash
Param "--verify", -- only exact match
Param $ show branch]
firstRef :: String -> Git.Ref
firstRef = Git.Ref . takeWhile (/= '\n')
{- Applies a function to modify the content of a file.
-
- Note that this does not cause the branch to be merged, it only

View file

@ -43,7 +43,7 @@ import Annex.Exception
{- Checks if a given key's content is currently present. -}
inAnnex :: Key -> Annex Bool
inAnnex = inAnnex' $ doesFileExist
inAnnex = inAnnex' doesFileExist
inAnnex' :: (FilePath -> IO a) -> Key -> Annex a
inAnnex' a key = do
whenM (fromRepo Git.repoIsUrl) $

View file

@ -43,7 +43,7 @@ git_annex_shell r command params
shellcmd = "git-annex-shell"
shellopts = Param command : File dir : params
sshcmd uuid = unwords $
shellcmd : (map shellEscape $ toCommand shellopts) ++
shellcmd : map shellEscape (toCommand shellopts) ++
uuidcheck uuid
uuidcheck NoUUID = []
uuidcheck (UUID u) = ["--uuid", u]

View file

@ -64,7 +64,13 @@ genKey' (b:bs) file = do
r <- (B.getKey b) file
case r of
Nothing -> genKey' bs file
Just k -> return $ Just (k, b)
Just k -> return $ Just (makesane k, b)
where
-- keyNames should not contain newline characters.
makesane k = k { keyName = map fixbadchar (keyName k) }
fixbadchar c
| c == '\n' = '_'
| otherwise = c
{- Looks up the key and backend corresponding to an annexed file,
- by examining what the file symlinks to. -}

View file

@ -90,10 +90,12 @@ keyValueE size file = keyValue size file >>= maybe (return Nothing) addE
, keyBackendName = shaNameE size
}
naiveextension = takeExtension file
extension =
if length naiveextension > 6
then "" -- probably not really an extension
else naiveextension
extension
-- long or newline containing extensions are
-- probably not really an extension
| length naiveextension > 6 ||
'\n' `elem` naiveextension = ""
| otherwise = naiveextension
{- A key's checksum is checked during fsck. -}
checkKeyChecksum :: SHASize -> Key -> Annex Bool

View file

@ -32,7 +32,7 @@ dispatch args cmds options header getgitrepo = do
setupConsole
r <- E.try getgitrepo :: IO (Either E.SomeException Git.Repo)
case r of
Left e -> maybe (throw e) id (cmdnorepo cmd)
Left e -> fromMaybe (throw e) (cmdnorepo cmd)
Right g -> do
state <- Annex.new g
(actions, state') <- Annex.run state $ do

View file

@ -10,10 +10,11 @@ module Command (
noRepo,
next,
stop,
stopUnless,
prepCommand,
doCommand,
whenAnnexed,
notAnnexed,
ifAnnexed,
notBareRepo,
isBareRepo,
autoCopies,
@ -49,6 +50,12 @@ next a = return $ Just a
stop :: Annex (Maybe a)
stop = return Nothing
{- Stops unless a condition is met. -}
stopUnless :: Annex Bool -> Annex (Maybe a) -> Annex (Maybe a)
stopUnless c a = do
ok <- c
if ok then a else stop
{- Prepares to run a command via the check and seek stages, returning a
- list of actions to perform to run the command. -}
prepCommand :: Command -> [String] -> Annex [CommandCleanup]
@ -71,10 +78,10 @@ doCommand = start
{- Modifies an action to only act on files that are already annexed,
- and passes the key and backend on to it. -}
whenAnnexed :: (FilePath -> (Key, Backend Annex) -> Annex (Maybe a)) -> FilePath -> Annex (Maybe a)
whenAnnexed a file = maybe (return Nothing) (a file) =<< Backend.lookupFile file
whenAnnexed a file = ifAnnexed file (a file) (return Nothing)
notAnnexed :: FilePath -> Annex (Maybe a) -> Annex (Maybe a)
notAnnexed file a = maybe a (const $ return Nothing) =<< Backend.lookupFile file
ifAnnexed :: FilePath -> ((Key, Backend Annex) -> Annex a) -> Annex a -> Annex a
ifAnnexed file yes no = maybe no yes =<< Backend.lookupFile file
notBareRepo :: Annex a -> Annex a
notBareRepo a = do

View file

@ -29,13 +29,21 @@ seek = [withFilesNotInGit start, withFilesUnlocked start]
- moving it into the annex directory and setting up the symlink pointing
- to its content. -}
start :: BackendFile -> CommandStart
start p@(_, file) = notBareRepo $ notAnnexed file $ do
start p@(_, file) = notBareRepo $ ifAnnexed file fixup add
where
add = do
s <- liftIO $ getSymbolicLinkStatus file
if isSymbolicLink s || not (isRegularFile s)
then stop
else do
showStart "add" file
next $ perform p
fixup (key, _) = do
-- fixup from an interrupted add; the symlink
-- is present but not yet added to git
showStart "add" file
liftIO $ removeFile file
next $ next $ cleanup file key =<< inAnnex key
perform :: BackendFile -> CommandPerform
perform (backend, file) = Backend.genKey file backend >>= go

View file

@ -45,9 +45,7 @@ download url file = do
let dummykey = Backend.URL.fromUrl url
tmp <- fromRepo $ gitAnnexTmpLocation dummykey
liftIO $ createDirectoryIfMissing True (parentDir tmp)
ok <- liftIO $ Url.download url tmp
if ok
then do
stopUnless (liftIO $ Url.download url tmp) $ do
[(backend, _)] <- Backend.chooseBackends [file]
k <- Backend.genKey tmp backend
case k of
@ -56,7 +54,6 @@ download url file = do
moveAnnex key tmp
setUrlPresent key url
next $ Command.Add.cleanup file key True
else stop
nodownload :: String -> FilePath -> CommandPerform
nodownload url file = do

View file

@ -37,13 +37,9 @@ start numcopies file (key, _) = autoCopies key (>) numcopies $ do
else startRemote file numcopies key remote
startLocal :: FilePath -> Maybe Int -> Key -> CommandStart
startLocal file numcopies key = do
present <- inAnnex key
if present
then do
startLocal file numcopies key = stopUnless (inAnnex key) $ do
showStart "drop" file
next $ performLocal key numcopies
else stop
startRemote :: FilePath -> Maybe Int -> Key -> Remote.Remote Annex -> CommandStart
startRemote file numcopies key remote = do
@ -55,12 +51,9 @@ performLocal key numcopies = lockContent key $ do
(remotes, trusteduuids) <- Remote.keyPossibilitiesTrusted key
untrusteduuids <- trustGet UnTrusted
let tocheck = Remote.remotesWithoutUUID remotes (trusteduuids++untrusteduuids)
success <- canDropKey key numcopies trusteduuids tocheck []
if success
then do
stopUnless (canDropKey key numcopies trusteduuids tocheck []) $ do
whenM (inAnnex key) $ removeAnnex key
next $ cleanupLocal key
else stop
performRemote :: Key -> Maybe Int -> Remote.Remote Annex -> CommandPerform
performRemote key numcopies remote = lockContent key $ do
@ -75,12 +68,9 @@ performRemote key numcopies remote = lockContent key $ do
untrusteduuids <- trustGet UnTrusted
let tocheck = filter (/= remote) $
Remote.remotesWithoutUUID remotes (have++untrusteduuids)
success <- canDropKey key numcopies have tocheck [uuid]
if success
then do
stopUnless (canDropKey key numcopies have tocheck [uuid]) $ do
ok <- Remote.removeKey remote key
next $ cleanupRemote key remote ok
else stop
where
uuid = Remote.uuid remote

View file

@ -21,18 +21,11 @@ seek :: [CommandSeek]
seek = [withKeys start]
start :: Key -> CommandStart
start key = do
present <- inAnnex key
if not present
then stop
else do
checkforced
showStart "dropkey" (show key)
next $ perform key
where
checkforced =
start key = stopUnless (inAnnex key) $ do
unlessM (Annex.getState Annex.force) $
error "dropkey can cause data loss; use --force if you're sure you want to do this"
showStart "dropkey" (show key)
next $ perform key
perform :: Key -> CommandPerform
perform key = lockContent key $ do

View file

@ -73,6 +73,6 @@ readUnusedLog prefix = do
then M.fromList . map parse . lines <$> liftIO (readFile f)
else return M.empty
where
parse line = (num, fromJust $ readKey $ tail rest)
parse line = (num, fromJust $ readKey rest)
where
(num, rest) = break (== ' ') line
(num, rest) = separate (== ' ') line

View file

@ -23,10 +23,7 @@ seek = [withFilesInGit $ whenAnnexed start]
start :: FilePath -> (Key, Backend Annex) -> CommandStart
start file (key, _) = do
link <- calcGitLink file key
l <- liftIO $ readSymbolicLink file
if link == l
then stop
else do
stopUnless ((/=) link <$> liftIO (readSymbolicLink file)) $ do
showStart "fix" file
next $ perform file link

View file

@ -22,32 +22,24 @@ seek :: [CommandSeek]
seek = [withNumCopies $ \n -> whenAnnexed $ start n]
start :: Maybe Int -> FilePath -> (Key, Backend Annex) -> CommandStart
start numcopies file (key, _) = do
inannex <- inAnnex key
if inannex
then stop
else autoCopies key (<) numcopies $ do
start numcopies file (key, _) = stopUnless (not <$> inAnnex key) $
autoCopies key (<) numcopies $ do
from <- Annex.getState Annex.fromremote
case from of
Nothing -> go $ perform key
Just name -> do
-- get --from = copy --from
src <- Remote.byName name
ok <- Command.Move.fromOk src key
if ok
then go $ Command.Move.fromPerform src False key
else stop
stopUnless (Command.Move.fromOk src key) $
go $ Command.Move.fromPerform src False key
where
go a = do
showStart "get" file
next a
perform :: Key -> CommandPerform
perform key = do
ok <- getViaTmp key (getKeyFile key)
if ok
then next $ return True -- no cleanup needed
else stop
perform key = stopUnless (getViaTmp key $ getKeyFile key) $ do
next $ return True -- no cleanup needed
{- Try to find a copy of the file in one of the remotes,
- and copy it to here. -}

View file

@ -203,7 +203,7 @@ tryScan r
"git config --list"
dir = Git.workTree r
cddir
| take 2 dir == "/~" =
| "/~" `isPrefixOf` dir =
let (userhome, reldir) = span (/= '/') (drop 1 dir)
in "cd " ++ userhome ++ " && cd " ++ shellEscape (drop 1 reldir)
| otherwise = "cd " ++ shellEscape dir

View file

@ -58,10 +58,7 @@ perform file oldkey newbackend = do
cleantmp tmpfile
case k of
Nothing -> stop
Just (newkey, _) -> do
ok <- link src newkey
if ok
then do
Just (newkey, _) -> stopUnless (link src newkey) $ do
-- Update symlink to use the new key.
liftIO $ removeFile file
@ -73,7 +70,6 @@ perform file oldkey newbackend = do
mapM_ (setUrlPresent newkey) urls
next $ Command.Add.cleanup file newkey True
else stop
where
cleantmp t = liftIO $ whenM (doesFileExist t) $ removeFile t
link src newkey = getViaTmpUnchecked newkey $ \t -> do

View file

@ -108,17 +108,11 @@ toPerform dest move key = moveLock move key $ do
fromStart :: Remote.Remote Annex -> Bool -> FilePath -> Key -> CommandStart
fromStart src move file key
| move = go
| otherwise = do
ishere <- inAnnex key
if ishere then stop else go
| otherwise = stopUnless (not <$> inAnnex key) go
where
go = do
ok <- fromOk src key
if ok
then do
go = stopUnless (fromOk src key) $ do
showMoveAction move file
next $ fromPerform src move key
else stop
fromOk :: Remote.Remote Annex -> Key -> Annex Bool
fromOk src key = do
u <- getUUID

View file

@ -191,8 +191,7 @@ staleSize label dirspec = do
keys <- lift (Command.Unused.staleKeys dirspec)
if null keys
then nostat
else do
stat label $ json (++ aside "clean up with git-annex unused") $
else stat label $ json (++ aside "clean up with git-annex unused") $
return $ keySizeSum $ S.fromList keys
aside :: String -> String

Command/Sync.hs (new file, 70 lines added)
View file

@ -0,0 +1,70 @@
{- git-annex command
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.Sync where
import Common.Annex
import Command
import qualified Annex.Branch
import qualified Git
import qualified Data.ByteString.Lazy.Char8 as L
def :: [Command]
def = [command "sync" paramPaths seek "synchronize local repository with remote"]
-- syncing involves several operations, any of which can independently fail
seek :: [CommandSeek]
seek = map withNothing [commit, pull, push]
commit :: CommandStart
commit = do
showStart "commit" ""
next $ next $ do
showOutput
-- Commit will fail when the tree is clean, so ignore failure.
_ <- inRepo $ Git.runBool "commit" [Param "-a", Param "-m", Param "sync"]
return True
pull :: CommandStart
pull = do
remote <- defaultRemote
showStart "pull" remote
next $ next $ do
showOutput
checkRemote remote
inRepo $ Git.runBool "pull" [Param remote]
push :: CommandStart
push = do
remote <- defaultRemote
showStart "push" remote
next $ next $ do
Annex.Branch.update
showOutput
inRepo $ Git.runBool "push" [Param remote, matchingbranches]
where
-- git push may be configured to not push matching
-- branches; this should ensure it always does.
matchingbranches = Param ":"
-- the remote defaults to origin when not configured
defaultRemote :: Annex String
defaultRemote = do
branch <- currentBranch
fromRepo $ Git.configGet ("branch." ++ branch ++ ".remote") "origin"
currentBranch :: Annex String
currentBranch = last . split "/" . L.unpack . head . L.lines <$>
inRepo (Git.pipeRead [Param "symbolic-ref", Param "HEAD"])
checkRemote :: String -> Annex ()
checkRemote remote = do
remoteurl <- fromRepo $
Git.configGet ("remote." ++ remote ++ ".url") ""
when (null remoteurl) $ do
error $ "No url is configured for the remote: " ++ remote

View file

@ -10,7 +10,6 @@ module Command.Unannex where
import Common.Annex
import Command
import qualified Annex
import qualified Annex.Queue
import Utility.FileMode
import Logs.Location
import Annex.Content
@ -23,23 +22,10 @@ def = [command "unannex" paramPaths seek "undo accidental add command"]
seek :: [CommandSeek]
seek = [withFilesInGit $ whenAnnexed start]
{- The unannex subcommand undoes an add. -}
start :: FilePath -> (Key, Backend Annex) -> CommandStart
start file (key, _) = do
ishere <- inAnnex key
if ishere
then do
force <- Annex.getState Annex.force
unless force $ do
top <- fromRepo Git.workTree
staged <- inRepo $ LsFiles.staged [top]
unless (null staged) $
error "This command cannot be run when there are already files staged for commit."
Annex.changeState $ \s -> s { Annex.force = True }
start file (key, _) = stopUnless (inAnnex key) $ do
showStart "unannex" file
next $ perform file key
else stop
perform :: FilePath -> Key -> CommandPerform
perform file key = next $ cleanup file key
@ -47,9 +33,17 @@ perform file key = next $ cleanup file key
cleanup :: FilePath -> Key -> CommandCleanup
cleanup file key = do
liftIO $ removeFile file
inRepo $ Git.run "rm" [Params "--quiet --", File file]
-- git rm deletes empty directories; put them back
liftIO $ createDirectoryIfMissing True (parentDir file)
-- git rm deletes empty directory without --cached
inRepo $ Git.run "rm" [Params "--cached --quiet --", File file]
-- If the file was already committed, it is now staged for removal.
-- Commit that removal now, to avoid later confusing the
-- pre-commit hook if this file is later added back to
-- git as a normal, non-annexed file.
whenM (not . null <$> inRepo (LsFiles.staged [file])) $ do
inRepo $ Git.run "commit" [
Param "-m", Param "content removed from git annex",
Param "--", File file]
fast <- Annex.getState Annex.fast
if fast
@ -63,9 +57,4 @@ cleanup file key = do
fromAnnex key file
logStatus key InfoMissing
-- Commit staged changes at end to avoid confusing the
-- pre-commit hook if this file is later added back to
-- git as a normal, non-annexed file.
Annex.Queue.add "commit" [Param "-m", Param "content removed from git annex"] []
return True

View file

@ -152,13 +152,12 @@ excludeReferenced l = do
(S.fromList l)
where
-- Skip the git-annex branches, and get all other unique refs.
refs = map Git.Ref .
map last .
refs = map (Git.Ref . last) .
nubBy cmpheads .
filter ourbranches .
map words . lines . L.unpack
cmpheads a b = head a == head b
ourbranchend = '/' : show (Annex.Branch.name)
ourbranchend = '/' : show Annex.Branch.name
ourbranches ws = not $ ourbranchend `isSuffixOf` last ws
removewith [] s = return $ S.toList s
removewith (a:as) s

View file

@ -79,9 +79,10 @@ repoNotIgnored r = not . Git.configTrue <$> getConfig r "ignore" "false"
{- If a value is specified, it is used; otherwise the default is looked up
- in git config. forcenumcopies overrides everything. -}
getNumCopies :: Maybe Int -> Annex Int
getNumCopies v =
Annex.getState Annex.forcenumcopies >>= maybe (use v) (return . id)
getNumCopies v = perhaps (use v) =<< Annex.getState Annex.forcenumcopies
where
use (Just n) = return n
use Nothing = read <$> fromRepo (Git.configGet config "1")
use Nothing = perhaps (return 1) =<<
readMaybe <$> fromRepo (Git.configGet config "1")
perhaps fallback = maybe fallback (return . id)
config = "annex.numcopies"

Git.hs (25 lines changed)
View file

@ -345,7 +345,7 @@ urlPort :: Repo -> Maybe Integer
urlPort r =
case urlAuthPart uriPort r of
":" -> Nothing
(':':p) -> Just (read p)
(':':p) -> readMaybe p
_ -> Nothing
{- Hostname of an URL repo, including any username (ie, "user@host") -}
@ -463,8 +463,8 @@ shaSize :: Int
shaSize = 40
{- Commits the index into the specified branch (or other ref),
- with the specified parent refs. -}
commit :: String -> Ref -> [Ref] -> Repo -> IO ()
- with the specified parent refs, and returns the new ref -}
commit :: String -> Ref -> [Ref] -> Repo -> IO Ref
commit message newref parentrefs repo = do
tree <- getSha "write-tree" $ asString $
pipeRead [Param "write-tree"] repo
@ -473,6 +473,7 @@ commit message newref parentrefs repo = do
(map Param $ ["commit-tree", show tree] ++ ps)
(L.pack message) repo
run "update-ref" [Param $ show newref, Param $ show sha] repo
return sha
where
ignorehandle a = snd <$> a
asString a = L.unpack <$> a
@ -507,11 +508,7 @@ configStore s repo = do
configParse :: String -> M.Map String String
configParse s = M.fromList $ map pair $ lines s
where
pair l = (key l, val l)
key l = head $ keyval l
val l = join sep $ drop 1 $ keyval l
keyval l = split sep l :: [String]
sep = "="
pair = separate (== '=')
{- Calculates a list of a repo's configured remotes, by parsing its config. -}
configRemotes :: Repo -> IO [Repo]
@ -550,13 +547,11 @@ genRemote s repo = gen $ calcloc s
scpstyle v = ":" `isInfixOf` v && not ("//" `isInfixOf` v)
scptourl v = "ssh://" ++ host ++ slash dir
where
bits = split ":" v
host = head bits
dir = join ":" $ drop 1 bits
slash d | d == "" = "/~/" ++ dir
| head d == '/' = dir
| head d == '~' = '/':dir
| otherwise = "/~/" ++ dir
(host, dir) = separate (== ':') v
slash d | d == "" = "/~/" ++ d
| "/" `isPrefixOf` d = d
| "~" `isPrefixOf` d = '/':d
| otherwise = "/~/" ++ d
{- Checks if a string from git config is a true value. -}
configTrue :: String -> Bool

View file

@ -48,7 +48,7 @@ merge_index h repo bs =
- earlier ones, so the list can be generated from any combination of
- ls_tree, merge_trees, and merge_tree_index. -}
update_index :: Repo -> [String] -> IO ()
update_index repo ls = stream_update_index repo [\s -> mapM_ s ls]
update_index repo ls = stream_update_index repo [(`mapM_` ls)]
{- Streams content into update-index. -}
stream_update_index :: Repo -> [Streamer] -> IO ()

View file

@ -47,6 +47,7 @@ import qualified Command.Trust
import qualified Command.Untrust
import qualified Command.Semitrust
import qualified Command.Dead
import qualified Command.Sync
import qualified Command.AddUrl
import qualified Command.Map
import qualified Command.Upgrade
@ -61,6 +62,8 @@ cmds = concat
, Command.Copy.def
, Command.Unlock.def
, Command.Lock.def
, Command.Sync.def
, Command.AddUrl.def
, Command.Init.def
, Command.Describe.def
, Command.InitRemote.def
@ -72,7 +75,6 @@ cmds = concat
, Command.Untrust.def
, Command.Semitrust.def
, Command.Dead.def
, Command.AddUrl.def
, Command.FromKey.def
, Command.DropKey.def
, Command.Fix.def

View file

@ -20,6 +20,8 @@ module Locations (
gitAnnexUnusedLog,
gitAnnexJournalDir,
gitAnnexJournalLock,
gitAnnexIndex,
gitAnnexIndexLock,
isLinkToAnnex,
annexHashes,
hashDirMixed,
@ -80,16 +82,15 @@ gitAnnexLocation key r
| Git.repoIsLocalBare r =
{- Bare repositories default to hashDirLower for new
- content, as it's more portable. -}
go (Git.workTree r) (annexLocations key)
check (map inrepo $ annexLocations key)
| otherwise =
{- Non-bare repositories only use hashDirMixed, so
- don't need to do any work to check if the file is
- present. -}
return $ Git.workTree r </> ".git" </>
annexLocation key hashDirMixed
return $ inrepo ".git" </> annexLocation key hashDirMixed
where
go dir locs = fromMaybe (dir </> head locs) <$> check dir locs
check dir = firstM $ \f -> doesFileExist $ dir </> f
inrepo d = Git.workTree r </> d
check locs = fromMaybe (head locs) <$> firstM doesFileExist locs
{- The annex directory of a repository. -}
gitAnnexDir :: Git.Repo -> FilePath
@ -132,6 +133,14 @@ gitAnnexJournalDir r = addTrailingPathSeparator $ gitAnnexDir r </> "journal"
gitAnnexJournalLock :: Git.Repo -> FilePath
gitAnnexJournalLock r = gitAnnexDir r </> "journal.lck"
{- .git/annex/index is used to stage changes to the git-annex branch -}
gitAnnexIndex :: Git.Repo -> FilePath
gitAnnexIndex r = gitAnnexDir r </> "index"
{- Lock file for .git/annex/index. -}
gitAnnexIndexLock :: Git.Repo -> FilePath
gitAnnexIndexLock r = gitAnnexDir r </> "index.lck"
{- Checks a symlink target to see if it appears to point to annexed content. -}
isLinkToAnnex :: FilePath -> Bool
isLinkToAnnex s = ("/.git/" ++ objectDir) `isInfixOf` s

View file

@ -55,15 +55,15 @@ fixBadUUID = M.fromList . map fixup . M.toList
| otherwise = (k, v)
where
kuuid = fromUUID k
isbad = (not $ isuuid kuuid) && isuuid lastword
isbad = not (isuuid kuuid) && isuuid lastword
ws = words $ value v
lastword = last ws
fixeduuid = toUUID lastword
fixedvalue = unwords $ kuuid:(take (length ws - 1) ws)
fixedvalue = unwords $ kuuid: init ws
-- For the fixed line to take precedence, it should be
-- slightly newer, but only slightly.
newertime (LogEntry (Date d) _) = d + minimumPOSIXTimeSlice
newertime (LogEntry (Unknown) _) = minimumPOSIXTimeSlice
newertime (LogEntry Unknown _) = minimumPOSIXTimeSlice
minimumPOSIXTimeSlice = 0.000001
isuuid s = length s == 36 && length (split "-" s) == 5

View file

@ -182,7 +182,7 @@ onBupRemote r a command params = do
- local bup repositories to see if they are available, and getting their
- uuid (which may be different from the stored uuid for the bup remote).
-
- If a bup repository is not available, returns a dummy uuid of "".
- If a bup repository is not available, returns NoUUID.
- This will cause checkPresent to indicate nothing from the bup remote
- is known to be present.
-

View file

@ -165,7 +165,7 @@ onLocal :: Git.Repo -> Annex a -> IO a
onLocal r a = do
-- Avoid re-reading the repository's configuration if it was
-- already read.
state <- if (M.null $ Git.configMap r)
state <- if M.null $ Git.configMap r
then Annex.new r
else return $ Annex.newState r
Annex.eval state $ do
@ -210,6 +210,7 @@ copyToRemote r key
params <- rsyncParams r
-- run copy from perspective of remote
liftIO $ onLocal r $ do
ensureInitialized
ok <- Annex.Content.getViaTmp key $
rsyncOrCopyFile params keysrc
Annex.Content.saveState

View file

@ -61,18 +61,21 @@ encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r =
withkey a k = cip k >>= maybe (a k) (a . snd)
cip = cipherKey c
{- Gets encryption Cipher. The decrypted Cipher is cached in the Annex
{- Gets encryption Cipher. The decrypted Ciphers are cached in the Annex
- state. -}
remoteCipher :: RemoteConfig -> Annex (Maybe Cipher)
remoteCipher c = maybe expensive cached =<< Annex.getState Annex.cipher
remoteCipher c = go $ extractCipher c
where
cached cipher = return $ Just cipher
expensive = case extractCipher c of
Nothing -> return Nothing
Just encipher -> do
go Nothing = return Nothing
go (Just encipher) = do
cache <- Annex.getState Annex.ciphers
case M.lookup encipher cache of
Just cipher -> return $ Just cipher
Nothing -> decrypt encipher cache
decrypt encipher cache = do
showNote "gpg"
cipher <- liftIO $ decryptCipher c encipher
Annex.changeState (\s -> s { Annex.cipher = Just cipher })
Annex.changeState (\s -> s { Annex.ciphers = M.insert encipher cipher cache })
return $ Just cipher
{- Gets encryption Cipher, and encrypted version of Key. -}

View file

@ -11,5 +11,7 @@ module Types.Crypto where
newtype Cipher = Cipher String
data EncryptedCipher = EncryptedCipher String KeyIds
deriving (Ord, Eq)
newtype KeyIds = KeyIds [String]
deriving (Ord, Eq)

View file

@ -53,7 +53,7 @@ upgrade = do
when e $ do
inRepo $ Git.run "rm" [Param "-r", Param "-f", Param "-q", File old]
unless bare $ inRepo $ gitAttributesUnWrite
unless bare $ inRepo gitAttributesUnWrite
showProgress
unless bare push

Utility/BadPrelude.hs (new file, 28 lines added)
View file

@ -0,0 +1,28 @@
{- Some stuff from Prelude should not be used, as it tends to be a source
- of bugs.
-
- This exports functions that conflict with the prelude, which avoids
- them being accidentally used.
-}
module Utility.BadPrelude where
{- head is a partial function; head [] is an error -}
head :: [a] -> a
head = Prelude.head
{- tail is also partial -}
tail :: [a] -> [a]
tail = Prelude.tail
{- init too -}
init :: [a] -> [a]
init = Prelude.init
{- last too -}
last :: [a] -> a
last = Prelude.last
{- read should be avoided, as it throws an error -}
read :: Read a => String -> a
read = Prelude.read

View file

@ -99,7 +99,7 @@ bandwidthUnits = error "stop trying to rip people off"
{- Do you yearn for the days when men were men and megabytes were megabytes? -}
oldSchoolUnits :: [Unit]
oldSchoolUnits = map mingle $ zip storageUnits memoryUnits
oldSchoolUnits = zipWith (curry mingle) storageUnits memoryUnits
where
mingle (Unit _ a n, Unit s' _ _) = Unit s' a n

View file

@ -11,6 +11,7 @@ import System.IO.Error
import System.Posix.Files
import System.Directory
import Control.Exception (throw)
import Control.Monad
import Utility.SafeCommand
import Utility.Conditional
@ -37,9 +38,7 @@ moveFile src dest = try (rename src dest) >>= onrename
mv tmp _ = do
ok <- boolSystem "mv" [Param "-f",
Param src, Param tmp]
if ok
then return ()
else do
unless ok $ do
-- delete any partial
_ <- try $
removeFile tmp

View file

@ -27,6 +27,19 @@ readMaybe s = case reads s of
((x,_):_) -> Just x
_ -> Nothing
{- Like break, but the character matching the condition is not included
- in the second result list.
-
- separate (== ':') "foo:bar" = ("foo", "bar")
- separate (== ':') "foobar" = ("foobar", "")
-}
separate :: (a -> Bool) -> [a] -> ([a], [a])
separate c l = unbreak $ break c l
where
unbreak r@(a, b)
| null b = r
| otherwise = (a, tail b)
{- Catches IO errors and returns a Bool -}
catchBoolIO :: IO Bool -> IO Bool
catchBoolIO a = catchDefaultIO a False

View file

@ -71,7 +71,7 @@ checkGitVersion = do
dotted = sum . mult 1 . reverse . extend 10 . map readi . split "."
extend n l = l ++ replicate (n - length l) 0
mult _ [] = []
mult n (x:xs) = (n*x) : (mult (n*100) xs)
mult n (x:xs) = (n*x) : mult (n*100) xs
readi :: String -> Integer
readi s = case reads s of
((x,_):_) -> x

debian/changelog (16 lines changed)
View file

@ -2,6 +2,22 @@ git-annex (3.20111204) UNRELEASED; urgency=low
* map: Fix a failure to detect a loop when both repositories are local
and refer to each other with relative paths.
* Prevent key names from containing newlines.
* add: If interrupted, add can leave files converted to symlinks but not
yet added to git. Running the add again will now clean up this situation.
* Fix caching of decrypted ciphers, which failed when drop had to check
multiple different encrypted special remotes.
* unannex: Can be run on files that have been added to the annex, but not
yet committed.
* sync: New command that synchronises the local repository and default
remote, by running git commit, pull, and push for you.
* Version monad-control dependency in cabal file.
* Fix bug in last version in getting contents from bare repositories.
* Ensure that git-annex branch changes are merged into git-annex's index,
which fixes a bug that could cause changes that were pushed to the
git-annex branch to get reverted. As a side effect, it's now safe
for users to check out and commit changes directly to the git-annex
branch.
-- Joey Hess <joeyh@debian.org> Sun, 04 Dec 2011 12:22:37 -0400

View file

@ -0,0 +1,27 @@
Hi there,
After updating to 3.20111203 (on Arch Linux) I noticed I was not able to use `git annex get` from an SSH remote (server running Arch Linux, same version of git-annex): "requested key is not present". Same behavior with current master (commit 6cf28585). I had no issue with the previous version (3.20111122).
On this server, I was able to track down the issue using `git-annex-shell inannex` and `strace`:
$ strace -f -o log git-annex-shell inannex ~/photos-annex.git WORM-s369360-m1321602916--2011-11-17.jpg
$ echo $?
1
$ tail -n20 log
[...]
25623 chdir("/home/schnouki/git-annex") = 0
25623 stat("/home/schnouki/photos-annex.git/annex/objects/082/676/WORM-s369360-m1321602916--2011-11-17.jpg/WORM-s369360-m1321602916--2011-11-17.jpg", {st_mode=S_IFREG|0400, st_size=369360, ...}) = 0
25623 open("annex/objects/082/676/WORM-s369360-m1321602916--2011-11-17.jpg/WORM-s369360-m1321602916--2011-11-17.jpg", O_RDONLY) = -1 ENOENT (No such file or directory)
[...]
Note there is a call to `stat()` with the full path to the requested file, and *then* a call to `open()` with a relative path -- which causes this call to fail, and git-annex-shell to return 1. With 3.20111122, there was no call to `stat()`, just a successful call to `open()` with a full absolute path.
Using `git bisect` I was able to determine that this bug appeared in commit 64672c62 ("refactor"). Reverting it makes `git-annex-shell` work as expected, but I'm sure there are better ways to fix this. However I don't know enough Haskell to do it myself.
Could you please try to fix this in a future version?
> Thanks for a very good bug report.
>
> I've fixed this stupid mistake introduced in the code refactoring.
> [[done]]
> --[[Joey]]

View file

@ -0,0 +1,5 @@
Found this out the hard way. See the comment in the below post for what happens.
[[/forum/git_annex_add_crash_and_subsequent_recovery/]]
> [[fixed|done]] --[[Joey]]

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="http://joey.kitenet.net/"
nickname="joey"
subject="comment 1"
date="2011-12-06T16:49:32Z"
content="""
This only happens with the WORM backend (or possibly with SHA1E if the file's extension has a newline).
The problem is not the newline in the file, but the newline in the key generated for the file. It's probably best to just disallow such keys being created.
"""]]

View file

@ -0,0 +1,95 @@
Below is a test case which shows a way that the git-annex branch
can become corrupted and lose data, including location log records and
uuid.log lines.
At the end, a commit on the git-annex branch removes one of the 2 lines
from the uuid.log; which should never happen.
The actual problem occurs earlier, at the "push point". Here a repo is
cloned from the main one, initialized (adding the last uuid.log line),
and then pushed back to the main one. That push is a fast-forward, so is
allowed to directly update the git-annex branch in the main repo:
b884fe5..c497739 git-annex -> git-annex
Now the git-annex branch has a change that is not reflected in
`.git/annex/index`, so the next time a change is made, it's committed
using the out of date index, which causes a reversion of the changes
that were pushed to the branch.
---
## Thoughts
This is essentially the same reason why git blocks pushes to the checked-out
branch of a non-bare repository.
This problem only affects workflows that involve pushing. Pulling workflows
do not directly update the local git-annex branch, so avoid the problem.
And while bare repos are pushed to, they rarely have changes made directly
to their git-annex branches, so while I think the same problem could
happen with pushing to a bare repo, it's unlikely.
None of which is to say this is not a bad bug that needs to be comprehensively
fixed.
Probably git-annex needs to record which ref of the git-annex branch
corresponds to its index, and if the branch is at a different ref,
merge it into the index.
> And now that's [[done]]. I managed to do it with very little slowdown.
>
> A side benefit is that users can now safely check out the git-annex
> branch and commit changes to it, and git-annex will notice them.
> Before, it was documented to ignore such changes.
> --[[Joey]]
---
## Workaround
Users who want to prevent this bug from occurring when pushing to their
non-bare repositories can install this script as `.git/hooks/update`
<pre>
#!/bin/sh
if [ "$1" = refs/heads/git-annex ]; then
exit 1
fi
</pre>
--[[Joey]]
---
## Test Case
<pre>
#!/bin/sh
mkdir annextest
cd annextest
git init dir1
cd dir1
git annex init
touch foo
echo hi > bar
git annex add
git commit -m add
cd ..
git clone dir1 dir2
cd dir2
git annex init otherdir
git annex get
# push point
git push
cd ..
cd dir1
echo "before"
git show git-annex:uuid.log
git annex drop foo --force
echo "after"
git show git-annex:uuid.log
</pre>

View file

@ -0,0 +1,43 @@
The fix for the [[git-annex_branch_corruption]] bug is subject to a race.
With that fix, git-annex does this when committing a change to the branch:
1. lock the journal file (this avoids git-annex racing itself, FWIW)
2. check what the head of the branch points to, to see if a newer branch
has appeared
3. if so, updates the index file from the branch
4. stages changes in the index
5. commits to the branch using the index file
If a push to the branch comes in during 2-5, then
[[git-annex_branch_corruption]] could still occur.
---
## approach 1, using locking
Add an update hook and a post-update hook. The update hook
will use locking to ensure that no git-annex is currently running
a commit, and block any git-annex's from starting one. It
will background itself, and remain running during the push.
The post-update hook will signal it to exit.
I don't like this approach much, since it involves a daemon, two hooks,
and lots of things to go wrong. And it blocks using git-annex during a
push. This approach should be a last resort.
## approach 2, lockless method
After a commit is made to the branch, check to see if the parent of
the commit is the same ref that the index file was last updated to. If it's
not, then the race occurred.
How to recover from the race? Well, just union merging the parent of the
commit into the index file and re-committing should work, I think. When
the race occurs, the commit reverts its parent's changes, and this will
redo them.
(Of course, this re-commit will also be subject to the race, and
will need the same check for the race as the other commits. It won't loop
forever, I hope.)
--[[Joey]]
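
A rough sketch of the approach 2 check, in shell for illustration only (the real check would live in git-annex itself; `.git/annex/index.lck` is assumed to be where setIndexRef records the ref the index was last updated to):

<pre>
expected=$(cat .git/annex/index.lck)
parent=$(git rev-parse git-annex^)   # parent of the commit that was just made
if [ "$parent" != "$expected" ]; then
        # a push landed between steps 2 and 5, and our commit reverted it;
        # union merge the parent back into the index and re-commit
        # (the re-commit needs this same check again)
        echo "race detected; remerging $parent"
fi
</pre>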

View file

@ -0,0 +1,101 @@
Somehow git-annex has again lost a complete rsync remote with encryption enabled...
git-annex version was 3.20111111
> "once again" ? When did it do it before?
>> It's the second time i uploaded all the files to an encrypted rsync remote and git-annex is not able to find it anymore. --[[gebi]]
> "lost" ? How is the remote lost?
>> git-annex is not able to find any files on the encrypted rsync remote anymore.
>> Copy does not copy the content again but drop doesn't find it, thus it's somehow "lost" and in an strange state.
>> I've also had the state where the content was already on the remote side but git-annex copy would copy it again,
>> ignoring all the data on the remote side. --[[gebi]]
Both *remoteserver* and *localserver* are rsync remotes with enabled encryption.
All commands are executed on the git repository on my laptop.
Target of origin is a gitolite repository without annex support (thus the two rsync remotes).
Is there a way in git-annex to verify that all files fulfill the numcopies setting, in my case
numcopies=2, and can be read from the remotes they are on?
I thought that *copy* would verify that, but seems not.
% g a copy --to remoteserver tools
copy tools/md5_sha1_utility.exe (gpg) (checking remoteserver...) ok
copy tools/win32diskimager-RELEASE-0.2-r23-win32.zip (checking remoteserver...) ok
% g a copy --to localserver tools
copy tools/md5_sha1_utility.exe (gpg) (checking localserver...) ok
copy tools/win32diskimager-RELEASE-0.2-r23-win32.zip (checking localserver...) ok
% g a drop tools
drop tools/md5_sha1_utility.exe (gpg) (checking localserver...) (checking remoteserver...) (unsafe)
Could only verify the existence of 1 out of 2 necessary copies
Try making some of these repositories available:
718a9b5c-1b4a-11e1-8211-6f094f20e050 -- remoteserver (remote backupserver)
(Use --force to override this check, or adjust annex.numcopies.)
failed
drop tools/win32diskimager-RELEASE-0.2-r23-win32.zip (checking localserver...) (checking remoteserver...) (unsafe)
Could only verify the existence of 1 out of 2 necessary copies
Try making some of these repositories available:
718a9b5c-1b4a-11e1-8211-6f094f20e050 -- remoteserver (remote backupserver)
(Use --force to override this check, or adjust annex.numcopies.)
failed
git-annex: drop: 2 failed
% g a fsck tools
fsck tools/md5_sha1_utility.exe (checksum...) ok
fsck tools/win32diskimager-RELEASE-0.2-r23-win32.zip (checksum...) ok
> Copy does do an explicit check that the content is present on remoteserver,
> and based on the above, the content was found to be already there,
> which is why it did not copy it again.
>
> Drop does an identical check that the content is present, and
> since it failed to find it, I am left thinking something must have
> happened to the remote in between the copy and the drop to cause the
> content to go away.
>
> What happens if you copy the data to remoteserver again? --[[Joey]]
The commands above are executed within a few seconds and are completely repeatable. --[[gebi]]
> In that case, why don't you run the commands with `-d` to see the actual
> rsync command it's running to check if the content is present.
> Then you can try repeatedly running the command by hand and see why it
> sometimes succeeds and sometimes fail.
The commands fail and succeed consistently, not either or.
git annex copy consistently succeeds without copying the content, because it checks the remote and finds the content already there.
git annex drop consistently fails with an error because the content is missing on the exact same remote that git annex copy checks
and thinks the content is there. --[[gebi]]
> The command will be something like this:
> `rsync --quiet hostname:/dir/file 2>/dev/null`
>
> The exit status is what's used to see if content is present -- and
> currently any failure even a failure to connect is taken to mean it's not
> present. --[[Joey]]
hm... that's interesting, git annex drop and git annex copy check for different hashes on the same file at the same remote...
git annex drop -d tools/md5_sha1_utility.exe
> Running: sh ["-c","rsync --quiet 'REMOVED_HOST:annex/work/JF/z7/'\"'\"'GPGHMACSHA1--7ffb3840f0e37aee964352e98808403655e8473a/GPGHMACSHA1--7ffb3840f0e37aee964352e98808403655e8473a'\"'\"'' 2>/dev/null"]
git annex copy --to remoteserver -d tools/md5_sha1_utility.exe
> Running: sh ["-c","rsync --quiet 'REMOVED_HOST:annex/work/1F/PQ/'\"'\"'GPGHMACSHA1--ff075e57f649300c5698e346be74fb6e22d70e35/GPGHMACSHA1--ff075e57f649300c5698e346be74fb6e22d70e35'\"'\"'' 2>/dev/null"]
And yes, only the hash *annex copy* is checking for exists on the remote side. --[[gebi]]
> Ok, this is due to too aggressive caching of the decrypted cipher
> for a remote. When dopping, it decrypts localserver's cipher,
> caches it, and then when checking remoteserver it says hey,
> here's an already decrypted cipher -- it must be the right one!
>
> Problem reproduced here, and fixed. [[done]] --[[Joey]]

View file

@ -0,0 +1,25 @@
Perhaps stupidly I added some very large bare git repos into a git-annex.
This took a very long time, used lots of memory, and then crashed. I didn't catch the error (which is annoying) - sorry about that. IIRC it is the same error if one Ctrl-c's the addition.
I ran `git annex add .` a second time and eventually killed it (I perhaps should have waited - I now think it was working).
A `git annex unannex` fixed up some files but somehow I managed to end up with tonnes of files all sym-linked into the git annex object directory but somehow not recognised as annexed files. I'm assuming that they somehow didn't make it into git annex's meta-data layer (or equivalent).
Commands such as `git annex {fsck,whereis,unannex} weirdfile` immediately returned without error.
I've now spent a lot of manual time copying the files back. Doing the following, not the cleverest but I was a little panicky about my data...
find . -type l -exec mv \{} \{}.link \; #Move link names out of the way
find . -type l -exec cp \{} \{}.cp \; #Copy follows links so we can copy target back to link location
find . -type f -name "*.link.cp" | xargs -n 1 rename 's/\.link\.cp//' #Change to original name
find . -type l -exec rm \{} \; #Ditch the links
git annex unused
git annex dropunused `seq 9228`
9228 files were found to be unused, which gives an idea of the scale of the number of "lost" files, for want of a better term.
A pretty poor bug report as these things go. Anyone any idea what might have happened (it didn't seem space or memory related)? Or how I might have fixed it a little more cleverly?
For reference I am using stable Debian, git annex version 3.20111011.

View file

@ -0,0 +1,18 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawnXybLxkPMYpP3yw4b_I6IdC3cKTD-xEdU"
nickname="Matt"
subject="comment 1"
date="2011-12-06T12:50:27Z"
content="""
Ah HA! Looks like I found the cause of this.
[matt@rss01:~/files/matt_ford]0> git annex add mhs
add mhs/Accessing_Web_Manager_V10.pdf ok
....
add mhs/MAHSC Costing Request Form Dual
Organisations - FINAL v20 Oct 2010.xls git-annex: unknown response from git cat-file refs/heads/git-annex:8d5/ed4/WORM-s568832-m1323164214--MAHSC Costing Request Form Dual missing
Spot the file name with a newline character in it! This causes the error message above. It seems that the files preceding this badly named file are sym-linked but not registered.
Perhaps a bug?
"""]]

View file

@ -0,0 +1,19 @@
[[!comment format=mdwn
username="http://joey.kitenet.net/"
nickname="joey"
subject="comment 2"
date="2011-12-06T17:08:37Z"
content="""
The bug with newlines is now fixed.
Thought I'd mention how to clean up from interrupting `git annex add`.
When you do that, it doesn't get a chance to `git add` the files it's
added (this is normally done at the end, or sometimes at points in the middle when you're adding a *lot* of files).
Which is also why fsck, whereis, and unannex wouldn't operate on them, since they only deal with files in git.
So the first step is to manually use `git add` on any symlinks.
Then, `git commit` as usual.
At that point, `git annex unannex` would get you back to your starting state.
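As a concrete sketch of those steps (the file name below is just an example), the recovery amounts to:

<pre>
# stage the symlinks that the interrupted add left behind
git add mymovie.mkv
# commit them, as the interrupted add would have done
git commit -m 'finish interrupted add'
# only if you then want the file back out of the annex:
git annex unannex mymovie.mkv
</pre>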
"""]]

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawnXybLxkPMYpP3yw4b_I6IdC3cKTD-xEdU"
nickname="Matt"
subject="comment 3"
date="2011-12-07T07:39:15Z"
content="""
Ah - very good to know that recovery is easier than the method I used.
I wonder if it could be made a feature to automatically and safely recover/resume from an interrupted `git add`?
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="http://joey.kitenet.net/"
nickname="joey"
subject="comment 4"
date="2011-12-07T20:54:51Z"
content="""
Good idea! I've made `git annex add` recover when ran a second time.
"""]]

View file

@ -0,0 +1,11 @@
I thought I'd followed the walk through when initially setting up my repos.
However I find that I have to do the following to sync my annexes.
git pull remote master
git checkout git-annex
git pull remote git-annex
git checkout master
git annex get .
Has something gone wrong? I see no mention of syncing git-annex repos in the walk-through...

View file

@ -0,0 +1,36 @@
[[!comment format=mdwn
username="http://joey.kitenet.net/"
nickname="joey"
subject="comment 1"
date="2011-12-06T16:43:29Z"
content="""
You're taking a very long and strange way to a place that you can reach as follows:
<pre>
git pull remote
git annex get .
</pre>
Which is just as shown in [[walkthrough/getting_file_content]].
In particular, \"git pull remote\" first fetches all branches from the remote, including the git-annex branch.
When you say \"git pull remote master\", you're preventing it from fetching the git-annex branch.
If for some reason you want the slightly longer way around, it is:
<pre>
git pull remote master
git fetch remote git-annex
git annex get .
</pre>
Or, equivalently, but with fewer network connections:
<pre>
git fetch remote
git merge remote/master
git annex get .
</pre>
BTW, notice that this is all bog-standard git branch pulling stuff, not specific to git-annex in the least.
Consult your extensive and friendly git documentation for details. :)
"""]]

View file

@ -0,0 +1,14 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawnXybLxkPMYpP3yw4b_I6IdC3cKTD-xEdU"
nickname="Matt"
subject="comment 2"
date="2011-12-06T23:23:29Z"
content="""
Doh! Total brain melt on my part. Thanks for the additional info. I wasn't taking my time and reading things properly - I kept assuming that the full remote pull failed due to the warning:
You asked to pull from the remote 'rss', but did not specify
a branch. Because this is not the default configured remote
for your current branch, you must specify a branch on the command line.
Rookie mistake indeed.
"""]]

View file

@ -0,0 +1,46 @@
I'm using git annex to manage my movie collection on various devices: my laptop, an NSLU tucked away somewhere with lots of space, and some external hard drives. For this use case, I do not need the full power of git as a version control system, so having to run "git commit" and coming up with commit messages is annoying. Also, this makes sense for a version control system, but not for my media collection:
$ git annex add Hot\ Fuzz\ -\ English.mkv
add Hot Fuzz - English.mkv (checksum...) ok
(Recording state in git...)
$ git commit -m 'another movie added'
[master 851dc8a] another movie added
1 files changed, 1 insertions(+), 0 deletions(-)
create mode 120000 00 Noch nicht gesehen/Hot Fuzz - English.mkv
$ git push jeff
Counting objects: 38, done.
Delta compression using up to 2 threads.
Compressing objects: 100% (20/20), done.
Writing objects: 100% (26/26), 2.00 KiB, done.
Total 26 (delta 11), reused 0 (delta 0)
remote: error: refusing to update checked out branch: refs/heads/master
remote: error: By default, updating the current branch in a non-bare repository
remote: error: is denied, because it will make the index and work tree inconsistent
remote: error: with what you pushed, and will require 'git reset --hard' to match
remote: error: the work tree to HEAD.
remote: error:
remote: error: You can set 'receive.denyCurrentBranch' configuration variable to
remote: error: 'ignore' or 'warn' in the remote repository to allow pushing into
remote: error: its current branch; however, this is not recommended unless you
remote: error: arranged to update its work tree to match what you pushed in some
remote: error: other way.
remote: error:
remote: error: To squelch this message and still keep the default behaviour, set
remote: error: 'receive.denyCurrentBranch' configuration variable to 'refuse'.
To jeff:/mnt/media/Movies
! [rejected] git-annex -> git-annex (non-fast-forward)
! [remote rejected] master -> master (branch is currently checked out)
error: failed to push some refs to 'jeff:/mnt/media/Movies'
To prevent you from losing history, non-fast-forward updates were rejected
Merge the remote changes (e.g. 'git pull') before pushing again. See the
'Note about fast-forwards' section of 'git push --help' for details.
It seems that to successfully make the new files known to the other side, I have to log into jeff and pull _from_ my current machine.
What I would like to have is that
* git annex add does not require a commit afterwards.
* Changes to the files are automatically picked up with the next git-annex call (similar to how etckeeper works).
* Commands "git annex push" and "git annex pull" that will sync the metadata (i.e. the list of files) in both directions without further manual intervention, at least not until the two repositories have diverged in a way that is not possible to merge sensibly.
Summary: git-annex is great. git is not always. Please make it possible to use git annex without having to use git.

View file

@ -0,0 +1,32 @@
[[!comment format=mdwn
username="http://joey.kitenet.net/"
nickname="joey"
subject="comment 1"
date="2011-12-09T22:56:11Z"
content="""
First, you need a bare git repository that you can push to, and pull from. This simplifies most git workflows.
Secondly, I use [mr](http://kitenet.net/~joey/code/mr/), with this in `.mrconfig`:
<pre>
[DEFAULT]
lib =
annexupdate() {
git commit -a -m update || true
git pull \"$@\"
git annex merge
git push || true
}
[lib/sound]
update = annexupdate
[lib/big]
update = annexupdate
</pre>
Which makes \"mr update\" in repositories where I rarely care about git details take care of syncing my changes.
I also make \"mr update\" do a \"git annex get\" of some files in some repositories that I want to always populate. git-annex and mr go well together. :)
Perhaps my annexupdate above should be available as \"git annex sync\"?
"""]]

View file

@ -0,0 +1,15 @@
[[!comment format=mdwn
username="http://www.joachim-breitner.de/"
nickname="nomeata"
subject="comment 2"
date="2011-12-10T16:28:29Z"
content="""
Thanks for the tips so far. I guess a bare-only repo helps, but it is also something that I don't _need_ (for my use case), and only have to do because git works like this.
Also, if I have a mobile device that I want to push to, then I'd have to have two repositories on the device, as I might not be able to reach my main bare repository when traveling, but I cannot push to the „real“ repo on the mobile device from my computer. I guess I am spoiled by darcs, which will happily push to a checked out
remote repository, updating the checkout if possible without conflict.
If I introduce a central bare repository to push to and from, I'd still have to have the other non-bare repos as remotes, so that git-annex will know about them and their files, right?
I'd appreciate a \"git annex sync\" that does what you described (commit all, pull, merge, push). Especially if it comes in a \"git annex sync --all\" variant that syncs all reachable repositories.
"""]]

View file

@ -0,0 +1,12 @@
[[!comment format=mdwn
username="http://joey.kitenet.net/"
nickname="joey"
subject="comment 3"
date="2011-12-10T19:43:04Z"
content="""
Git can actually push into a non-bare repository, so long as the branch you change there is not a checked out one. Pushing into `remotes/$foo/master` and `remotes/$foo/git-annex` would work, however determining the value that the repository expects for `$foo` is something git cannot do on its own. And of course you'd still have to `git merge remotes/$foo/master` to get the changes.
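For example, something along these lines might work (the remote name `laptop` and the value `desktop` for `$foo` are made up for illustration):

<pre>
# on the desktop, push into the laptop's remote-tracking refs
git push laptop master:refs/remotes/desktop/master \
        git-annex:refs/remotes/desktop/git-annex
# then, on the laptop, the pushed branches still have to be merged
git merge remotes/desktop/master
git annex merge
</pre>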
Yes, you still keep the non-bare repos as remotes when adding a bare repository, so git-annex knows how to get to them.
I've made `git annex sync` run the simple script above. Perhaps it can later be improved to sync all repositories.
"""]]

View file

@ -78,4 +78,4 @@ Joey Hess <joey@kitenet.net>
<http://git-annex.branchable.com/>
Warning: this page is automatically made into a man page via [mdwn2man](http://git.ikiwiki.info/?p=ikiwiki;a=blob;f=mdwn2man;hb=HEAD). Edit with care
Warning: Automatically converted into a man page by mdwn2man. Edit with care

View file

@ -120,6 +120,17 @@ subdirectories).
Use this to undo an unlock command if you don't want to modify
the files, or have made modifications you want to discard.
* sync
Use this command when you want to synchronize the local repository
with its default remote (typically "origin"). The sync process involves
first committing all local changes, then pulling and merging any changes
from the remote, and finally pushing the repository's state to the remote.
You can use standard git commands to do each of those steps by hand,
or if you don't want to worry about the details, you can use sync.
Note that sync does not transfer any file contents from or to the remote.
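The process is roughly equivalent to running the following by hand, assuming the default remote is named "origin" (a sketch only; sync also tolerates the commit failing on a clean tree):

	git commit -a -m sync
	git pull origin
	git push origin :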
* addurl [url ...]
Downloads each url to a file, which is added to the annex.
@ -623,4 +634,4 @@ Joey Hess <joey@kitenet.net>
<http://git-annex.branchable.com/>
Warning: this page is automatically made into a man page via [mdwn2man](http://git.ikiwiki.info/?p=ikiwiki;a=blob;f=mdwn2man;hb=HEAD). Edit with care
Warning: Automatically converted into a man page by mdwn2man. Edit with care

View file

@ -35,4 +35,4 @@ Joey Hess <joey@kitenet.net>
<http://git-annex.branchable.com/>
Warning: this page is automatically made into a man page via [mdwn2man](http://git.ikiwiki.info/?p=ikiwiki;a=blob;f=mdwn2man;hb=HEAD). Edit with care
Warning: Automatically converted into a man page by mdwn2man. Edit with care

View file

@ -22,17 +22,9 @@ deleting or changing the file contents.
This branch is managed by git-annex, with the contents listed below.
The file `.git/annex/index` is a separate git index file it uses
to accumulate changes for the git-annex. Also, `.git/annex/journal/` is used
to record changes before they are added to git.
Note that for speed reasons, git-annex assumes only it will modify this
branch. If you go in and make changes directly, it will probably revert
your changes in its next commit to the branch.
The best way to make changes to the git-annex branch is instead
to create a branch of it, with a name like "my/git-annex", and then
use "git annex merge" to automerge your branch into the main git-annex
branch.
to accumulate changes for the git-annex branch.
Also, `.git/annex/journal/` is used to record changes before they
are added to git.
### `uuid.log`

View file

@ -0,0 +1,72 @@
## Intro
This tip is based on my (Matt Ford) experience of using `git annex` with my out-and-about netbook which hits many different wifi networks and has no fixed home or address.
I'm not using a bare repository that allows pushing (an alternative solution) nor do I fancy allowing `git push` to run against my desktop's checked-out repository (perhaps I worry over nothing?)
None of this is really `git annex` specific but I think it is useful to know...
## Dealing with no fixed hostname
Essentially set up two repos as per the [[walkthrough]].
Desktop as follows:
cd ~/annex
git init
git annex init "desktop"
And the laptop like this
git clone ssh://desktop/annex
git init
git annex init "laptop"
Now we want to add the repos as remotes of each other.
For the laptop it is easy:
git remote add desktop ssh://desktop/~/annex
However, adding the laptop's ever-changing hostname as a remote on the desktop is a little tricky. We make use of remote SSH tunnels to do this. Essentially we have the laptop (which always knows its own name and address, and knows the address of the desktop) create a tunnel that starts on an arbitrary port at the desktop and heads back to the laptop's own SSH server port (22).
To do this make part of your laptop's SSH config look like this:
Host desktop
User matt
HostName desktop.example.org
RemoteForward 2222 localhost:22
Now on the desktop to connect over the tunnel to the laptop's SSH port you need this:
Host laptop
User matt
HostName localhost
port 2222
So to add the desktop's remote:
a) From the laptop ensure the tunnel is up
ssh desktop
b) From the desktop add the remote
git remote add laptop ssh://laptop/~/annex
So now you can work on the train, pop on the wifi at work upon arrival, and sync up with a `git pull && git annex get`.
An alternative solution may be to use direct tunnels over Openvpn.
## Optimising SSH
Running a `git annex get .`, at least in the version I have, creates a new SSH connection for every file transfer (maybe this should be a feature request?)
Lots of new small files in an _annex_ cause lots of connections to be made quickly: this is a relatively expensive overhead and is enough for connection limiting to start in my case. The process can be made much faster by using SSH's connection sharing capabilities. An SSH config like this should do it:
# Global Settings
ControlMaster auto
ControlPersist 30
ControlPath ~/.ssh/master-%r@%h:%p
This will create a master connection for sharing if one isn't present, maintain it for 30 seconds after closing down the connection (just in case), and automatically use the master connection for subsequent connections. Wins all round!

View file

@ -1,4 +1,7 @@
Git-annex doesn't compile with the latest version of monad-control. Would it be hard to support that new version?
> I hope not. I have been waiting for it to land in Debian before trying to
> deal with its changes. --[[Joey]]
> I have been waiting for it to land in Debian before trying to
> deal with its changes.
>
> There is now a branch in git called `new-monad-control` that will build
> with the new monad-control. --[[Joey]]

doc/users/gebi.mdwn (new file, 1 line added)
View file

@ -0,0 +1 @@
Michael Gebetsroither <michael@mgeb.org>

View file

@ -42,5 +42,5 @@ main = do
_ <- Git.useIndex (tmpIndex g)
setup g
Git.UnionMerge.merge aref bref g
Git.commit "union merge" newref [aref, bref] g
_ <- Git.commit "union merge" newref [aref, bref] g
cleanup g

View file

@ -11,11 +11,10 @@ import Test.QuickCheck
import System.Posix.Directory (changeWorkingDirectory)
import System.Posix.Files
import Control.Exception (bracket_, bracket)
import Control.Exception (bracket_, bracket, throw)
import System.IO.Error
import System.Posix.Env
import qualified Control.Exception.Extensible as E
import Control.Exception (throw)
import qualified Data.Map as M
import System.IO.HVFS (SystemFS(..))