Merge commit '3.20120123' into debian-stable

Conflicts:
	Annex.hs
	Remote/S3.hs
	git-annex.cabal
This commit is contained in:
Joey Hess 2012-02-08 12:40:13 -04:00
commit 1211d19dbf
39 changed files with 577 additions and 92 deletions

View file

@ -27,6 +27,7 @@ module Annex (
) where
import Control.Monad.State
import System.Posix.Types (Fd)
import Common
import qualified Git
@ -78,6 +79,7 @@ data AnnexState = AnnexState
, forcetrust :: TrustMap
, trustmap :: Maybe TrustMap
, ciphers :: M.Map EncryptedCipher Cipher
, lockpool :: M.Map FilePath Fd
, flags :: M.Map String Bool
, fields :: M.Map String String
}
@ -100,6 +102,7 @@ newState gitrepo = AnnexState
, forcetrust = M.empty
, trustmap = Nothing
, ciphers = M.empty
, lockpool = M.empty
, flags = M.empty
, fields = M.empty
}

View file

@ -22,6 +22,7 @@ module Annex.Content (
getKeysPresent,
saveState,
downloadUrl,
preseedTmp,
) where
import System.IO.Error (try)
@ -40,6 +41,7 @@ import Utility.FileMode
import qualified Utility.Url as Url
import Types.Key
import Utility.DataUnits
import Utility.CopyFile
import Config
import Annex.Exception
@ -301,3 +303,21 @@ downloadUrl urls file = do
g <- gitRepo
o <- map Param . words <$> getConfig g "web-options" ""
liftIO $ anyM (\u -> Url.download u o file) urls
{- Copies a key's content, when present, to a temp file.
- This is used to speed up some rsyncs. -}
preseedTmp :: Key -> FilePath -> Annex Bool
preseedTmp key file = go =<< inAnnex key
where
go False = return False
go True = do
ok <- copy
when ok $ liftIO $ allowWrite file
return ok
copy = do
present <- liftIO $ doesFileExist file
if present
then return True
else do
s <- inRepo $ gitAnnexLocation key
liftIO $ copyFileExternal s file

43
Annex/LockPool.hs Normal file
View file

@ -0,0 +1,43 @@
{- git-annex lock pool
-
- Copyright 2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.LockPool where
import qualified Data.Map as M
import System.Posix.Types (Fd)
import Common.Annex
import Annex
{- Create a specified lock file, and takes a shared lock. -}
lockFile :: FilePath -> Annex ()
lockFile file = go =<< fromPool file
where
go (Just _) = return () -- already locked
go Nothing = do
fd <- liftIO $ openFd file ReadOnly (Just stdFileMode) defaultFileFlags
liftIO $ waitToSetLock fd (ReadLock, AbsoluteSeek, 0, 0)
changePool $ M.insert file fd
unlockFile :: FilePath -> Annex ()
unlockFile file = go =<< fromPool file
where
go Nothing = return ()
go (Just fd) = do
liftIO $ closeFd fd
changePool $ M.delete file
getPool :: Annex (M.Map FilePath Fd)
getPool = getState lockpool
fromPool :: FilePath -> Annex (Maybe Fd)
fromPool file = M.lookup file <$> getPool
changePool :: (M.Map FilePath Fd -> M.Map FilePath Fd) -> Annex ()
changePool a = do
m <- getPool
changeState $ \s -> s { lockpool = a m }

115
Annex/Ssh.hs Normal file
View file

@ -0,0 +1,115 @@
{- git-annex ssh interface, with connection caching
-
- Copyright 2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.Ssh (
sshParams,
sshCleanup,
) where
import qualified Data.Map as M
import System.IO.Error (try)
import Common.Annex
import Annex.LockPool
import qualified Git
import qualified Git.Config
{- Generates parameters to ssh to a given host (or user@host) on a given
- port, with connection caching. -}
sshParams :: (String, Maybe Integer) -> [CommandParam] -> Annex [CommandParam]
sshParams (host, port) opts = go =<< sshInfo (host, port)
where
go (Nothing, params) = ret params
go (Just socketfile, params) = do
cleanstale
liftIO $ createDirectoryIfMissing True $ parentDir socketfile
lockFile $ socket2lock socketfile
ret params
ret ps = return $ ps ++ opts ++ portParams port ++ [Param host]
-- If the lock pool is empty, this is the first ssh of this
-- run. There could be stale ssh connections hanging around
-- from a previous git-annex run that was interrupted.
cleanstale = whenM (null . filter isLock . M.keys <$> getPool) $
sshCleanup
sshInfo :: (String, Maybe Integer) -> Annex (Maybe FilePath, [CommandParam])
sshInfo (host, port) = do
caching <- Git.configTrue <$> fromRepo (Git.Config.get "annex.sshcaching" "true")
if caching
then do
dir <- fromRepo $ gitAnnexSshDir
let socketfile = dir </> hostport2socket host port
return $ (Just socketfile, cacheParams socketfile)
else return (Nothing, [])
cacheParams :: FilePath -> [CommandParam]
cacheParams socketfile =
[ Param "-S", Param socketfile
, Params "-o ControlMaster=auto -o ControlPersist=yes"
]
portParams :: Maybe Integer -> [CommandParam]
portParams Nothing = []
portParams (Just port) = [Param "-p", Param $ show port]
{- Stop any unused ssh processes. -}
sshCleanup :: Annex ()
sshCleanup = do
dir <- fromRepo $ gitAnnexSshDir
liftIO $ createDirectoryIfMissing True dir
sockets <- filter (not . isLock) <$> liftIO (dirContents dir)
forM_ sockets cleanup
where
cleanup socketfile = do
-- Drop any shared lock we have, and take an
-- exclusive lock, without blocking. If the lock
-- succeeds, nothing is using this ssh, and it can
-- be stopped.
let lockfile = socket2lock socketfile
unlockFile lockfile
fd <- liftIO $ openFd lockfile ReadWrite (Just stdFileMode) defaultFileFlags
v <- liftIO $ try $ setLock fd (WriteLock, AbsoluteSeek, 0, 0)
case v of
Left _ -> return ()
Right _ -> stopssh socketfile
liftIO $ closeFd fd
stopssh socketfile = do
(_, params) <- sshInfo $ socket2hostport socketfile
_ <- liftIO $ do
-- "ssh -O stop" is noisy on stderr even with -q
let cmd = unwords $ toCommand $
[ Params "-O stop"
] ++ params
_ <- boolSystem "sh"
[ Param "-c"
, Param $ "ssh " ++ cmd ++ " >/dev/null 2>/dev/null"
]
--try $ removeFile socketfile
return ()
-- Cannot remove the lock file; other processes may
-- be waiting on our exclusive lock to use it.
return ()
hostport2socket :: String -> Maybe Integer -> FilePath
hostport2socket host Nothing = host
hostport2socket host (Just port) = host ++ "!" ++ show port
socket2hostport :: FilePath -> (String, Maybe Integer)
socket2hostport socket
| null p = (h, Nothing)
| otherwise = (h, readMaybe p)
where
(h, p) = separate (== '!') $ takeFileName socket
socket2lock :: FilePath -> FilePath
socket2lock socket = socket ++ lockExt
isLock :: FilePath -> Bool
isLock f = lockExt `isSuffixOf` f
lockExt :: String
lockExt = ".lock"

View file

@ -9,7 +9,6 @@ module Backend.SHA (backends) where
import Common.Annex
import qualified Annex
import Annex.Content
import Types.Backend
import Types.Key
import qualified Build.SysConfig as SysConfig
@ -32,7 +31,7 @@ genBackend size
b = Backend
{ name = shaName size
, getKey = keyValue size
, fsckKey = checkKeyChecksum size
, fsckKey = Just $ checkKeyChecksum size
}
genBackendE :: SHASize -> Maybe Backend
@ -97,18 +96,14 @@ keyValueE size file = keyValue size file >>= maybe (return Nothing) addE
| otherwise = naiveextension
{- A key's checksum is checked during fsck. -}
checkKeyChecksum :: SHASize -> Key -> Annex Bool
checkKeyChecksum size key = do
checkKeyChecksum :: SHASize -> Key -> FilePath -> Annex Bool
checkKeyChecksum size key file = do
fast <- Annex.getState Annex.fast
file <- inRepo $ gitAnnexLocation key
present <- liftIO $ doesFileExist file
if not present || fast
then return True
else check =<< shaN size file
else check <$> shaN size file
where
check s
| s == dropExtension (keyName key) = return True
| otherwise = do
dest <- moveBad key
warning $ "Bad file content; moved to " ++ dest
return False
| s == dropExtension (keyName key) = True
| otherwise = False

View file

@ -21,7 +21,7 @@ backend :: Backend
backend = Backend {
name = "URL",
getKey = const (return Nothing),
fsckKey = const (return True)
fsckKey = Nothing
}
fromUrl :: String -> Key

View file

@ -18,7 +18,7 @@ backend :: Backend
backend = Backend {
name = "WORM",
getKey = keyValue,
fsckKey = const (return True)
fsckKey = Nothing
}
{- The key includes the file size, modification time, and the

View file

@ -22,6 +22,7 @@ import qualified Annex.Queue
import qualified Git
import qualified Git.Command
import Annex.Content
import Annex.Ssh
import Command
type Params = [String]
@ -92,4 +93,5 @@ shutdown :: Annex Bool
shutdown = do
saveState
liftIO Git.Command.reap -- zombies from long-running git processes
sshCleanup -- ssh connection caching
return True

View file

@ -87,7 +87,7 @@ cleanupRemote key remote ok = do
-- better safe than sorry: assume the remote dropped the key
-- even if it seemed to fail; the failure could have occurred
-- after it really dropped it
Remote.logStatus remote key False
Remote.logStatus remote key InfoMissing
return ok
{- Checks specified remotes to verify that enough copies of a key exist to

View file

@ -9,6 +9,7 @@ module Command.Fsck where
import Common.Annex
import Command
import qualified Annex
import qualified Remote
import qualified Types.Backend
import qualified Types.Key
@ -20,20 +21,31 @@ import Annex.UUID
import Utility.DataUnits
import Utility.FileMode
import Config
import qualified Option
def :: [Command]
def = [command "fsck" paramPaths seek "check for problems"]
def = [withOptions options $ command "fsck" paramPaths seek
"check for problems"]
fromOption :: Option
fromOption = Option.field ['f'] "from" paramRemote "check remote"
options :: [Option]
options = [fromOption]
seek :: [CommandSeek]
seek =
[ withNumCopies $ \n -> whenAnnexed $ start n
[ withField fromOption Remote.byName $ \from ->
withNumCopies $ \n -> whenAnnexed $ start from n
, withBarePresentKeys startBare
]
start :: Maybe Int -> FilePath -> (Key, Backend) -> CommandStart
start numcopies file (key, backend) = do
start :: Maybe Remote -> Maybe Int -> FilePath -> (Key, Backend) -> CommandStart
start from numcopies file (key, backend) = do
showStart "fsck" file
next $ perform key file backend numcopies
case from of
Nothing -> next $ perform key file backend numcopies
Just r -> next $ performRemote key file backend numcopies r
perform :: Key -> FilePath -> Backend -> Maybe Int -> CommandPerform
perform key file backend numcopies = check
@ -44,6 +56,44 @@ perform key file backend numcopies = check
, checkKeyNumCopies key file numcopies
]
{- To fsck a remote, the content is retrieved to a tmp file,
- and checked locally. -}
performRemote :: Key -> FilePath -> Backend -> Maybe Int -> Remote -> CommandPerform
performRemote key file backend numcopies remote = do
v <- Remote.hasKey remote key
case v of
Left err -> do
showNote err
stop
Right True -> withtmp $ \tmpfile -> do
copied <- getfile tmpfile
if copied then go True (Just tmpfile) else go True Nothing
Right False -> go False Nothing
where
go present localcopy = check
[ verifyLocationLogRemote key file remote present
, checkKeySizeRemote key remote localcopy
, checkBackendRemote backend key remote localcopy
, checkKeyNumCopies key file numcopies
]
withtmp a = do
pid <- liftIO getProcessID
t <- fromRepo gitAnnexTmpDir
let tmp = t </> "fsck" ++ show pid ++ "." ++ keyFile key
liftIO $ createDirectoryIfMissing True t
let cleanup = liftIO $ catch (removeFile tmp) (const $ return ())
cleanup
cleanup `after` a tmp
getfile tmp = do
ok <- Remote.retrieveKeyFileCheap remote key tmp
if ok
then return ok
else do
fast <- Annex.getState Annex.fast
if fast
then return False
else Remote.retrieveKeyFile remote key tmp
{- To fsck a bare repository, fsck each key in the location log. -}
withBarePresentKeys :: (Key -> CommandStart) -> CommandSeek
withBarePresentKeys a params = isBareRepo >>= go
@ -93,26 +143,33 @@ verifyLocationLog key desc = do
preventWrite (parentDir f)
u <- getUUID
uuids <- Remote.keyLocations key
verifyLocationLog' key desc present u (logChange key u)
verifyLocationLogRemote :: Key -> String -> Remote -> Bool -> Annex Bool
verifyLocationLogRemote key desc remote present =
verifyLocationLog' key desc present (Remote.uuid remote)
(Remote.logStatus remote key)
verifyLocationLog' :: Key -> String -> Bool -> UUID -> (LogStatus -> Annex ()) -> Annex Bool
verifyLocationLog' key desc present u bad = do
uuids <- Remote.keyLocations key
case (present, u `elem` uuids) of
(True, False) -> do
fix u InfoPresent
fix InfoPresent
-- There is no data loss, so do not fail.
return True
(False, True) -> do
fix u InfoMissing
fix InfoMissing
warning $
"** Based on the location log, " ++ desc
++ "\n** was expected to be present, " ++
"but its content is missing."
return False
_ -> return True
where
fix u s = do
fix s = do
showNote "fixing location log"
logChange key u s
bad s
{- The size of the data for a key is checked against the size encoded in
- the key's metadata, if available. -}
@ -120,24 +177,49 @@ checkKeySize :: Key -> Annex Bool
checkKeySize key = do
file <- inRepo $ gitAnnexLocation key
present <- liftIO $ doesFileExist file
case (present, Types.Key.keySize key) of
(_, Nothing) -> return True
(False, _) -> return True
(True, Just size) -> do
stat <- liftIO $ getFileStatus file
let size' = fromIntegral (fileSize stat)
if size == size'
then return True
else do
dest <- moveBad key
warning $ "Bad file size (" ++
compareSizes storageUnits True size size' ++
"); moved to " ++ dest
return False
if present
then checkKeySize' key file badContent
else return True
checkKeySizeRemote :: Key -> Remote -> Maybe FilePath -> Annex Bool
checkKeySizeRemote _ _ Nothing = return True
checkKeySizeRemote key remote (Just file) = checkKeySize' key file
(badContentRemote remote)
checkKeySize' :: Key -> FilePath -> (Key -> Annex String) -> Annex Bool
checkKeySize' key file bad = case Types.Key.keySize key of
Nothing -> return True
Just size -> do
stat <- liftIO $ getFileStatus file
let size' = fromIntegral (fileSize stat)
if size == size'
then return True
else do
msg <- bad key
warning $ "Bad file size (" ++
compareSizes storageUnits True size size' ++
"); " ++ msg
return False
checkBackend :: Backend -> Key -> Annex Bool
checkBackend = Types.Backend.fsckKey
checkBackend backend key = do
file <- inRepo (gitAnnexLocation key)
checkBackend' backend key (Just file) badContent
checkBackendRemote :: Backend -> Key -> Remote -> Maybe FilePath -> Annex Bool
checkBackendRemote backend key remote localcopy =
checkBackend' backend key localcopy (badContentRemote remote)
checkBackend' :: Backend -> Key -> Maybe FilePath -> (Key -> Annex String) -> Annex Bool
checkBackend' _ _ Nothing _ = return True
checkBackend' backend key (Just file) bad = case Types.Backend.fsckKey backend of
Nothing -> return True
Just a -> do
ok <- a key file
unless ok $ do
msg <- bad key
warning $ "Bad file content; " ++ msg
return ok
checkKeyNumCopies :: Key -> FilePath -> Maybe Int -> Annex Bool
checkKeyNumCopies key file numcopies = do
@ -166,3 +248,19 @@ missingNote file present needed untrusted =
missingNote file present needed [] ++
"\nThe following untrusted locations may also have copies: " ++
"\n" ++ untrusted
{- Bad content is moved aside. -}
badContent :: Key -> Annex String
badContent key = do
dest <- moveBad key
return $ "moved to " ++ dest
badContentRemote :: Remote -> Key -> Annex String
badContentRemote remote key = do
ok <- Remote.removeKey remote key
-- better safe than sorry: assume the remote dropped the key
-- even if it seemed to fail; the failure could have occurred
-- after it really dropped it
Remote.logStatus remote key InfoMissing
return $ (if ok then "dropped from " else "failed to drop from ")
++ Remote.name remote

View file

@ -15,6 +15,7 @@ import Annex.Content
import qualified Remote
import Annex.UUID
import qualified Option
import Logs.Presence
def :: [Command]
def = [withOptions options $ command "move" paramPaths seek
@ -97,7 +98,7 @@ toPerform dest move key = moveLock move key $ do
Right True -> finish
where
finish = do
Remote.logStatus dest key True
Remote.logStatus dest key InfoPresent
if move
then do
whenM (inAnnex key) $ removeAnnex key

View file

@ -22,6 +22,7 @@ module Locations (
gitAnnexJournalLock,
gitAnnexIndex,
gitAnnexIndexLock,
gitAnnexSshDir,
isLinkToAnnex,
annexHashes,
hashDirMixed,
@ -142,6 +143,10 @@ gitAnnexIndex r = gitAnnexDir r </> "index"
gitAnnexIndexLock :: Git.Repo -> FilePath
gitAnnexIndexLock r = gitAnnexDir r </> "index.lck"
{- .git/annex/ssh/ is used for ssh connection caching -}
gitAnnexSshDir :: Git.Repo -> FilePath
gitAnnexSshDir r = addTrailingPathSeparator $ gitAnnexDir r </> "ssh"
{- Checks a symlink target to see if it appears to point to annexed content. -}
isLinkToAnnex :: FilePath -> Bool
isLinkToAnnex s = ("/.git/" ++ objectDir) `isInfixOf` s

1
NEWS Symbolic link
View file

@ -0,0 +1 @@
debian/NEWS

View file

@ -11,6 +11,7 @@ module Remote (
name,
storeKey,
retrieveKeyFile,
retrieveKeyFileCheap,
removeKey,
hasKey,
hasKeyCheap,
@ -212,7 +213,5 @@ forceTrust level remotename = do
- in the local repo, not on the remote. The process of transferring the
- key to the remote, or removing the key from it *may* log the change
- on the remote, but this cannot always be relied on. -}
logStatus :: Remote -> Key -> Bool -> Annex ()
logStatus remote key present = logChange key (uuid remote) status
where
status = if present then InfoPresent else InfoMissing
logStatus :: Remote -> Key -> LogStatus -> Annex ()
logStatus remote key present = logChange key (uuid remote) present

View file

@ -50,6 +50,7 @@ gen r u c = do
name = Git.repoDescribe r,
storeKey = store r buprepo,
retrieveKeyFile = retrieve buprepo,
retrieveKeyFileCheap = retrieveCheap buprepo,
removeKey = remove,
hasKey = checkPresent r bupr',
hasKeyCheap = bupLocal buprepo,
@ -125,6 +126,9 @@ retrieve buprepo k f = do
tofile <- openFile f WriteMode
pipeBup params Nothing (Just tofile)
retrieveCheap :: BupRepo -> Key -> FilePath -> Annex Bool
retrieveCheap _ _ _ = return False
retrieveEncrypted :: BupRepo -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted buprepo (cipher, enck) f = do
let params = bupParams "join" buprepo [Param $ show enck]

View file

@ -41,6 +41,7 @@ gen r u c = do
name = Git.repoDescribe r,
storeKey = store dir,
retrieveKeyFile = retrieve dir,
retrieveKeyFileCheap = retrieveCheap dir,
removeKey = remove dir,
hasKey = checkPresent dir,
hasKeyCheap = True,
@ -112,6 +113,10 @@ storeHelper d key a = do
retrieve :: FilePath -> Key -> FilePath -> Annex Bool
retrieve d k f = liftIO $ withStoredFile d k $ \file -> copyFileExternal file f
retrieveCheap :: FilePath -> Key -> FilePath -> Annex Bool
retrieveCheap d k f = liftIO $ withStoredFile d k $ \file ->
catchBoolIO $ createSymbolicLink file f >> return True
retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted d (cipher, enck) f =
liftIO $ withStoredFile d enck $ \file -> catchBoolIO $ do

View file

@ -75,6 +75,7 @@ gen r u _ = do
name = Git.repoDescribe r',
storeKey = copyToRemote r',
retrieveKeyFile = copyFromRemote r',
retrieveKeyFileCheap = copyFromRemoteCheap r',
removeKey = dropKey r',
hasKey = inAnnex r',
hasKeyCheap = cheap,
@ -208,6 +209,18 @@ copyFromRemote r key file
| Git.repoIsHttp r = Annex.Content.downloadUrl (keyUrls r key) file
| otherwise = error "copying from non-ssh, non-http repo not supported"
copyFromRemoteCheap :: Git.Repo -> Key -> FilePath -> Annex Bool
copyFromRemoteCheap r key file
| not $ Git.repoIsUrl r = do
loc <- liftIO $ gitAnnexLocation key r
liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True
| Git.repoIsSsh r = do
ok <- Annex.Content.preseedTmp key file
if ok
then copyFromRemote r key file
else return False
| otherwise = return False
{- Tries to copy a key's content to a remote's annex. -}
copyToRemote :: Git.Repo -> Key -> Annex Bool
copyToRemote r key

View file

@ -47,6 +47,7 @@ encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r =
r {
storeKey = store,
retrieveKeyFile = retrieve,
retrieveKeyFileCheap = retrieveCheap,
removeKey = withkey $ removeKey r,
hasKey = withkey $ hasKey r,
cost = cost r + encryptedRemoteCostAdj
@ -58,6 +59,9 @@ encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r =
retrieve k f = cip k >>= maybe
(retrieveKeyFile r k f)
(`retrieveKeyFileEncrypted` f)
retrieveCheap k f = cip k >>= maybe
(retrieveKeyFileCheap r k f)
(\_ -> return False)
withkey a k = cip k >>= maybe (a k) (a . snd)
cip = cipherKey c

View file

@ -7,25 +7,21 @@
module Remote.Helper.Ssh where
import Common
import Common.Annex
import qualified Git
import qualified Git.Url
import Types
import Config
import Annex.UUID
import Annex.Ssh
{- Generates parameters to ssh to a repository's host and run a command.
- Caller is responsible for doing any neccessary shellEscaping of the
- passed command. -}
sshToRepo :: Git.Repo -> [CommandParam] -> Annex [CommandParam]
sshToRepo repo sshcmd = do
s <- getConfig repo "ssh-options" ""
let sshoptions = map Param (words s)
let sshport = case Git.Url.port repo of
Nothing -> []
Just p -> [Param "-p", Param (show p)]
let sshhost = Param $ Git.Url.hostuser repo
return $ sshoptions ++ sshport ++ [sshhost] ++ sshcmd
opts <- map Param . words <$> getConfig repo "ssh-options" ""
params <- sshParams (Git.Url.hostuser repo, Git.Url.port repo) opts
return $ params ++ sshcmd
{- Generates parameters to run a git-annex-shell command on a remote
- repository. -}

View file

@ -41,6 +41,7 @@ gen r u c = do
name = Git.repoDescribe r,
storeKey = store hooktype,
retrieveKeyFile = retrieve hooktype,
retrieveKeyFileCheap = retrieveCheap hooktype,
removeKey = remove hooktype,
hasKey = checkPresent r hooktype,
hasKeyCheap = False,
@ -109,6 +110,9 @@ storeEncrypted h (cipher, enck) k = withTmp enck $ \tmp -> do
retrieve :: String -> Key -> FilePath -> Annex Bool
retrieve h k f = runHook h "retrieve" k (Just f) $ return True
retrieveCheap :: String -> Key -> FilePath -> Annex Bool
retrieveCheap _ _ _ = return False
retrieveEncrypted :: String -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted h (cipher, enck) f = withTmp enck $ \tmp ->
runHook h "retrieve" enck (Just tmp) $ liftIO $ catchBoolIO $ do

View file

@ -48,6 +48,7 @@ gen r u c = do
name = Git.repoDescribe r,
storeKey = store o,
retrieveKeyFile = retrieve o,
retrieveKeyFileCheap = retrieveCheap o,
removeKey = remove o,
hasKey = checkPresent r o,
hasKeyCheap = False,
@ -103,13 +104,19 @@ storeEncrypted o (cipher, enck) k = withTmp enck $ \tmp -> do
rsyncSend o enck tmp
retrieve :: RsyncOpts -> Key -> FilePath -> Annex Bool
retrieve o k f = untilTrue (rsyncUrls o k) $ \u ->
rsyncRemote o
-- use inplace when retrieving to support resuming
[ Param "--inplace"
, Param u
, Param f
]
retrieve o k f = untilTrue (rsyncUrls o k) $ \u -> rsyncRemote o
-- use inplace when retrieving to support resuming
[ Param "--inplace"
, Param u
, Param f
]
retrieveCheap :: RsyncOpts -> Key -> FilePath -> Annex Bool
retrieveCheap o k f = do
ok <- preseedTmp k f
if ok
then retrieve o k f
else return False
retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted o (cipher, enck) f = withTmp enck $ \tmp -> do

View file

@ -40,6 +40,7 @@ gen r _ _ =
name = Git.repoDescribe r,
storeKey = uploadKey,
retrieveKeyFile = downloadKey,
retrieveKeyFileCheap = downloadKeyCheap,
removeKey = dropKey,
hasKey = checkKey,
hasKeyCheap = False,
@ -58,6 +59,9 @@ downloadKey key file = get =<< getUrls key
showOutput -- make way for download progress bar
downloadUrl urls file
downloadKeyCheap :: Key -> FilePath -> Annex Bool
downloadKeyCheap _ _ = return False
uploadKey :: Key -> Annex Bool
uploadKey _ = do
warning "upload to web not supported"

View file

@ -16,8 +16,8 @@ data BackendA a = Backend {
name :: String,
-- converts a filename to a key
getKey :: FilePath -> a (Maybe Key),
-- called during fsck to check a key
fsckKey :: Key -> a Bool
-- called during fsck to check a key, if the backend has its own checks
fsckKey :: Maybe (Key -> FilePath -> a Bool)
}
instance Show (BackendA a) where

View file

@ -45,6 +45,8 @@ data RemoteA a = Remote {
storeKey :: Key -> a Bool,
-- retrieves a key's contents to a file
retrieveKeyFile :: Key -> FilePath -> a Bool,
-- retrieves a key's contents to a tmp file, if it can be done cheaply
retrieveKeyFileCheap :: Key -> FilePath -> a Bool,
-- removes a key's contents
removeKey :: Key -> a Bool,
-- Checks if a key is present in the remote; if the remote

View file

@ -37,9 +37,11 @@ data Frag = Const String | Var String Justify
data Justify = LeftJustified Int | RightJustified Int | UnJustified
deriving (Show)
type Variables = M.Map String String
{- Expands a Format using some variables, generating a formatted string.
- This can be repeatedly called, efficiently. -}
format :: Format -> M.Map String String -> String
format :: Format -> Variables -> String
format f vars = concatMap expand f
where
expand (Const s) = s

View file

@ -25,7 +25,7 @@ readFileStrict = readFile >=> \s -> length s `seq` return s
- in the second result list.
-
- separate (== ':') "foo:bar" = ("foo", "bar")
- separate (== ':') "foobar" = ("foo, "")
- separate (== ':') "foobar" = ("foobar", "")
-}
separate :: (a -> Bool) -> [a] -> ([a], [a])
separate c l = unbreak $ break c l

View file

@ -12,7 +12,7 @@ import Control.Monad (liftM)
{- Return the first value from a list, if any, satisfying the given
- predicate -}
firstM :: (Monad m) => (a -> m Bool) -> [a] -> m (Maybe a)
firstM :: Monad m => (a -> m Bool) -> [a] -> m (Maybe a)
firstM _ [] = return Nothing
firstM p (x:xs) = do
q <- p x
@ -22,20 +22,20 @@ firstM p (x:xs) = do
{- Returns true if any value in the list satisfies the predicate,
- stopping once one is found. -}
anyM :: (Monad m) => (a -> m Bool) -> [a] -> m Bool
anyM :: Monad m => (a -> m Bool) -> [a] -> m Bool
anyM p = liftM isJust . firstM p
{- Runs an action on values from a list until it succeeds. -}
untilTrue :: (Monad m) => [a] -> (a -> m Bool) -> m Bool
untilTrue :: Monad m => [a] -> (a -> m Bool) -> m Bool
untilTrue = flip anyM
{- Runs an action, passing its value to an observer before returning it. -}
observe :: (Monad m) => (a -> m b) -> m a -> m a
observe :: Monad m => (a -> m b) -> m a -> m a
observe observer a = do
r <- a
_ <- observer r
return r
{- b `after` a runs first a, then b, and returns the value of a -}
after :: (Monad m) => m b -> m a -> m a
after :: Monad m => m b -> m a -> m a
after = observe . const

View file

@ -37,7 +37,7 @@ last = Prelude.last
- Ignores leading/trailing whitespace, and throws away any trailing
- text after the part that can be read.
-}
readMaybe :: (Read a) => String -> Maybe a
readMaybe :: Read a => String -> Maybe a
readMaybe s = case reads s of
((x,_):_) -> Just x
_ -> Nothing

View file

@ -26,8 +26,10 @@ viaTmp a file content = do
a tmpfile content
renameFile tmpfile file
type Template = String
{- Runs an action with a temp file, then removes the file. -}
withTempFile :: String -> (FilePath -> Handle -> IO a) -> IO a
withTempFile :: Template -> (FilePath -> Handle -> IO a) -> IO a
withTempFile template a = bracket create remove use
where
create = do

11
debian/NEWS vendored
View file

@ -1,8 +1,13 @@
git-annex (3.20110702) unstable; urgency=low
git-annex (3.20120123) unstable; urgency=low
The URL backend has been removed. Instead the new web remote can be used.
There was a bug in the handling of directory special remotes that
could cause partial file contents to be stored in them. If you use
a directory special remote, you should fsck it, to avoid potential
data loss.
-- Joey Hess <joeyh@debian.org> Fri, 01 Jul 2011 15:40:51 -0400
Example: git annex fsck --from mydirectory
-- Joey Hess <joeyh@debian.org> Thu, 19 Jan 2012 15:24:23 -0400
git-annex (3.20110624) experimental; urgency=low

22
debian/changelog vendored
View file

@ -1,3 +1,25 @@
git-annex (3.20120123) unstable; urgency=low
* fsck --from: Fscking a remote is now supported. It's done by retrieving
the contents of the specified files from the remote, and checking them,
so can be an expensive operation. Still, if the remote is a special
remote, or a git repository that you cannot run fsck in locally, it's
nice to have the ability to fsck it.
* If you have any directory special remotes, now would be a good time to
fsck them, in case you were hit by the data loss bug fixed in the
previous release!
* fsck --from remote --fast: Avoids expensive file transfers, at the
expense of not checking file size and/or contents.
* Ssh connection caching is now enabled automatically by git-annex.
Only one ssh connection is made to each host per git-annex run, which
can speed some things up a lot, as well as avoiding repeated password
prompts. Concurrent git-annex processes also share ssh connections.
Cached ssh connections are shut down when git-annex exits.
* To disable the ssh caching (if for example you have your own broader
ssh caching configuration), set annex.sshcaching=false.
-- Joey Hess <joeyh@debian.org> Mon, 23 Jan 2012 13:48:48 -0400
git-annex (3.20120116~bpo60+1) squeeze-backports; urgency=low
* Removed conflict on newer version of git, this backport can now be used

View file

@ -0,0 +1,77 @@
Hello,
I have the problem that, while git-annex preserves the file access rights (user, group, others) for the actual file, it does not make sure that others can access this file through the directory tree above said file:
/tmp $ mkdir test
/tmp $ chown claudius:media test
/tmp $ chmod 750 test
/tmp $ ls -dl test
drwxr-x--- 2 claudius media 40 2012-01-23 19:27 test/
/tmp $ cd test
/tmp/test $ git init --shared=all
Initialized empty shared Git repository in /tmp/test/.git/
/tmp/test $ git annex init "test"
init test ok
/tmp/test $ echo 123 > abc
/tmp/test $ chmod 640 abc
/tmp/test $ chown claudius:media abc
/tmp/test $ ls -l
total 4
-rw-r----- 1 claudius media 4 2012-01-23 19:27 abc
/tmp/test $ git annex add .
add abc (checksum...) ok
(Recording state in git...)
/tmp/test $ ls -l
total 4
lrwxrwxrwx 1 claudius claudius 176 2012-01-23 19:27 abc -> .git/annex/objects/8F/pj/SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b/SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b
/tmp/test $ ls -l .git/annex/objects/8F/pj/SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b/SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b
-r--r----- 1 claudius media 4 2012-01-23 19:27 .git/annex/objects/8F/pj/SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b/SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b
/tmp/test $ ls -lR .git/annex/objects/
.git/annex/objects/:
total 0
drwx--S--- 3 claudius claudius 60 2012-01-23 19:28 8F/
.git/annex/objects/8F:
total 0
drwx--S--- 3 claudius claudius 60 2012-01-23 19:28 pj/
.git/annex/objects/8F/pj:
total 0
dr-x--S--- 2 claudius claudius 60 2012-01-23 19:28 SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b/
.git/annex/objects/8F/pj/SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b:
total 4
-r--r----- 1 claudius media 4 2012-01-23 19:27 SHA256-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b
/tmp/test $ stat .git/annex/objects/
File: `.git/annex/objects/'
Size: 60 Blocks: 0 IO Block: 4096 directory
Device: 11h/17d Inode: 2365970 Links: 3
Access: (2700/drwx--S---) Uid: ( 1000/claudius) Gid: ( 1000/claudius)
Access: 2012-01-23 19:28:10.614948386 +0100
Modify: 2012-01-23 19:28:10.614948386 +0100
Change: 2012-01-23 19:28:10.614948386 +0100
Birth: -
The use case is that I have a rather large collection of music I would like to manage with git-annex in various locations (all of it on my external hard drive, some on my notebook etc. This music is played by MPD, which can access the collection because it is in the "media" group. After changing to git-annex, however, this fails.
I tried to avoid this specific problem by declaring the git repository to be shared, which does appear to have some effect on the other files in .git:
/tmp/test $ ls -l .git
total 16
drwx--S--- 5 claudius claudius 160 2012-01-23 19:28 annex/
drwxrwsr-x 2 claudius claudius 40 2012-01-23 19:27 branches/
-rw-rw-r-- 1 claudius claudius 218 2012-01-23 19:27 config
-rw-rw-r-- 1 claudius claudius 73 2012-01-23 19:27 description
-rw-rw-r-- 1 claudius claudius 23 2012-01-23 19:27 HEAD
drwxrwsr-x 2 claudius claudius 220 2012-01-23 19:27 hooks/
-rw-rw-r-- 1 claudius claudius 104 2012-01-23 19:28 index
drwxrwsr-x 2 claudius claudius 60 2012-01-23 19:27 info/
drwxrwsr-x 3 claudius claudius 60 2012-01-23 19:27 logs/
drwxrwsr-x 15 claudius claudius 300 2012-01-23 19:28 objects/
drwxrwsr-x 4 claudius claudius 80 2012-01-23 19:27 refs/
I could obviously try to change the rights of annex/, annex/objects etc., but I would like to avoid having to adapt them each time a new folder is added somewhere below annex/objects/.
My knowledge of git and especially git-annex is not too good, so it might well be that I missed something obvious. Any hints? :)
(And thank you, of course, for taking the time to read all this)

View file

@ -0,0 +1,14 @@
[[!comment format=mdwn
username="http://joey.kitenet.net/"
nickname="joey"
subject="comment 1"
date="2012-01-23T19:00:40Z"
content="""
You say you started the repo with \"git init --shared\" .. but what that's really meant for is bare repositories, which can have several users pushing into it, not a non-bare repository.
The strange mode on the directories \"dr-x--S---\" and files \"-r--r-----\" must be due to your umask setting though. My umask is 022 and the directories and files under `.git/annex/objects` are \"drwxr-xr-x\" and \"-r--r--r--\", which allows anyone to read them unless an upper directory blocks it -- and with this umask, none do unless I explicitly remove permissions from one to lock down a repository.
About mpd, the obvious fix is to run mpd not as a system user but as yourself. I put \"@reboot mpd\" in my crontab to do this.
"""]]

View file

@ -210,7 +210,10 @@ subdirectories).
With parameters, only the specified files are checked.
To avoid expensive checksum calculations, specify --fast
To check a remote to fsck, specify --from.
To avoid expensive checksum calculations (and expensive transfers when
fscking a remote), specify --fast
* unused
@ -572,6 +575,10 @@ Here are all the supported configuration settings.
Automatically maintained, and used to automate upgrades between versions.
* `annex.sshcaching`
By default, git-annex caches ssh connections. To disable this, set to `false`.
* `remote.<name>.annex-cost`
When determining which repository to

View file

@ -57,16 +57,3 @@ b) From the desktop add the remote
So now you can work on the train, pop on the wifi at work upon arrival, and sync up with a `git pull && git annex get`.
An alternative solution may be to use direct tunnels over Openvpn.
## Optimising SSH
Running a `git annex get .`, at least in the version I have, creates a new SSH connection for every file transfer (maybe this should be a feature request?)
Lot's of new small files in an _annex_ cause lot's of connections to be made quickly: this is an relatively expensive overhead and is enough for connection limiting to start in my case. The process can be made much faster by using SSH's connection sharing capabilities. An SSH config like this should do it:
# Global Settings
ControlMaster auto
ControlPersist 30
ControlPath ~/.ssh/master-%r@%h:%p
This will create a master connection for sharing if one isn't present, maintain it for 30 seconds after closing down the connection (just-in-cases') and automatically use the master connection for subsequent connections. Wins all round!

View file

@ -15,8 +15,8 @@ or removed from them with git-annex.
To use gource this way, first go into the directory you want to visualize,
and use `git annex log` to make an input file for `gource`:
git annex log --gource | tee gorce.log
git annex log --gource | tee gource.log
sort gource.log | gource --log-format custom -
The `git annex log` can take a while, to speed it up you can use something
like `--after "4 monts ago"` to limit how far back it goes.
like `--after "4 months ago"` to limit how far back it goes.

View file

@ -0,0 +1,11 @@
`git annex fsck --from remote`
Basically, this needs to receive each file in turn from the remote, to a
temp file, and then run the existing fsck code on it. Could be quite
expensive, but sometimes you really want to check.
An unencrypted directory special remote could be optimised, by not actually
copying the file, just dropping a symlink, etc.
The WORM backend doesn't care about file content, so it would be nice to
avoid transferring the content at all, and only send the size.

View file

@ -6,3 +6,40 @@ Simple, when performing various git annex command over ssh, in particular a mult
>
> Combining multiple operations into a single ssh is on the todo list, but
> very far down it. --[[Joey]]
>> OTOH, automatically running ssh in ControlMaster mode (and stopping it
>> at exit) would be useful and not hard thing for git-annex to do.
>>
>> It'd just need to set the appropriate config options, setting
>> ControlPath to a per-remote socket location that includes git-annex's
>> pid. Then at shutdown, run `ssh -O exit` on each such socket.
>>
>> Complicated slightly by not doing this if the user has already set up
>> more broad ssh connection caching.
>>
>> [[done]]! --[[Joey]]
---
Slightly more elaborate design for using ssh connection caching:
* Per-uuid ssh socket in `.git/annex/ssh/user@host.socket`
* Can be shared amoung concurrent git-annex processes as well as ssh
invocations inside the current git-annex.
* Also a lock file, `.git/annex/ssh/user@host.lock`.
Open and take shared lock before running ssh; store lock in lock pool.
(Not locking socket directly, because ssh might want to.)
* Run ssh like: `ssh -S .git/annex/ssh/user@host.socket -o ControlMaster=auto -o ControlPersist=yes user@host`
* At shutdown, enumerate all existing sockets, and on each:
1. Drop any shared lock.
2. Attempt to take an exclusive lock (non-blocking).
3. `ssh -q -S .git/annex/ssh/user@host.socket -o ControlMaster=auto -o ControlPersist=yes -O stop user@host`
(Will exit nonzero if ssh is not running on that socket.)
4. And then remove the socket and the lock file.
* Do same *at startup*. Why? In case an old git-annex was interrupted
and left behind a ssh. May have moved to a different network
in the meantime, etc, and be stalled waiting for a response from the
network, or talking to the wrong interface or something.
(Ie, the reason why I don't use ssh connection caching by default.)
* User should be able to override this, to use their own preferred
connection caching setup. `annex.sshcaching=false`

View file

@ -1,5 +1,5 @@
Name: git-annex
Version: 3.20120116~bpo60+1
Version: 3.20120123~bpo60+1
Cabal-Version: >= 1.6
License: GPL
Maintainer: Joey Hess <joey@kitenet.net>