Merge branch 'ghc7.0'

Conflicts:
	Annex.hs
	Git/CheckAttr.hs
	Remote/S3.hs
	debian/control
	git-annex.cabal
Joey Hess 2012-02-15 13:46:02 -04:00
commit e189c09195
100 changed files with 908 additions and 431 deletions


@ -26,19 +26,21 @@ module Annex (
fromRepo,
) where
import Control.Monad.State
import Control.Monad.State.Strict
import System.Posix.Types (Fd)
import Common
import qualified Git
import qualified Git.Config
import Git.CatFile
import Git.CheckAttr
import qualified Git.Queue
import Types.Backend
import qualified Types.Remote
import Types.Crypto
import Types.BranchState
import Types.TrustLevel
import Utility.State
import qualified Utility.Matcher
import qualified Data.Map as M
@ -73,6 +75,7 @@ data AnnexState = AnnexState
, auto :: Bool
, branchstate :: BranchState
, catfilehandle :: Maybe CatFileHandle
, checkattrhandle :: Maybe CheckAttrHandle
, forcebackend :: Maybe String
, forcenumcopies :: Maybe Int
, limit :: Matcher (FilePath -> Annex Bool)
@ -96,6 +99,7 @@ newState gitrepo = AnnexState
, auto = False
, branchstate = startBranchState
, catfilehandle = Nothing
, checkattrhandle = Nothing
, forcebackend = Nothing
, forcenumcopies = Nothing
, limit = Left []
@ -117,18 +121,6 @@ run s a = runStateT (runAnnex a) s
eval :: AnnexState -> Annex a -> IO a
eval s a = evalStateT (runAnnex a) s
{- Gets a value from the internal state, selected by the passed value
- constructor. -}
getState :: (AnnexState -> a) -> Annex a
getState = gets
{- Applies a state mutation function to change the internal state.
-
- Example: changeState $ \s -> s { output = QuietOutput }
-}
changeState :: (AnnexState -> AnnexState) -> Annex ()
changeState = modify
{- Sets a flag to True -}
setFlag :: String -> Annex ()
setFlag flag = changeState $ \s ->


@ -1,6 +1,6 @@
{- management of the git-annex branch
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
- Copyright 2011-2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -32,7 +32,7 @@ import qualified Git.Command
import qualified Git.Ref
import qualified Git.Branch
import qualified Git.UnionMerge
import qualified Git.HashObject
import Git.HashObject
import qualified Git.Index
import Annex.CatFile
@ -190,7 +190,7 @@ commit message = whenM journalDirty $ lockJournal $ do
{- Commits the staged changes in the index to the branch.
-
- Ensures that the branch's index file is first updated to the state
- of the brannch at branchref, before running the commit action. This
- of the branch at branchref, before running the commit action. This
- is needed because the branch may have had changes pushed to it, that
- are not yet reflected in the index.
-
@ -307,13 +307,14 @@ stageJournal = do
fs <- getJournalFiles
g <- gitRepo
withIndex $ liftIO $ do
let dir = gitAnnexJournalDir g
let paths = map (dir </>) fs
(shas, cleanup) <- Git.HashObject.hashFiles paths g
Git.UnionMerge.update_index g $
index_lines shas (map fileJournal fs)
cleanup
mapM_ removeFile paths
h <- hashObjectStart g
Git.UnionMerge.stream_update_index g
[genstream (gitAnnexJournalDir g) h fs]
hashObjectStop h
where
index_lines shas = map genline . zip shas
genline (sha, file) = Git.UnionMerge.update_index_line sha file
genstream dir h fs streamer = forM_ fs $ \file -> do
let path = dir </> file
sha <- hashFile h path
streamer $ Git.UnionMerge.update_index_line
sha (fileJournal file)
removeFile path

Annex/CheckAttr.hs Normal file

@ -0,0 +1,35 @@
{- git check-attr interface, with handle automatically stored in the Annex monad
-
- Copyright 2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.CheckAttr (
checkAttr,
checkAttrHandle
) where
import Common.Annex
import qualified Git.CheckAttr as Git
import qualified Annex
{- All gitattributes used by git-annex. -}
annexAttrs :: [Git.Attr]
annexAttrs =
[ "annex.backend"
, "annex.numcopies"
]
checkAttr :: Git.Attr -> FilePath -> Annex String
checkAttr attr file = do
h <- checkAttrHandle
liftIO $ Git.checkAttr h attr file
checkAttrHandle :: Annex Git.CheckAttrHandle
checkAttrHandle = maybe startup return =<< Annex.getState Annex.checkattrhandle
where
startup = do
h <- inRepo $ Git.checkAttrStart annexAttrs
Annex.changeState $ \s -> s { Annex.checkattrhandle = Just h }
return h
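For context, a minimal sketch (not part of this commit) of how a caller consumes this new module; it mirrors the numCopies helper added to Command.hs further down, and assumes readish is in scope via Common.Annex:

import Common.Annex
import Annex.CheckAttr

-- Look up annex.numcopies for a file through the cached check-attr handle.
-- Returns Nothing when the attribute is unset or not a number.
fileNumCopies :: FilePath -> Annex (Maybe Int)
fileNumCopies file = readish <$> checkAttr "annex.numcopies" file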


@ -25,7 +25,6 @@ module Annex.Content (
preseedTmp,
) where
import System.IO.Error (try)
import Control.Exception (bracket_)
import System.Posix.Types
@ -79,7 +78,7 @@ lockContent key a = do
where
lock Nothing = return Nothing
lock (Just l) = do
v <- try $ setLock l (WriteLock, AbsoluteSeek, 0, 0)
v <- tryIO $ setLock l (WriteLock, AbsoluteSeek, 0, 0)
case v of
Left _ -> error "content is locked"
Right _ -> return $ Just l
@ -291,11 +290,16 @@ getKeysPresent' dir = do
let files = concat contents
return $ mapMaybe (fileKey . takeFileName) files
{- Things to do to record changes to content. -}
saveState :: Annex ()
saveState = do
{- Things to do to record changes to content when shutting down.
-
- It's acceptable to avoid committing changes to the branch,
- especially if performing a short-lived action.
-}
saveState :: Bool -> Annex ()
saveState oneshot = do
Annex.Queue.flush False
Annex.Branch.commit "update"
unless oneshot $
Annex.Branch.commit "update"
{- Downloads content from any of a list of urls. -}
downloadUrl :: [Url.URLString] -> FilePath -> Annex Bool


@ -91,4 +91,4 @@ lockJournal a = do
{- Runs an action, catching failure and running something to fix it up, and
- retrying if necessary. -}
doRedo :: IO a -> IO b -> IO a
doRedo a b = catch a $ const $ b >> a
doRedo a b = catchIO a $ const $ b >> a


@ -11,7 +11,6 @@ module Annex.Ssh (
) where
import qualified Data.Map as M
import System.IO.Error (try)
import Common.Annex
import Annex.LockPool
@ -72,18 +71,20 @@ sshCleanup = do
let lockfile = socket2lock socketfile
unlockFile lockfile
fd <- liftIO $ openFd lockfile ReadWrite (Just stdFileMode) defaultFileFlags
v <- liftIO $ try $ setLock fd (WriteLock, AbsoluteSeek, 0, 0)
v <- liftIO $ tryIO $
setLock fd (WriteLock, AbsoluteSeek, 0, 0)
case v of
Left _ -> return ()
Right _ -> stopssh socketfile
liftIO $ closeFd fd
stopssh socketfile = do
(_, params) <- sshInfo $ socket2hostport socketfile
let (host, port) = socket2hostport socketfile
(_, params) <- sshInfo (host, port)
_ <- liftIO $ do
-- "ssh -O stop" is noisy on stderr even with -q
let cmd = unwords $ toCommand $
[ Params "-O stop"
] ++ params
] ++ params ++ [Param host]
_ <- boolSystem "sh"
[ Param "-c"
, Param $ "ssh " ++ cmd ++ " >/dev/null 2>/dev/null"
@ -101,7 +102,7 @@ hostport2socket host (Just port) = host ++ "!" ++ show port
socket2hostport :: FilePath -> (String, Maybe Integer)
socket2hostport socket
| null p = (h, Nothing)
| otherwise = (h, readMaybe p)
| otherwise = (h, readish p)
where
(h, p) = separate (== '!') $ takeFileName socket


@ -6,23 +6,21 @@
-}
module Backend (
BackendFile,
list,
orderedList,
genKey,
lookupFile,
chooseBackends,
chooseBackend,
lookupBackendName,
maybeLookupBackendName
) where
import System.IO.Error (try)
import System.Posix.Files
import Common.Annex
import qualified Git.Config
import qualified Git.CheckAttr
import qualified Annex
import Annex.CheckAttr
import Types.Key
import qualified Types.Backend as B
@ -77,7 +75,7 @@ genKey' (b:bs) file = do
- by examining what the file symlinks to. -}
lookupFile :: FilePath -> Annex (Maybe (Key, Backend))
lookupFile file = do
tl <- liftIO $ try getsymlink
tl <- liftIO $ tryIO getsymlink
case tl of
Left _ -> return Nothing
Right l -> makekey l
@ -94,20 +92,15 @@ lookupFile file = do
bname ++ ")"
return Nothing
type BackendFile = (Maybe Backend, FilePath)
{- Looks up the backends that should be used for each file in a list.
{- Looks up the backend that should be used for a file.
- That can be configured on a per-file basis in the gitattributes file.
-}
chooseBackends :: [FilePath] -> Annex [BackendFile]
chooseBackends fs = Annex.getState Annex.forcebackend >>= go
chooseBackend :: FilePath -> Annex (Maybe Backend)
chooseBackend f = Annex.getState Annex.forcebackend >>= go
where
go Nothing = do
pairs <- inRepo $ Git.CheckAttr.lookup "annex.backend" fs
return $ map (\(f,b) -> (maybeLookupBackendName b, f)) pairs
go (Just _) = do
l <- orderedList
return $ map (\f -> (Just $ Prelude.head l, f)) fs
go Nothing = maybeLookupBackendName <$>
checkAttr "annex.backend" f
go (Just _) = Just . Prelude.head <$> orderedList
{- Looks up a backend by name. May fail if unknown. -}
lookupBackendName :: String -> Backend


@ -24,5 +24,9 @@ backend = Backend {
fsckKey = Nothing
}
fromUrl :: String -> Key
fromUrl url = stubKey { keyName = url, keyBackendName = "URL" }
fromUrl :: String -> Maybe Integer -> Key
fromUrl url size = stubKey
{ keyName = url
, keyBackendName = "URL"
, keySize = size
}


@ -11,7 +11,6 @@ module CmdLine (
shutdown
) where
import qualified System.IO.Error as IO
import qualified Control.Exception as E
import Control.Exception (throw)
import System.Console.GetOpt
@ -40,7 +39,7 @@ dispatch args cmds commonoptions header getgitrepo = do
(actions, state') <- Annex.run state $ do
sequence_ flags
prepCommand cmd params
tryRun state' cmd $ [startup] ++ actions ++ [shutdown]
tryRun state' cmd $ [startup] ++ actions ++ [shutdown $ cmdoneshot cmd]
where
(flags, cmd, params) = parseCmd args cmds commonoptions header
@ -72,9 +71,11 @@ tryRun' :: Integer -> Annex.AnnexState -> Command -> [CommandCleanup] -> IO ()
tryRun' errnum _ cmd []
| errnum > 0 = error $ cmdname cmd ++ ": " ++ show errnum ++ " failed"
| otherwise = return ()
tryRun' errnum state cmd (a:as) = run >>= handle
tryRun' errnum state cmd (a:as) = do
r <- run
handle $! r
where
run = IO.try $ Annex.run state $ do
run = tryIO $ Annex.run state $ do
Annex.Queue.flushWhenFull
a
handle (Left err) = showerr err >> cont False state
@ -89,9 +90,9 @@ startup :: Annex Bool
startup = return True
{- Cleanup actions. -}
shutdown :: Annex Bool
shutdown = do
saveState
shutdown :: Bool -> Annex Bool
shutdown oneshot = do
saveState oneshot
liftIO Git.Command.reap -- zombies from long-running git processes
sshCleanup -- ssh connection caching
return True


@ -8,6 +8,7 @@
module Command (
command,
noRepo,
oneShot,
withOptions,
next,
stop,
@ -18,6 +19,7 @@ module Command (
ifAnnexed,
notBareRepo,
isBareRepo,
numCopies,
autoCopies,
module ReExported
) where
@ -34,10 +36,15 @@ import Checks as ReExported
import Usage as ReExported
import Logs.Trust
import Config
import Annex.CheckAttr
{- Generates a normal command -}
command :: String -> String -> [CommandSeek] -> String -> Command
command = Command [] Nothing commonChecks
command = Command [] Nothing commonChecks False
{- Makes a command run in oneshot mode. -}
oneShot :: Command -> Command
oneShot c = c { cmdoneshot = True }
{- Adds a fallback action to a command, that will be run if it's used
- outside a git repository. -}
@ -98,17 +105,22 @@ notBareRepo a = do
isBareRepo :: Annex Bool
isBareRepo = fromRepo Git.repoIsLocalBare
numCopies :: FilePath -> Annex (Maybe Int)
numCopies file = readish <$> checkAttr "annex.numcopies" file
{- Used for commands that have an auto mode that checks the number of known
- copies of a key.
-
- In auto mode, first checks that the number of known
- copies of the key is > or < than the numcopies setting, before running
- the action. -}
autoCopies :: Key -> (Int -> Int -> Bool) -> Maybe Int -> CommandStart -> CommandStart
autoCopies key vs numcopiesattr a = Annex.getState Annex.auto >>= auto
autoCopies :: FilePath -> Key -> (Int -> Int -> Bool) -> (Maybe Int -> CommandStart) -> CommandStart
autoCopies file key vs a = do
numcopiesattr <- numCopies file
Annex.getState Annex.auto >>= auto numcopiesattr
where
auto False = a
auto True = do
auto numcopiesattr False = a numcopiesattr
auto numcopiesattr True = do
needed <- getNumCopies numcopiesattr
(_, have) <- trustPartition UnTrusted =<< Remote.keyLocations key
if length have `vs` needed then a else stop
if length have `vs` needed then a numcopiesattr else stop


@ -16,7 +16,6 @@ import qualified Backend
import Logs.Location
import Annex.Content
import Utility.Touch
import Backend
def :: [Command]
def = [command "add" paramPaths seek "add files to annex"]
@ -28,8 +27,8 @@ seek = [withFilesNotInGit start, withFilesUnlocked start]
{- The add subcommand annexes a file, storing it in a backend, and then
- moving it into the annex directory and setting up the symlink pointing
- to its content. -}
start :: BackendFile -> CommandStart
start p@(_, file) = notBareRepo $ ifAnnexed file fixup add
start :: FilePath -> CommandStart
start file = notBareRepo $ ifAnnexed file fixup add
where
add = do
s <- liftIO $ getSymbolicLinkStatus file
@ -37,7 +36,7 @@ start p@(_, file) = notBareRepo $ ifAnnexed file fixup add
then stop
else do
showStart "add" file
next $ perform p
next $ perform file
fixup (key, _) = do
-- fixup from an interrupted add; the symlink
-- is present but not yet added to git
@ -45,8 +44,10 @@ start p@(_, file) = notBareRepo $ ifAnnexed file fixup add
liftIO $ removeFile file
next $ next $ cleanup file key =<< inAnnex key
perform :: BackendFile -> CommandPerform
perform (backend, file) = Backend.genKey file backend >>= go
perform :: FilePath -> CommandPerform
perform file = do
backend <- Backend.chooseBackend file
Backend.genKey file backend >>= go
where
go Nothing = stop
go (Just (key, _)) = do


@ -15,37 +15,55 @@ import qualified Backend
import qualified Command.Add
import qualified Annex
import qualified Backend.URL
import qualified Utility.Url as Url
import Annex.Content
import Logs.Web
import qualified Option
import Types.Key
def :: [Command]
def = [command "addurl" (paramRepeating paramUrl) seek "add urls to annex"]
def = [withOptions [fileOption] $
command "addurl" (paramRepeating paramUrl) seek "add urls to annex"]
fileOption :: Option
fileOption = Option.field [] "file" paramFile "specify what file the url is added to"
seek :: [CommandSeek]
seek = [withStrings start]
seek = [withField fileOption return $ \f ->
withStrings $ start f]
start :: String -> CommandStart
start s = notBareRepo $ go $ parseURI s
start :: Maybe FilePath -> String -> CommandStart
start optfile s = notBareRepo $ go $ fromMaybe bad $ parseURI s
where
go Nothing = error $ "bad url " ++ s
go (Just url) = do
file <- liftIO $ url2file url
bad = fromMaybe (error $ "bad url " ++ s) $
parseURI $ escapeURIString isUnescapedInURI s
go url = do
let file = fromMaybe (url2file url) optfile
showStart "addurl" file
next $ perform s file
perform :: String -> FilePath -> CommandPerform
perform url file = do
fast <- Annex.getState Annex.fast
if fast then nodownload url file else download url file
perform url file = ifAnnexed file addurl geturl
where
geturl = do
whenM (liftIO $ doesFileExist file) $
error $ "not overwriting existing " ++ file
fast <- Annex.getState Annex.fast
if fast then nodownload url file else download url file
addurl (key, _backend) = do
unlessM (liftIO $ Url.check url (keySize key)) $
error $ "failed to verify url: " ++ url
setUrlPresent key url
next $ return True
download :: String -> FilePath -> CommandPerform
download url file = do
showAction $ "downloading " ++ url ++ " "
let dummykey = Backend.URL.fromUrl url
let dummykey = Backend.URL.fromUrl url Nothing
tmp <- fromRepo $ gitAnnexTmpLocation dummykey
liftIO $ createDirectoryIfMissing True (parentDir tmp)
stopUnless (downloadUrl [url] tmp) $ do
[(backend, _)] <- Backend.chooseBackends [file]
backend <- Backend.chooseBackend file
k <- Backend.genKey tmp backend
case k of
Nothing -> stop
@ -56,16 +74,15 @@ download url file = do
nodownload :: String -> FilePath -> CommandPerform
nodownload url file = do
let key = Backend.URL.fromUrl url
(exists, size) <- liftIO $ Url.exists url
unless exists $
error $ "unable to access url: " ++ url
let key = Backend.URL.fromUrl url size
setUrlPresent key url
next $ Command.Add.cleanup file key False
url2file :: URI -> IO FilePath
url2file url = do
whenM (doesFileExist file) $
error $ "already have this url in " ++ file
return file
url2file :: URI -> FilePath
url2file url = escape $ uriRegName auth ++ uriPath url ++ uriQuery url
where
file = escape $ uriRegName auth ++ uriPath url ++ uriQuery url
escape = replace "/" "_" . replace "?" "_"
auth = fromMaybe (error $ "bad url " ++ show url) $ uriAuthority url


@ -12,7 +12,7 @@ import Command
import Annex.UUID
def :: [Command]
def = [command "configlist" paramNothing seek
def = [oneShot $ command "configlist" paramNothing seek
"outputs relevant git configuration"]
seek :: [CommandSeek]


@ -19,10 +19,10 @@ def = [withOptions Command.Move.options $ command "copy" paramPaths seek
seek :: [CommandSeek]
seek = [withField Command.Move.toOption Remote.byName $ \to ->
withField Command.Move.fromOption Remote.byName $ \from ->
withNumCopies $ \n -> whenAnnexed $ start to from n]
withFilesInGit $ whenAnnexed $ start to from]
-- A copy is just a move that does not delete the source file.
-- However, --auto mode avoids unnecessary copies.
start :: Maybe Remote -> Maybe Remote -> Maybe Int -> FilePath -> (Key, Backend) -> CommandStart
start to from numcopies file (key, backend) = autoCopies key (<) numcopies $
start :: Maybe Remote -> Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart
start to from file (key, backend) = autoCopies file key (<) $ \_numcopies ->
Command.Move.start to from False file (key, backend)


@ -26,11 +26,11 @@ fromOption :: Option
fromOption = Option.field ['f'] "from" paramRemote "drop content from a remote"
seek :: [CommandSeek]
seek = [withField fromOption Remote.byName $ \from -> withNumCopies $ \n ->
whenAnnexed $ start from n]
seek = [withField fromOption Remote.byName $ \from ->
withFilesInGit $ whenAnnexed $ start from]
start :: Maybe Remote -> Maybe Int -> FilePath -> (Key, Backend) -> CommandStart
start from numcopies file (key, _) = autoCopies key (>) numcopies $ do
start :: Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart
start from file (key, _) = autoCopies file key (>) $ \numcopies -> do
case from of
Nothing -> startLocal file numcopies key
Just remote -> do


@ -14,7 +14,7 @@ import Logs.Location
import Annex.Content
def :: [Command]
def = [command "dropkey" (paramRepeating paramKey) seek
def = [oneShot $ command "dropkey" (paramRepeating paramKey) seek
"drops annexed content for specified keys"]
seek :: [CommandSeek]


@ -36,12 +36,13 @@ options = [fromOption]
seek :: [CommandSeek]
seek =
[ withField fromOption Remote.byName $ \from ->
withNumCopies $ \n -> whenAnnexed $ start from n
withFilesInGit $ whenAnnexed $ start from
, withBarePresentKeys startBare
]
start :: Maybe Remote -> Maybe Int -> FilePath -> (Key, Backend) -> CommandStart
start from numcopies file (key, backend) = do
start :: Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart
start from file (key, backend) = do
numcopies <- numCopies file
showStart "fsck" file
case from of
Nothing -> next $ perform key file backend numcopies
@ -81,7 +82,7 @@ performRemote key file backend numcopies remote = do
t <- fromRepo gitAnnexTmpDir
let tmp = t </> "fsck" ++ show pid ++ "." ++ keyFile key
liftIO $ createDirectoryIfMissing True t
let cleanup = liftIO $ catch (removeFile tmp) (const $ return ())
let cleanup = liftIO $ catchIO (removeFile tmp) (const $ return ())
cleanup
cleanup `after` a tmp
getfile tmp = do


@ -19,11 +19,11 @@ def = [withOptions [Command.Move.fromOption] $ command "get" paramPaths seek
seek :: [CommandSeek]
seek = [withField Command.Move.fromOption Remote.byName $ \from ->
withNumCopies $ \n -> whenAnnexed $ start from n]
withFilesInGit $ whenAnnexed $ start from]
start :: Maybe Remote -> Maybe Int -> FilePath -> (Key, Backend) -> CommandStart
start from numcopies file (key, _) = stopUnless (not <$> inAnnex key) $
autoCopies key (<) numcopies $ do
start :: Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart
start from file (key, _) = stopUnless (not <$> inAnnex key) $
autoCopies file key (<) $ \_numcopies -> do
case from of
Nothing -> go $ perform key
Just src -> do


@ -12,7 +12,7 @@ import Command
import Annex.Content
def :: [Command]
def = [command "inannex" (paramRepeating paramKey) seek
def = [oneShot $ command "inannex" (paramRepeating paramKey) seek
"checks if keys are present in the annex"]
seek :: [CommandSeek]


@ -10,7 +10,6 @@ module Command.Lock where
import Common.Annex
import Command
import qualified Annex.Queue
import Backend
def :: [Command]
def = [command "lock" paramPaths seek "undo unlock command"]
@ -18,9 +17,8 @@ def = [command "lock" paramPaths seek "undo unlock command"]
seek :: [CommandSeek]
seek = [withFilesUnlocked start, withFilesUnlockedToBeCommitted start]
{- Undo unlock -}
start :: BackendFile -> CommandStart
start (_, file) = do
start :: FilePath -> CommandStart
start file = do
showStart "lock" file
next $ perform file


@ -19,12 +19,12 @@ def :: [Command]
def = [command "migrate" paramPaths seek "switch data to different backend"]
seek :: [CommandSeek]
seek = [withBackendFilesInGit $ \(b, f) -> whenAnnexed (start b) f]
seek = [withFilesInGit $ whenAnnexed start]
start :: Maybe Backend -> FilePath -> (Key, Backend) -> CommandStart
start b file (key, oldbackend) = do
start :: FilePath -> (Key, Backend) -> CommandStart
start file (key, oldbackend) = do
exists <- inAnnex key
newbackend <- choosebackend b
newbackend <- choosebackend =<< Backend.chooseBackend file
if (newbackend /= oldbackend || upgradableKey key) && exists
then do
showStart "migrate" file


@ -10,7 +10,6 @@ module Command.PreCommit where
import Command
import qualified Command.Add
import qualified Command.Fix
import Backend
def :: [Command]
def = [command "pre-commit" paramPaths seek "run by git pre-commit hook"]
@ -22,12 +21,12 @@ seek =
[ withFilesToBeCommitted $ whenAnnexed Command.Fix.start
, withFilesUnlockedToBeCommitted start]
start :: BackendFile -> CommandStart
start p = next $ perform p
start :: FilePath -> CommandStart
start file = next $ perform file
perform :: BackendFile -> CommandPerform
perform pair@(_, file) = do
ok <- doCommand $ Command.Add.start pair
perform :: FilePath -> CommandPerform
perform file = do
ok <- doCommand $ Command.Add.start file
if ok
then next $ return True
else error $ "failed to add " ++ file ++ "; canceling commit"


@ -14,7 +14,7 @@ import Annex.Content
import Utility.RsyncFile
def :: [Command]
def = [command "recvkey" paramKey seek
def = [oneShot $ command "recvkey" paramKey seek
"runs rsync in server mode to receive content"]
seek :: [CommandSeek]
@ -28,7 +28,7 @@ start key = do
if ok
then do
-- forcibly quit after receiving one key,
-- and shutdown cleanly so queued git commands run
_ <- shutdown
-- and shutdown cleanly
_ <- shutdown True
liftIO exitSuccess
else liftIO exitFailure


@ -13,7 +13,7 @@ import Annex.Content
import Utility.RsyncFile
def :: [Command]
def = [command "sendkey" paramKey seek
def = [oneShot $ command "sendkey" paramKey seek
"runs rsync in server mode to send content"]
seek :: [CommandSeek]


@ -7,7 +7,7 @@
module Command.Status where
import Control.Monad.State
import Control.Monad.State.Strict
import qualified Data.Map as M
import qualified Data.Set as S
import Data.Set (Set)
@ -66,8 +66,8 @@ slow_stats =
, bad_data_size
, local_annex_keys
, local_annex_size
, visible_annex_keys
, visible_annex_size
, known_annex_keys
, known_annex_size
, backend_usage
]
@ -113,7 +113,7 @@ supported_remote_types = stat "supported remote types" $ json unwords $
remote_list :: TrustLevel -> String -> Stat
remote_list level desc = stat n $ nojson $ lift $ do
us <- M.keys <$> (M.union <$> uuidMap <*> remoteMap)
us <- M.keys <$> (M.union <$> uuidMap <*> remoteMap Remote.name)
rs <- fst <$> trustPartition level us
s <- prettyPrintUUIDs n rs
return $ if null s then "0" else show (length rs) ++ "\n" ++ beginning s
@ -128,12 +128,12 @@ local_annex_keys :: Stat
local_annex_keys = stat "local annex keys" $ json show $
S.size <$> cachedKeysPresent
visible_annex_size :: Stat
visible_annex_size = stat "visible annex size" $ json id $
known_annex_size :: Stat
known_annex_size = stat "known annex size" $ json id $
keySizeSum <$> cachedKeysReferenced
visible_annex_keys :: Stat
visible_annex_keys = stat "visible annex keys" $ json show $
known_annex_keys :: Stat
known_annex_keys = stat "known annex keys" $ json show $
S.size <$> cachedKeysReferenced
tmp_size :: Stat


@ -57,7 +57,7 @@ cleanup = do
mapM_ removeAnnex =<< getKeysPresent
liftIO $ removeDirectoryRecursive annexdir
-- avoid normal shutdown
saveState
saveState False
inRepo $ Git.Command.run "branch"
[Param "-D", Param $ show Annex.Branch.name]
liftIO exitSuccess


@ -13,7 +13,7 @@ import qualified Build.SysConfig as SysConfig
import Annex.Version
def :: [Command]
def = [noRepo showPackageVersion $ dontCheck repoExists $
def = [oneShot $ noRepo showPackageVersion $ dontCheck repoExists $
command "version" paramNothing seek "show version info"]
seek :: [CommandSeek]


@ -7,6 +7,8 @@
module Command.Whereis where
import qualified Data.Map as M
import Common.Annex
import Command
import Remote
@ -17,24 +19,36 @@ def = [command "whereis" paramPaths seek
"lists repositories that have file content"]
seek :: [CommandSeek]
seek = [withFilesInGit $ whenAnnexed start]
seek = [withValue (remoteMap id) $ \m ->
withFilesInGit $ whenAnnexed $ start m]
start :: FilePath -> (Key, Backend) -> CommandStart
start file (key, _) = do
start :: (M.Map UUID Remote) -> FilePath -> (Key, Backend) -> CommandStart
start remotemap file (key, _) = do
showStart "whereis" file
next $ perform key
next $ perform remotemap key
perform :: Key -> CommandPerform
perform key = do
(untrustedlocations, safelocations) <- trustPartition UnTrusted =<< keyLocations key
perform :: (M.Map UUID Remote) -> Key -> CommandPerform
perform remotemap key = do
locations <- keyLocations key
(untrustedlocations, safelocations) <- trustPartition UnTrusted locations
let num = length safelocations
showNote $ show num ++ " " ++ copiesplural num
pp <- prettyPrintUUIDs "whereis" safelocations
unless (null safelocations) $ showLongNote pp
pp' <- prettyPrintUUIDs "untrusted" untrustedlocations
unless (null untrustedlocations) $ showLongNote $ untrustedheader ++ pp'
forM_ (catMaybes $ map (`M.lookup` remotemap) locations) $
performRemote key
if null safelocations then stop else next $ return True
where
copiesplural 1 = "copy"
copiesplural _ = "copies"
untrustedheader = "The following untrusted locations may also have copies:\n"
performRemote :: Key -> Remote -> Annex ()
performRemote key remote = case whereisKey remote of
Nothing -> return ()
Just a -> do
ls <- a key
unless (null ls) $ showLongNote $
unlines $ map (\l -> name remote ++ ": " ++ l) ls


@ -1,8 +1,9 @@
module Common (module X) where
import Control.Monad as X hiding (join)
import Control.Monad.IfElse as X
import Control.Applicative as X
import Control.Monad.State as X (liftIO)
import Control.Monad.State.Strict as X (liftIO)
import Control.Exception.Extensible as X (IOException)
import Data.Maybe as X
@ -20,7 +21,7 @@ import System.Posix.Process as X hiding (executeFile)
import System.Exit as X
import Utility.Misc as X
import Utility.Conditional as X
import Utility.Exception as X
import Utility.SafeCommand as X
import Utility.Path as X
import Utility.Directory as X


@ -40,7 +40,7 @@ remoteConfig r key = "remote." ++ fromMaybe "" (Git.remoteName r) ++ ".annex-" +
remoteCost :: Git.Repo -> Int -> Annex Int
remoteCost r def = do
cmd <- getConfig r "cost-command" ""
(fromMaybe def . readMaybe) <$>
(fromMaybe def . readish) <$>
if not $ null cmd
then liftIO $ snd <$> pipeFrom "sh" ["-c", cmd]
else getConfig r "cost" ""
@ -78,7 +78,7 @@ getNumCopies v = perhaps (use v) =<< Annex.getState Annex.forcenumcopies
where
use (Just n) = return n
use Nothing = perhaps (return 1) =<<
readMaybe <$> fromRepo (Git.Config.get config "1")
readish <$> fromRepo (Git.Config.get config "1")
perhaps fallback = maybe fallback (return . id)
config = "annex.numcopies"


@ -13,7 +13,6 @@ module Git.CatFile (
catObject
) where
import Control.Monad.State
import System.Cmd.Utils
import System.IO
import qualified Data.ByteString.Char8 as S


@ -1,40 +1,56 @@
{- git check-attr interface
-
- Copyright 2010, 2011 Joey Hess <joey@kitenet.net>
- Copyright 2010-2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Git.CheckAttr where
import System.Exit
import Common
import Git
import Git.Command
import qualified Git.Filename
import qualified Git.Version
{- Efficiently looks up a gitattributes value for each file in a list. -}
lookup :: String -> [FilePath] -> Repo -> IO [(FilePath, String)]
lookup attr files repo = do
oldgit <- Git.Version.older "1.7.7"
cwd <- getCurrentDirectory
(_, fromh, toh) <- hPipeBoth "git" (toCommand params)
_ <- forkProcess $ do
hClose fromh
hPutStr toh $ join "\0" $ input cwd oldgit
hClose toh
exitSuccess
hClose toh
output cwd oldgit . lines <$> hGetContents fromh
where
params = gitCommandLine
[ Param "check-attr"
, Param attr
, Params "-z --stdin"
] repo
type CheckAttrHandle = (PipeHandle, Handle, Handle, [Attr], String)
type Attr = String
{- Starts git check-attr running to look up the specified gitattributes
- values and return a handle. -}
checkAttrStart :: [Attr] -> Repo -> IO CheckAttrHandle
checkAttrStart attrs repo = do
cwd <- getCurrentDirectory
(pid, from, to) <- hPipeBoth "git" $ toCommand $
gitCommandLine params repo
return (pid, from, to, attrs, cwd)
where
params =
[ Param "check-attr" ]
++ map Param attrs ++
[ Params "-z --stdin" ]
{- Stops git check-attr. -}
checkAttrStop :: CheckAttrHandle -> IO ()
checkAttrStop (pid, from, to, _, _) = do
hClose to
hClose from
forceSuccess pid
{- Gets an attribute of a file. -}
checkAttr :: CheckAttrHandle -> Attr -> FilePath -> IO String
checkAttr (_, from, to, attrs, cwd) want file = do
oldgit <- Git.Version.older "1.7.7"
hPutStr to $ file' oldgit ++ "\0"
hFlush to
pairs <- forM attrs $ \attr -> do
l <- hGetLine from
return (attr, attrvalue attr l)
let vals = map snd $ filter (\(attr, _) -> attr == want) pairs
case vals of
[v] -> return v
_ -> error $ "unable to determine " ++ want ++ " attribute of " ++ file
where
{- Before git 1.7.7, git check-attr worked best with
- absolute filenames; using them worked around some bugs
- with relative filenames.
@ -42,25 +58,10 @@ lookup attr files repo = do
- With newer git, git check-attr chokes on some absolute
- filenames, and the bugs that necessitated them were fixed,
- so use relative filenames. -}
input cwd oldgit
| oldgit = map (absPathFrom cwd) files
| otherwise = map (relPathDirToFile cwd . absPathFrom cwd) files
output cwd oldgit
| oldgit = map (torel cwd . topair)
| otherwise = map topair
topair l = (Git.Filename.decode file, value)
where
file = join sep $ beginning bits
value = end bits !! 0
file' oldgit
| oldgit = absPathFrom cwd file
| otherwise = relPathDirToFile cwd $ absPathFrom cwd file
attrvalue attr l = end bits !! 0
where
bits = split sep l
sep = ": " ++ attr ++ ": "
torel cwd (file, value) = (relfile, value)
where
relfile
| startswith cwd' file = drop (length cwd') file
| otherwise = relPathDirToFile top' file
top = workTree repo
cwd' = cwd ++ "/"
top' = top ++ "/"


@ -30,8 +30,8 @@ runBool subcommand params repo = assertLocal repo $
{- Runs git in the specified repo, throwing an error if it fails. -}
run :: String -> [CommandParam] -> Repo -> IO ()
run subcommand params repo = assertLocal repo $
runBool subcommand params repo
>>! error $ "git " ++ show params ++ " failed"
unlessM (runBool subcommand params repo) $
error $ "git " ++ subcommand ++ " " ++ show params ++ " failed"
{- Runs a git subcommand and returns its output, lazily.
-


@ -9,6 +9,7 @@ module Git.Construct (
fromCurrent,
fromCwd,
fromAbsPath,
fromPath,
fromUrl,
fromUnknown,
localToUrl,


@ -1,6 +1,6 @@
{- git hash-object interface
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
- Copyright 2011-2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -11,22 +11,31 @@ import Common
import Git
import Git.Command
{- Injects a set of files into git, returning the shas of the objects
- and an IO action to call ones the the shas have been used. -}
hashFiles :: [FilePath] -> Repo -> IO ([Sha], IO ())
hashFiles paths repo = do
(pid, fromh, toh) <- hPipeBoth "git" $ toCommand $ git_hash_object repo
_ <- forkProcess (feeder toh)
hClose toh
shas <- map Ref . lines <$> hGetContentsStrict fromh
return (shas, ender fromh pid)
type HashObjectHandle = (PipeHandle, Handle, Handle)
{- Starts git hash-object and returns a handle. -}
hashObjectStart :: Repo -> IO HashObjectHandle
hashObjectStart repo = do
r@(_, _, toh) <- hPipeBoth "git" $
toCommand $ gitCommandLine params repo
return r
where
git_hash_object = gitCommandLine
[Param "hash-object", Param "-w", Param "--stdin-paths"]
feeder toh = do
hPutStr toh $ unlines paths
hClose toh
exitSuccess
ender fromh pid = do
hClose fromh
forceSuccess pid
params =
[ Param "hash-object"
, Param "-w"
, Param "--stdin-paths"
]
{- Stops git hash-object. -}
hashObjectStop :: HashObjectHandle -> IO ()
hashObjectStop (pid, from, to) = do
hClose to
hClose from
forceSuccess pid
{- Injects a file into git, returning the shas of the objects. -}
hashFile :: HashObjectHandle -> FilePath -> IO Sha
hashFile (_, from, to) file = do
hPutStrLn to file
hFlush to
Ref <$> hGetLine from


@ -65,7 +65,13 @@ typeChanged :: [FilePath] -> Repo -> IO [FilePath]
typeChanged = typeChanged' []
typeChanged' :: [CommandParam] -> [FilePath] -> Repo -> IO [FilePath]
typeChanged' ps l = pipeNullSplit $ prefix ++ ps ++ suffix
typeChanged' ps l repo = do
fs <- pipeNullSplit (prefix ++ ps ++ suffix) repo
-- git diff returns filenames relative to the top of the git repo;
-- convert to filenames relative to the cwd, like git ls-files.
let top = workTree repo
cwd <- getCurrentDirectory
return $ map (\f -> relPathDirToFile cwd $ top </> f) fs
where
prefix = [Params "diff --name-only --diff-filter=T -z"]
suffix = Param "--" : map File l


@ -18,8 +18,8 @@ import qualified Data.Map as M
import System.IO
import System.Cmd.Utils
import Data.String.Utils
import Utility.SafeCommand
import Utility.SafeCommand
import Common
import Git
import Git.Command


@ -83,13 +83,14 @@ merge_trees (Ref x) (Ref y) h = calc_merge h $ "diff-tree":diff_opts ++ [x, y]
{- For merging a single tree into the index. -}
merge_tree_index :: Ref -> CatFileHandle -> Repo -> Streamer
merge_tree_index (Ref x) h = calc_merge h $ "diff-index":diff_opts ++ ["--cached", x]
merge_tree_index (Ref x) h = calc_merge h $
"diff-index" : diff_opts ++ ["--cached", x]
diff_opts :: [String]
diff_opts = ["--raw", "-z", "-r", "--no-renames", "-l0"]
{- Calculates how to perform a merge, using git to get a raw diff,
- and returning a list suitable for update_index. -}
- and generating update-index input. -}
calc_merge :: CatFileHandle -> [String] -> Repo -> Streamer
calc_merge ch differ repo streamer = gendiff >>= go
where
@ -100,7 +101,7 @@ calc_merge ch differ repo streamer = gendiff >>= go
go (_:[]) = error "calc_merge parse error"
{- Given an info line from a git raw diff, and the filename, generates
- a line suitable for update_index that union merges the two sides of the
- a line suitable for update-index that union merges the two sides of the
- diff. -}
mergeFile :: String -> FilePath -> CatFileHandle -> Repo -> IO (Maybe String)
mergeFile info file h repo = case filter (/= nullSha) [Ref asha, Ref bsha] of


@ -45,7 +45,7 @@ port :: Repo -> Maybe Integer
port r =
case authpart uriPort r of
":" -> Nothing
(':':p) -> readMaybe p
(':':p) -> readish p
_ -> Nothing
{- Hostname of an URL repo, including any username (ie, "user@host") -}


@ -119,7 +119,7 @@ options = Option.common ++
"skip files not using a key-value backend"
] ++ Option.matcher
where
setnumcopies v = Annex.changeState $ \s -> s {Annex.forcenumcopies = readMaybe v }
setnumcopies v = Annex.changeState $ \s -> s {Annex.forcenumcopies = readish v }
setgitconfig :: String -> Annex ()
setgitconfig v = do
newg <- inRepo $ Git.Config.store v


@ -84,7 +84,7 @@ addIn name = addLimit $ check $ if name == "." then inAnnex else inremote
- of copies. -}
addCopies :: String -> Annex ()
addCopies num =
case readMaybe num :: Maybe Int of
case readish num :: Maybe Int of
Nothing -> error "bad number for --copies"
Just n -> addLimit $ check n
where


@ -15,6 +15,7 @@ module Remote (
removeKey,
hasKey,
hasKeyCheap,
whereisKey,
remoteTypes,
remoteList,
@ -48,16 +49,16 @@ import Logs.Trust
import Logs.Location
import Remote.List
{- Map of UUIDs of Remotes and their names. -}
remoteMap :: Annex (M.Map UUID String)
remoteMap = M.fromList . map (\r -> (uuid r, name r)) .
{- Map from UUIDs of Remotes to a calculated value. -}
remoteMap :: (Remote -> a) -> Annex (M.Map UUID a)
remoteMap c = M.fromList . map (\r -> (uuid r, c r)) .
filter (\r -> uuid r /= NoUUID) <$> remoteList
{- Map of UUIDs and their descriptions.
- The names of Remotes are added to suppliment any description that has
- been set for a repository. -}
uuidDescriptions :: Annex (M.Map UUID String)
uuidDescriptions = M.unionWith addName <$> uuidMap <*> remoteMap
uuidDescriptions = M.unionWith addName <$> uuidMap <*> remoteMap name
addName :: String -> String -> String
addName desc n
@ -66,7 +67,7 @@ addName desc n
| otherwise = n ++ " (" ++ desc ++ ")"
{- When a name is specified, looks up the remote matching that name.
- (Or it can be a UUID.) Only finds currently configured git remotes. -}
- Only finds currently configured git remotes. -}
byName :: Maybe String -> Annex (Maybe Remote)
byName Nothing = return Nothing
byName (Just n) = do


@ -8,7 +8,6 @@
module Remote.Bup (remote) where
import qualified Data.ByteString.Lazy.Char8 as L
import System.IO.Error
import qualified Data.Map as M
import System.Process
@ -54,6 +53,7 @@ gen r u c = do
removeKey = remove,
hasKey = checkPresent r bupr',
hasKeyCheap = bupLocal buprepo,
whereisKey = Nothing,
config = c,
repo = r,
remotetype = remote
@ -69,7 +69,7 @@ bupSetup u c = do
-- bup init will create the repository.
-- (If the repository already exists, bup init again appears safe.)
showAction "bup init"
bup "init" buprepo [] >>! error "bup init failed"
unlessM (bup "init" buprepo []) $ error "bup init failed"
storeBupUUID u buprepo
@ -167,9 +167,9 @@ storeBupUUID u buprepo = do
if Git.repoIsUrl r
then do
showAction "storing uuid"
onBupRemote r boolSystem "git"
[Params $ "config annex.uuid " ++ v]
>>! error "ssh failed"
unlessM (onBupRemote r boolSystem "git"
[Params $ "config annex.uuid " ++ v]) $
error "ssh failed"
else liftIO $ do
r' <- Git.Config.read r
let olduuid = Git.Config.get "annex.uuid" "" r'
@ -200,7 +200,7 @@ getBupUUID :: Git.Repo -> UUID -> Annex (UUID, Git.Repo)
getBupUUID r u
| Git.repoIsUrl r = return (u, r)
| otherwise = liftIO $ do
ret <- try $ Git.Config.read r
ret <- tryIO $ Git.Config.read r
case ret of
Right r' -> return (toUUID $ Git.Config.get "annex.uuid" "" r', r')
Left _ -> return (NoUUID, r)


@ -45,6 +45,7 @@ gen r u c = do
removeKey = remove dir,
hasKey = checkPresent dir,
hasKeyCheap = True,
whereisKey = Nothing,
config = Nothing,
repo = r,
remotetype = remote
@ -55,8 +56,8 @@ directorySetup u c = do
-- verify configuration is sane
let dir = fromMaybe (error "Specify directory=") $
M.lookup "directory" c
liftIO $ doesDirectoryExist dir
>>! error $ "Directory does not exist: " ++ dir
liftIO $ unlessM (doesDirectoryExist dir) $
error $ "Directory does not exist: " ++ dir
c' <- encryptionSetup c
-- The directory is stored in git config, not in this remote's


@ -20,6 +20,7 @@ import qualified Git.Command
import qualified Git.Config
import qualified Git.Construct
import qualified Annex
import Logs.Presence
import Annex.UUID
import qualified Annex.Content
import qualified Annex.BranchState
@ -27,6 +28,7 @@ import qualified Utility.Url as Url
import Utility.TempFile
import Config
import Init
import Types.Key
remote :: RemoteType
remote = RemoteType {
@ -79,6 +81,7 @@ gen r u _ = do
removeKey = dropKey r',
hasKey = inAnnex r',
hasKeyCheap = cheap,
whereisKey = Nothing,
config = Nothing,
repo = r',
remotetype = remote
@ -142,7 +145,8 @@ inAnnex r key
where
go e [] = return $ Left e
go _ (u:us) = do
res <- catchMsgIO $ Url.exists u
res <- catchMsgIO $
Url.check u (keySize key)
case res of
Left e -> go e us
v -> return v
@ -192,6 +196,14 @@ keyUrls r key = map tourl (annexLocations key)
dropKey :: Git.Repo -> Key -> Annex Bool
dropKey r key
| not $ Git.repoIsUrl r = liftIO $ onLocal r $ do
ensureInitialized
whenM (Annex.Content.inAnnex key) $ do
Annex.Content.lockContent key $
Annex.Content.removeAnnex key
Annex.Content.logStatus key InfoMissing
Annex.Content.saveState True
return True
| Git.repoIsHttp r = error "dropping from http repo not supported"
| otherwise = onRemote r (boolSystem, False) "dropkey"
[ Params "--quiet --force"
@ -230,7 +242,7 @@ copyToRemote r key
-- run copy from perspective of remote
liftIO $ onLocal r $ do
ensureInitialized
Annex.Content.saveState `after`
Annex.Content.saveState True `after`
Annex.Content.getViaTmp key
(rsyncOrCopyFile params keysrc)
| Git.repoIsSsh r = do


@ -45,6 +45,7 @@ gen r u c = do
removeKey = remove hooktype,
hasKey = checkPresent r hooktype,
hasKeyCheap = False,
whereisKey = Nothing,
config = Nothing,
repo = r,
remotetype = remote


@ -52,6 +52,7 @@ gen r u c = do
removeKey = remove o,
hasKey = checkPresent r o,
hasKeyCheap = False,
whereisKey = Nothing,
config = Nothing,
repo = r,
remotetype = remote
@ -181,8 +182,8 @@ withRsyncScratchDir a = do
liftIO $ createDirectoryIfMissing True tmp
nuke tmp `after` a tmp
where
nuke d = liftIO $
doesDirectoryExist d >>? removeDirectoryRecursive d
nuke d = liftIO $ whenM (doesDirectoryExist d) $
removeDirectoryRecursive d
rsyncRemote :: RsyncOpts -> [CommandParam] -> Annex Bool
rsyncRemote o params = do


@ -15,6 +15,7 @@ import Annex.Content
import Config
import Logs.Web
import qualified Utility.Url as Url
import Types.Key
remote :: RemoteType
remote = RemoteType {
@ -44,6 +45,7 @@ gen r _ _ =
removeKey = dropKey,
hasKey = checkKey,
hasKeyCheap = False,
whereisKey = Just getUrls,
config = Nothing,
repo = r,
remotetype = remote
@ -77,8 +79,8 @@ checkKey key = do
us <- getUrls key
if null us
then return $ Right False
else return . Right =<< checkKey' us
checkKey' :: [URLString] -> Annex Bool
checkKey' us = untilTrue us $ \u -> do
else return . Right =<< checkKey' key us
checkKey' :: Key -> [URLString] -> Annex Bool
checkKey' key us = untilTrue us $ \u -> do
showAction $ "checking " ++ u
liftIO $ Url.exists u
liftIO $ Url.check u (keySize key)

Seek.hs

@ -14,11 +14,9 @@ module Seek where
import Common.Annex
import Types.Command
import Types.Key
import Backend
import qualified Annex
import qualified Git
import qualified Git.LsFiles as LsFiles
import qualified Git.CheckAttr
import qualified Limit
import qualified Option
@ -28,26 +26,12 @@ seekHelper a params = inRepo $ \g -> runPreserveOrder (`a` g) params
withFilesInGit :: (FilePath -> CommandStart) -> CommandSeek
withFilesInGit a params = prepFiltered a $ seekHelper LsFiles.inRepo params
withAttrFilesInGit :: String -> ((FilePath, String) -> CommandStart) -> CommandSeek
withAttrFilesInGit attr a params = do
files <- seekHelper LsFiles.inRepo params
prepFilteredGen a fst $ inRepo $ Git.CheckAttr.lookup attr files
withNumCopies :: (Maybe Int -> FilePath -> CommandStart) -> CommandSeek
withNumCopies a params = withAttrFilesInGit "annex.numcopies" go params
where
go (file, v) = a (readMaybe v) file
withBackendFilesInGit :: (BackendFile -> CommandStart) -> CommandSeek
withBackendFilesInGit a params =
prepBackendPairs a =<< seekHelper LsFiles.inRepo params
withFilesNotInGit :: (BackendFile -> CommandStart) -> CommandSeek
withFilesNotInGit :: (FilePath -> CommandStart) -> CommandSeek
withFilesNotInGit a params = do
{- dotfiles are not acted on unless explicitly listed -}
files <- filter (not . dotfile) <$> seek ps
dotfiles <- if null dotps then return [] else seek dotps
prepBackendPairs a $ preserveOrder params (files++dotfiles)
prepFiltered a $ return $ preserveOrder params (files++dotfiles)
where
(dotps, ps) = partition dotfile params
seek l = do
@ -65,20 +49,18 @@ withFilesToBeCommitted :: (String -> CommandStart) -> CommandSeek
withFilesToBeCommitted a params = prepFiltered a $
seekHelper LsFiles.stagedNotDeleted params
withFilesUnlocked :: (BackendFile -> CommandStart) -> CommandSeek
withFilesUnlocked :: (FilePath -> CommandStart) -> CommandSeek
withFilesUnlocked = withFilesUnlocked' LsFiles.typeChanged
withFilesUnlockedToBeCommitted :: (BackendFile -> CommandStart) -> CommandSeek
withFilesUnlockedToBeCommitted :: (FilePath -> CommandStart) -> CommandSeek
withFilesUnlockedToBeCommitted = withFilesUnlocked' LsFiles.typeChangedStaged
withFilesUnlocked' :: ([FilePath] -> Git.Repo -> IO [FilePath]) -> (BackendFile -> CommandStart) -> CommandSeek
withFilesUnlocked' :: ([FilePath] -> Git.Repo -> IO [FilePath]) -> (FilePath -> CommandStart) -> CommandSeek
withFilesUnlocked' typechanged a params = do
-- unlocked files have changed type from a symlink to a regular file
top <- fromRepo Git.workTree
typechangedfiles <- seekHelper typechanged params
unlockedfiles <- liftIO $ filterM notSymlink $
map (\f -> top ++ "/" ++ f) typechangedfiles
prepBackendPairs a unlockedfiles
let unlockedfiles = liftIO $ filterM notSymlink typechangedfiles
prepFiltered a unlockedfiles
withKeys :: (Key -> CommandStart) -> CommandSeek
withKeys a params = return $ map (a . parse) params
@ -107,20 +89,13 @@ withNothing _ _ = error "This command takes no parameters."
prepFiltered :: (FilePath -> CommandStart) -> Annex [FilePath] -> Annex [CommandStart]
prepFiltered a = prepFilteredGen a id
prepBackendPairs :: (BackendFile -> CommandStart) -> CommandSeek
prepBackendPairs a fs = prepFilteredGen a snd (chooseBackends fs)
prepFilteredGen :: (b -> CommandStart) -> (b -> FilePath) -> Annex [b] -> Annex [CommandStart]
prepFilteredGen a d fs = do
prepFiltered a fs = do
matcher <- Limit.getMatcher
map (proc matcher) <$> fs
where
proc matcher v = do
let f = d v
proc matcher f = do
ok <- matcher f
if ok then a v else return Nothing
if ok then a f else return Nothing
notSymlink :: FilePath -> IO Bool
notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f


@ -36,6 +36,7 @@ data Command = Command
{ cmdoptions :: [Option] -- command-specific options
, cmdnorepo :: Maybe (IO ()) -- an action to run when not in a repo
, cmdcheck :: [CommandCheck] -- check stage
, cmdoneshot :: Bool -- don't save state after running
, cmdname :: String
, cmdparamdesc :: String -- description of params for usage
, cmdseek :: [CommandSeek] -- seek stage


@ -69,8 +69,8 @@ readKey s = if key == Just stubKey then Nothing else key
findfields _ v = v
addbackend k v = Just k { keyBackendName = v }
addfield 's' k v = Just k { keySize = readMaybe v }
addfield 'm' k v = Just k { keyMtime = readMaybe v }
addfield 's' k v = Just k { keySize = readish v }
addfield 'm' k v = Just k { keyMtime = readish v }
addfield _ _ _ = Nothing
prop_idempotent_key_read_show :: Key -> Bool


@ -55,6 +55,8 @@ data RemoteA a = Remote {
-- Some remotes can check hasKey without an expensive network
-- operation.
hasKeyCheap :: Bool,
-- Some remotes can provide additional details for whereis.
whereisKey :: Maybe (Key -> a [String]),
-- a Remote can have a persistent configuration store
config :: Maybe RemoteConfig,
-- git configuration for the remote


@ -7,8 +7,6 @@
module Upgrade.V0 where
import System.IO.Error (try)
import Common.Annex
import Annex.Content
import qualified Upgrade.V1
@ -47,7 +45,7 @@ getKeysPresent0 dir = do
return $ map fileKey0 files
where
present d = do
result <- try $
result <- tryIO $
getFileStatus $ dir ++ "/" ++ takeFileName d
case result of
Right s -> return $ isRegularFile s


@ -7,7 +7,6 @@
module Upgrade.V1 where
import System.IO.Error (try)
import System.Posix.Types
import Data.Char
@ -183,7 +182,7 @@ readLog1 file = catchDefaultIO (parseLog <$> readFileStrict file) []
lookupFile1 :: FilePath -> Annex (Maybe (Key, Backend))
lookupFile1 file = do
tl <- liftIO $ try getsymlink
tl <- liftIO $ tryIO getsymlink
case tl of
Left _ -> return Nothing
Right l -> makekey l
@ -216,7 +215,7 @@ getKeyFilesPresent1' dir = do
liftIO $ filterM present files
where
present f = do
result <- try $ getFileStatus f
result <- tryIO $ getFileStatus f
case result of
Right s -> return $ isRegularFile s
Left _ -> return False


@ -50,7 +50,7 @@ upgrade = do
mapM_ (\(k, f) -> inject f $ logFile k) =<< locationLogs
mapM_ (\f -> inject f f) =<< logFiles old
saveState
saveState False
showProgress
when e $ do


@ -76,6 +76,8 @@ paramDate :: String
paramDate = "DATE"
paramFormat :: String
paramFormat = "FORMAT"
paramFile :: String
paramFile = "FILE"
paramKeyValue :: String
paramKeyValue = "K=V"
paramNothing :: String


@ -1,26 +0,0 @@
{- monadic conditional operators
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Utility.Conditional where
import Control.Monad (when, unless)
whenM :: Monad m => m Bool -> m () -> m ()
whenM c a = c >>= flip when a
unlessM :: Monad m => m Bool -> m () -> m ()
unlessM c a = c >>= flip unless a
(>>?) :: Monad m => m Bool -> m () -> m ()
(>>?) = whenM
(>>!) :: Monad m => m Bool -> m () -> m ()
(>>!) = unlessM
-- low fixity allows eg, foo bar >>! error $ "failed " ++ meep
infixr 0 >>?
infixr 0 >>!


@ -8,8 +8,8 @@
module Utility.CopyFile (copyFileExternal) where
import System.Directory (doesFileExist, removeFile)
import Control.Monad.IfElse
import Utility.Conditional
import Utility.SafeCommand
import qualified Build.SysConfig as SysConfig


@ -12,15 +12,16 @@ import System.Posix.Files
import System.Directory
import Control.Exception (throw)
import Control.Monad
import Control.Monad.IfElse
import Utility.SafeCommand
import Utility.Conditional
import Utility.TempFile
import Utility.Exception
{- Moves one filename to another.
- First tries a rename, but falls back to moving across devices if needed. -}
moveFile :: FilePath -> FilePath -> IO ()
moveFile src dest = try (rename src dest) >>= onrename
moveFile src dest = tryIO (rename src dest) >>= onrename
where
onrename (Right _) = return ()
onrename (Left e)
@ -40,11 +41,10 @@ moveFile src dest = try (rename src dest) >>= onrename
Param src, Param tmp]
unless ok $ do
-- delete any partial
_ <- try $
removeFile tmp
_ <- tryIO $ removeFile tmp
rethrow
isdir f = do
r <- try (getFileStatus f)
r <- tryIO $ getFileStatus f
case r of
(Left _) -> return False
(Right s) -> return $ isDirectory s

Utility/Exception.hs Normal file

@ -0,0 +1,39 @@
{- Simple IO exception handling
-
- Copyright 2011-2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Utility.Exception where
import Prelude hiding (catch)
import Control.Exception
import Control.Applicative
{- Catches IO errors and returns a Bool -}
catchBoolIO :: IO Bool -> IO Bool
catchBoolIO a = catchDefaultIO a False
{- Catches IO errors and returns a Maybe -}
catchMaybeIO :: IO a -> IO (Maybe a)
catchMaybeIO a = catchDefaultIO (Just <$> a) Nothing
{- Catches IO errors and returns a default value. -}
catchDefaultIO :: IO a -> a -> IO a
catchDefaultIO a def = catchIO a (const $ return def)
{- Catches IO errors and returns the error message. -}
catchMsgIO :: IO a -> IO (Either String a)
catchMsgIO a = dispatch <$> tryIO a
where
dispatch (Left e) = Left $ show e
dispatch (Right v) = Right v
{- catch specialized for IO errors only -}
catchIO :: IO a -> (IOException -> IO a) -> IO a
catchIO = catch
{- try specialized for IO errors only -}
tryIO :: IO a -> IO (Either IOException a)
tryIO = try
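A hypothetical usage sketch for these helpers (not part of the commit; the lock-file cleanup is an assumed example):

import Utility.Exception
import System.Directory (removeFile)

-- Remove a stale lock file; a missing file or permission error is
-- caught and reported as failure rather than thrown as an exception.
cleanupLock :: FilePath -> IO Bool
cleanupLock f = catchBoolIO $ do
	removeFile f
	return True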


@ -88,7 +88,7 @@ gen = filter (not . empty) . fuse [] . scan [] . decode_c
| c == '}' = foundvar f var (readjustify $ reverse p) cs
| otherwise = inpad (c:p) f var cs
inpad p f var [] = Const (novar $ p++";"++var) : f
readjustify = getjustify . fromMaybe 0 . readMaybe
readjustify = getjustify . fromMaybe 0 . readish
getjustify i
| i == 0 = UnJustified
| i < 0 = LeftJustified (-1 * i)


@ -8,9 +8,7 @@
module Utility.Misc where
import System.IO
import System.IO.Error (try)
import Control.Monad
import Control.Applicative
{- A version of hgetContents that is not lazy. Ensures file is
- all read before it gets closed. -}
@ -37,22 +35,3 @@ separate c l = unbreak $ break c l
{- Breaks out the first line. -}
firstLine :: String-> String
firstLine = takeWhile (/= '\n')
{- Catches IO errors and returns a Bool -}
catchBoolIO :: IO Bool -> IO Bool
catchBoolIO a = catchDefaultIO a False
{- Catches IO errors and returns a Maybe -}
catchMaybeIO :: IO a -> IO (Maybe a)
catchMaybeIO a = catchDefaultIO (Just <$> a) Nothing
{- Catches IO errors and returns a default value. -}
catchDefaultIO :: IO a -> a -> IO a
catchDefaultIO a def = catch a (const $ return def)
{- Catches IO errors and returns the error message. -}
catchMsgIO :: IO a -> IO (Either String a)
catchMsgIO a = dispatch <$> try a
where
dispatch (Left e) = Left $ show e
dispatch (Right v) = Right v


@ -7,8 +7,10 @@
module Utility.PartialPrelude where
import qualified Data.Maybe
{- read should be avoided, as it throws an error
- Instead, use: readMaybe -}
- Instead, use: readish -}
read :: Read a => String -> a
read = Prelude.read
@ -36,16 +38,18 @@ last = Prelude.last
-
- Ignores leading/trailing whitespace, and throws away any trailing
- text after the part that can be read.
-
- readMaybe is available in Text.Read in new versions of GHC,
- but that one requires the entire string to be consumed.
-}
readMaybe :: Read a => String -> Maybe a
readMaybe s = case reads s of
readish :: Read a => String -> Maybe a
readish s = case reads s of
((x,_):_) -> Just x
_ -> Nothing
{- Like head but Nothing on empty list. -}
headMaybe :: [a] -> Maybe a
headMaybe [] = Nothing
headMaybe v = Just $ Prelude.head v
headMaybe = Data.Maybe.listToMaybe
{- Like last but Nothing on empty list. -}
lastMaybe :: [a] -> Maybe a


@ -47,7 +47,10 @@ dirContains a b = a == b || a' == b' || (a'++"/") `isPrefixOf` b'
a' = norm a
b' = norm b
{- Converts a filename into a normalized, absolute path. -}
{- Converts a filename into a normalized, absolute path.
-
- Unlike Directory.canonicalizePath, this does not require the path
- already exists. -}
absPath :: FilePath -> IO FilePath
absPath file = do
cwd <- getCurrentDirectory

Utility/State.hs Normal file

@ -0,0 +1,26 @@
{- state monad support
-
- Copyright 2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Utility.State where
import Control.Monad.State.Strict
{- Modifies Control.Monad.State's state, forcing a strict update.
- This avoids building thunks in the state and leaking.
- Why it's not the default, I don't know.
-
- Example: changeState $ \s -> s { foo = bar }
-}
changeState :: MonadState s m => (s -> s) -> m ()
changeState f = do
x <- get
put $! f x
{- Gets a value from the internal state, selected by the passed value
- constructor. -}
getState :: MonadState s m => (s -> a) -> m a
getState = gets


@ -12,7 +12,7 @@ import System.IO
import System.Posix.Process hiding (executeFile)
import System.Directory
import Utility.Misc
import Utility.Exception
import Utility.Path
{- Runs an action like writeFile, writing to a temp file first and


@ -7,6 +7,7 @@
module Utility.Url (
URLString,
check,
exists,
canDownload,
download,
@ -14,25 +15,39 @@ module Utility.Url (
) where
import Control.Applicative
import Control.Monad
import qualified Network.Browser as Browser
import Network.HTTP
import Network.URI
import Data.Maybe
import Utility.SafeCommand
import Utility.Path
type URLString = String
{- Checks that an url exists and could be successfully downloaded. -}
exists :: URLString -> IO Bool
{- Checks that an url exists and could be successfully downloaded,
- also checking that its size, if available, matches a specified size. -}
check :: URLString -> Maybe Integer -> IO Bool
check url expected_size = handle <$> exists url
where
handle (False, _) = False
handle (True, Nothing) = True
handle (True, s) = expected_size == s
{- Checks that an url exists and could be successfully downloaded,
- also returning its size if available. -}
exists :: URLString -> IO (Bool, Maybe Integer)
exists url =
case parseURI url of
Nothing -> return False
Nothing -> return (False, Nothing)
Just u -> do
r <- request u HEAD
case rspCode r of
(2,_,_) -> return True
_ -> return False
(2,_,_) -> return (True, size r)
_ -> return (False, Nothing)
where
size = liftM read . lookupHeader HdrContentLength . rspHeaders
canDownload :: IO Bool
canDownload = (||) <$> inPath "wget" <*> inPath "curl"
@ -73,12 +88,32 @@ get url =
{- Makes a http request of an url. For example, HEAD can be used to
- check if the url exists, or GET used to get the url content (best for
- small urls). -}
- small urls).
-
- This does its own redirect following because Browser's is buggy for HEAD
- requests.
-}
request :: URI -> RequestMethod -> IO (Response String)
request url requesttype = Browser.browse $ do
Browser.setErrHandler ignore
Browser.setOutHandler ignore
Browser.setAllowRedirects True
snd <$> Browser.request (mkRequest requesttype url :: Request_String)
request url requesttype = go 5 url
where
go :: Int -> URI -> IO (Response String)
go 0 _ = error "Too many redirects "
go n u = do
rsp <- Browser.browse $ do
Browser.setErrHandler ignore
Browser.setOutHandler ignore
Browser.setAllowRedirects False
snd <$> Browser.request (mkRequest requesttype u :: Request_String)
case rspCode rsp of
(3,0,x) | x /= 5 -> redir (n - 1) u rsp
_ -> return rsp
ignore = const $ return ()
redir n u rsp = do
case retrieveHeaders HdrLocation rsp of
[] -> return rsp
(Header _ newu:_) ->
case parseURIReference newu of
Nothing -> return rsp
Just newURI -> go n newURI_abs
where
newURI_abs = fromMaybe newURI (newURI `relativeTo` u)
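
A hedged sketch of how a caller might use the new size-aware check (hypothetical function name; assumes Utility.Url is imported):
<pre>
-- True when the url responds and its Content-Length, if the server
-- sends one, matches the expected size.
urlLooksRight :: URLString -> Integer -> IO Bool
urlLooksRight url expected = check url (Just expected)
</pre>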

29
debian/changelog vendored
View file

@ -1,3 +1,32 @@
git-annex (3.20120124) UNRELEASED; urgency=low
* Use the haskell IfElse library.
* Avoid repeated location log commits when a remote is receiving files.
Done by adding a oneshot mode, in which location log changes are
written to the journal, but not committed. Taking advantage of
git-annex's existing ability to recover in this situation. This is
used by git-annex-shell and other places where changes are made to
a remote's location log.
* S3: Fix irrefutable pattern failure when accessing encrypted S3
credentials.
* addurl: Added a --file option, which can be used to specify what
file the url is added to. This can be used to override the default
filename that is used when adding an url, which is based on the url.
Or, when the file already exists, the url is recorded as another
location of the file.
* addurl: Normalize badly encoded urls.
* Fix teardown of stale cached ssh connections.
* When checking that an url has a key, verify that the Content-Length,
if available, matches the size of the key.
* addurl --fast: Verifies that the url can be downloaded (only getting
its head), and records the size in the key.
* Fixed to use the strict state monad, to avoid leaking all kinds of memory
due to lazy state update thunks when adding/fixing many files.
* Fixed some memory leaks that occurred when committing journal files.
* whereis: Prints the urls of files that the web special remote knows about.
-- Joey Hess <joeyh@debian.org> Tue, 24 Jan 2012 16:21:55 -0400
git-annex (3.20120123~bpo60+1) squeeze-backports; urgency=low
* Fixed build dependency that made the previous backport FTBFS on several

1
debian/control vendored
View file

@ -14,6 +14,7 @@ Build-Depends:
libghc6-testpack-dev,
libghc6-json-dev,
libghc6-quickcheck2-dev,
libghc6-ifelse-dev,
ikiwiki,
perlmagick,
git,

1
debian/manpages vendored
View file

@ -1 +0,0 @@
git-annex.1

View file

@ -2,3 +2,5 @@ This is git-annex's bug list. Link bugs to [[bugs/done]] when done.
[[!inline pages="./bugs/* and !./bugs/done and !link(done)
and !*/Discussion" actions=yes postform=yes show=0 archive=yes]]
[[!edittemplate template=templates/bugtemplate match="bugs/*" silent=yes]]

View file

@ -0,0 +1,35 @@
It seems that git-annex copies every individual file in a separate
transaction. This is quite costly for mass transfers: each file involves a
separate rsync invocation and the creation of a new commit. Even with a
meager thousand files or so in the annex, I have to wait for fifteen
minutes to copy the contents to another disk, simply because every
individual file involves some disk thrashing. Also, it seems suspicious
that the git-annex branch would get a thousand commits of history from the
simple procedure of copying everything to a new repository. Surely it would
be better to first copy everything and then create only a single commit
that registers the changes to the files' availability?
> git-annex is very careful to commit as infrequently as possible,
> and the current version makes *1* commit after all the copies are
> complete, even if it transferred a billion files. The only overhead
> incurred for each file is writing a journal file.
> You must have an old version.
> --[[Joey]]
(I'm also not quite clear on why rsync is being used when both repositories
are local. It seems to be just overhead.)
> Even when copying to another disk it's often on
> some slow bus, and the file is by definition large. So it's
> nice to support resumes of interrupted transfers of files.
> Also because rsync has a handy progress display that is hard to get with cp.
>
> (However, if the copy is to another directory in the same disk, it does
> use cp, and even supports really fast copies on COW filesystems.)
> --[[Joey]]
---
Oneshot mode is now implemented, making git-annex-shell and other
short lifetime processes not bother with committing changes.
[[done]] --[[Joey]]

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawk6QAwUsFHpr3Km1yQbg8hf3S7RDYf7hX4"
nickname="Lauri"
subject="comment 1"
date="2012-01-28T00:17:37Z"
content="""
To me it very much seems that a commit per file is indeed created at the remote end, although not at the local end. See the following transcript: <https://gist.github.com/1691714>.
"""]]

View file

@ -0,0 +1,15 @@
[[!comment format=mdwn
username="http://joey.kitenet.net/"
nickname="joey"
subject="comment 2"
date="2012-01-28T19:32:36Z"
content="""
Ah, I see, I was not thinking about the location log update that's done on the remote side.
For transfers over ssh, that's a separate git-annex-shell invoked per change. For local-local transfers, it's all done in a single process but it spins up a state to handle the remote and then immediately shuts it down, also generating a commit.
In either case, I think there is a nice fix. Since git-annex *does* have a journal nowadays, and goes to all the bother to
support recovery if a process was interrupted and journalled changes that did not get committed, there's really no reason in either of these cases for the remote end to do anything more than journal the change. The next time git-annex is actually run on the remote, and needs to look up location information, it will merge the journalled changes into the branch, in a single commit.
My only real concern is that some remotes might *never* have git-annex run in them directly, and would just continue to accumulate journal files forever. Although, due to the way the journal is structured, it can contain at most as many files as the git-annex branch. However, the number of files in it is expected to be relatively small, and it might get a trifle inefficient, as it lacks directory hashing. These performance problems could certainly be dealt with if they do turn out to be a problem.
"""]]

View file

@ -0,0 +1,11 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawk6QAwUsFHpr3Km1yQbg8hf3S7RDYf7hX4"
nickname="Lauri"
subject="comment 3"
date="2012-01-29T01:51:35Z"
content="""
That sounds just fine, but indeed my use case was a bare backup/transfer repository that is meant to always be only at the remote end of git-annex operations. So why not as well do a single commit after everything has been copied and journaled? That's what's done at the other end too, after all. Or, if commits are to be minimized, just stage the journal into the index before finishing, but don't commit it yet?
(I would actually prefer this mode of usage for other git-annex operations, too. In git you can add stuff little by little and commit them all in one go. In git-annex the add immediately creates a commit, which is unexpected and a bit annoying.)
"""]]

View file

@ -0,0 +1,42 @@
For the record, `git annex add` has had a series of memory leaks.
Mostly these are minor -- until you need to check in a few
million files in a single operation.
If this happens to you, git-annex will run out of memory and stop.
(Generally well before your system runs out of memory, since it has some
built-in ulimits.) You can recover by just re-running the `git annex add`
-- it will automatically pick up where it left off.
A history of the leaks:
* Originally, `git annex add` remembered all the files
it had added, and fed them to git at the end. Of course
that made its memory use grow, so it was fixed to periodically
flush its buffer. Fixed in version 0.20110417.
* Something called a "lazy state monad" caused "thunks" to build
up and memory to leak. Also affected other git annex commands
than `add`. Adding files using a SHA* backend hit the worst.
Fixed in versions after 3.20120123.
* Committing journal files turned out to have another memory leak.
If adding a lot of files ran out of memory, the journal was left behind,
and this could affect other git-annex commands. Fixed in versions after
3.20120123.
* Something is still causing a slow leak when adding files.
I tested by adding many copies of the whole linux kernel
tree into the annex using the WORM backend, and once
it had added 1 million files, git-annex used ~100 mb of ram.
That's 100 bytes leaked per file on average .. roughly the
size of a filename? It's worth noting that `git add` uses more memory
than that in such a large tree.
**not fixed yet**
* (Note that `git ls-files --others`, which is used to find files to add,
also uses surprisingly large amounts
of memory when you have a lot of files. It buffers
the entire list, so it can compare it with the files in the index,
before outputting anything.
This is Not Our Problem, but I'm sure the git developers
would appreciate a patch that fixes it.)

View file

@ -1,3 +1,16 @@
This bug is reopened to track some new UTF-8 filename issues caused by GHC
7.4. In this version of GHC, git-annex's hack to support filenames in any
encoding no longer works. Even unicode filenames fail to work when
git-annex is built with 7.4. --[[Joey]]
This bug is now fixed in current master. Once again, git-annex will work
for all filename encodings, and all system encodings. It will
only build with the new GHC. [[done]] --[[Joey]]
----
Old, now fixed bug report follows:
There are problems with displaying filenames in UTF8 encoding, as shown here:
$ echo $LANG
@ -45,7 +58,7 @@ It looks like the common latin1-to-UTF8 encoding. Functionality other than otupu
> outputting a filename (assuming the filename is encoded using the
> user's configured encoding), and allow haskell's output encoding to then
> encode it according to the user's locale configuration.
> > This is now [[implemented|done]]. I'm not very happy that I have to watch
> > This is now implemented. I'm not very happy that I have to watch
> > out for any place that a filename is output and call `filePathToString`
> > on it, but there are really not too many such places in git-annex.
> >
@ -66,39 +79,3 @@ It looks like the common latin1-to-UTF8 encoding. Functionality other than otupu
> > On second thought, I switched to this. Any decoding of a filename
> > is going to make someone unhappy; the previous approach broke
> > non-utf8 filenames.
----
Simpler test case:
<pre>
import Codec.Binary.UTF8.String
import System.Environment
main = do
args <- getArgs
let file = decodeString $ head args
putStrLn $ "file is: " ++ file
putStr =<< readFile file
</pre>
If I pass this a filename like 'ü', it will fail, and notice
the bad encoding of the filename in the error message:
<pre>
$ echo hi > ü; runghc foo.hs ü
file is: ü
foo.hs: <20>: openFile: does not exist (No such file or directory)
</pre>
On the other hand, if I remove the decodeString, it prints the filename
wrong, while accessing it right:
<pre>
$ runghc foo.hs ü
file is: üa
hi
</pre>
The only way that seems to consistently work is to delay decoding the
filename to places where it's output. But then it's easy to miss some.
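
For completeness, a sketch of the "decode only at output" variant described above (assuming the GHC 7.0-era behaviour where getArgs hands back raw bytes):
<pre>
import Codec.Binary.UTF8.String
import System.Environment
main = do
	args <- getArgs
	let file = head args                        -- raw form, used for file access
	putStrLn $ "file is: " ++ decodeString file -- decode only when displaying
	putStr =<< readFile file
</pre>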

View file

@ -0,0 +1,11 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawk6QAwUsFHpr3Km1yQbg8hf3S7RDYf7hX4"
nickname="Lauri"
subject="comment 5"
date="2012-01-26T22:13:18Z"
content="""
I also encountered Adam's bug. The problem seems to be that communication with the git process is done with `Char8`-bytestrings. So, when `L.unpack` is called, all filenames that git outputs (with `ls-files` or `ls-tree`) are interpreted to be in latin-1, which wreaks havoc if they are really in UTF-8.
I suspect that it would be enough to just switch to standard `String`s (or `Data.Text.Text`) instead of bytestrings for textual data, and to `Word8`-bytestrings for pure binary data. GHC should nowadays handle locale-dependent encoding of `String`s transparently.
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="http://joey.kitenet.net/"
nickname="joey"
subject="comment 6"
date="2012-01-27T21:00:06Z"
content="""
Lauri, what version of GHC do you have that behaves this way? 7.0.4 does not.
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawk6QAwUsFHpr3Km1yQbg8hf3S7RDYf7hX4"
nickname="Lauri"
subject="comment 7"
date="2012-01-28T00:21:40Z"
content="""
7.2. nomeata already explained the issue. I got utf-8 filenames to work on a utf-8 locale by switching from Char8-bytestrings to UTF8-bytestrings, and adding `hSetEncoding h localeEncoding` to suitable places. Making things work properly with an arbitrary locale encoding would be more complicated.
"""]]

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="http://joey.kitenet.net/"
nickname="joey"
subject="comment 8"
date="2012-01-28T19:40:34Z"
content="""
Lauri, a scratch patch would be very helpful. Encoding stuff makes my head explode.
However, I am very worried by haskell's changes WRT unicode and filenames. Based on user input, git-annex users like to use it on diverse sets of files, with diverse and ill-defined encodings. Faffing about with converting between encodings seems likely to spectacularly fail.
"""]]

View file

@ -0,0 +1,11 @@
It'd be nice to be able to run "git annex version" -- and maybe some other
commands, like "git annex" itself for the help text, without having to be
inside a git repo. Right now it requires you to be in a git repo even if
it's not a git-annex repo.
> You need a newer version of git-annex. --[[Joey]]
joey@gnu:/>git annex version
git-annex version: 3.20120124
[[done]]

View file

@ -15,3 +15,21 @@ From time to time, releases of git-annex are uploaded
Some operating systems include git-annex in easily prepackaged form and
others need some manual work. See [[install]] for details.
## git branches
The git repository has some branches:
* `debian-stable` contains the latest backport of git-annex to Debian
stable.
* `no-s3` disables the S3 special remote, for systems that lack the
necessary haskell library.
* `old-monad-control` is for systems that don't have a newer monad-control
library.
* `tweak-fetch` adds support for the git tweak-fetch hook, which has
been proposed and implemented but not yet accepted into git.
* `ghc7.0` supports versions of ghc older than 7.4, which
had a major change to filename encoding.
* `setup` contains configuration for this website
* `pristine-tar` contains [pristine-tar](http://kitenet.net/~joey/code/pristine-tar)
data to create tarballs of any past git-annex release.

View file

@ -0,0 +1,7 @@
My annex contains several large files that I have unlocked, edited, and committed again, i.e. the annex contains the version history of those files. However, I don't want the history -- keeping the latest version is good enough for me. Running `git annex unused` won't detect those old versions, though, because they aren't unused as old Git revisions still refer to them. So I wonder:
1. What is the best way to get rid of the old versions of files in the annex?
2. What is the best way to detect old versions of files in the annex?
I guess I could run `git rebase -i` to squash commits to those files into one commit, thereby getting rid of the references to the old copies, but that approach feels awkward and error-prone. Is anyone aware of a better way?

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="http://peter-simons.myopenid.com/"
ip="77.186.179.173"
subject="comment 1"
date="2012-02-09T18:53:00Z"
content="""
Sorry for commenting on my own question ... I think I just figured out that `git annex unused` *does* in fact do what I want. When I tried it, it just didn't show the obsolete versions of the files I edited because I hadn't yet synchronized all repositories, so that was why the obsolete versions were still considered used.
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="http://joey.kitenet.net/"
nickname="joey"
subject="comment 2"
date="2012-02-09T19:42:28Z"
content="""
Yes, contents are still considered used while tags or refs refer to them, including remote tracking branches like `remotes/origin/master`.
"""]]

View file

@ -0,0 +1,16 @@
git-annex can already be used to store data in several cloud services:
Amazon S3, rsync.net, Tahoe-LAFS, The Internet Archive.
I would like to support as many other cloud services as possible/reasonable.
* [[swift|todo/wishlist:_swift_backend]]
* Dropbox (I had been reluctant to go there due to it using a non-free client,
which I have no interest in installing, but there is actually an API,
and already a
[haskell module to use it](http://hackage.haskell.org/package/dropbox-sdk).
Would need to register for an API key.
<http://www.dropbox.com/developers/start/core>.
Annoyingly, Dropbox reviews each app before granting it production status.
Whoops my interest level dropped by 99%.)
Post others in the comments. --[[Joey]]

View file

@ -0,0 +1,6 @@
Hi,
I use git-annex 3.20120123 on a debian-testing amd-64 machine with software RAID6 and LVM2 on it. I needed to move the whole `/home` directory to another LV (the new LV is on an encrypted PV, the old LV is encrypted and not properly aligned; I'm changing from an encrypted `/home` only to everything except `/boot` encrypted), so I used `rsync -aAXH` from a `ro` mounted `/home` to a new LV mounted on `/mnt/home_2`. After the move was complete I ran `git annex fsck` on my (4TB of) data. The fsck finds some files bad, and moves them to the `..../bad` directory. So far so good, this is how it should be, right? But then - I have a file with the sha1sums of all my files. So I checked the 'bad' file against that. It was OK. Then I computed the SHA256 of the file - this is used by `git annex fsck`. It was OK, too. So how did it happen that the file was marked as bad? Am I missing something here? Could it be related to the hardware (HDDs) and silent data corruption? Or is it an undesirable effect of rsync? Or maybe the fsck is at fault here?
Any ideas?

View file

@ -0,0 +1,15 @@
[[!comment format=mdwn
username="http://joey.kitenet.net/"
nickname="joey"
subject="comment 1"
date="2012-02-14T16:58:33Z"
content="""
Well, it should only move files to `.git/annex/bad/` if their filesize is wrong, or their checksum is wrong.
You can try moving a file out of `.git/annex/bad/` and re-run fsck and see if it fails it again. (And if it does, paste in a log!)
To do that --
Suppose you have a file `.git/annex/bad/SHA256-s33--5dc45521382f1c7974d9dbfcff1246370404b952` and you know that file `foobar` was supposed to have that content (you can check that `foobar` is a symlink to that SHA value). Then reinject it:
`git annex reinject .git/annex/bad/SHA256-s33--5dc45521382f1c7974d9dbfcff1246370404b952 foobar`
"""]]

View file

@ -145,9 +145,14 @@ subdirectories).
* addurl [url ...]
Downloads each url to a file, which is added to the annex.
Downloads each url to its own file, which is added to the annex.
To avoid immediately downloading the url, specify --fast
To avoid immediately downloading the url, specify --fast.
To specify what file the url is added to, specify --file. This changes
the behavior; now all the specified urls are recorded as alternate
locations from which the file can be downloaded. In this mode, addurl
can be used both to add new files and to add urls to existing files.
# REPOSITORY SETUP COMMANDS

View file

@ -32,6 +32,7 @@ To build and use git-annex, you will need:
* [HTTP](http://hackage.haskell.org/package/HTTP)
* [hS3](http://hackage.haskell.org/package/hS3)
* [json](http://hackage.haskell.org/package/json)
* [IfElse](http://hackage.haskell.org/package/IfElse)
* Shell commands
* [git](http://git-scm.com/)
* [uuid](http://www.ossp.org/pkg/lib/uuid/)

View file

@ -0,0 +1,4 @@
git-annex will be briefly presented at FOSDEM, on Sunday February 4th at 15:40.
[Details](http://fosdem.org/2012/schedule/event/gitannex).
Thanks to Richard Hartmann for making this presentation.

View file

@ -0,0 +1,27 @@
News for git-annex 3.20120123:
There was a bug in the handling of directory special remotes that
could cause partial file contents to be stored in them. If you use
a directory special remote, you should fsck it, to avoid potential
data loss.
Example: git annex fsck --from mydirectory
git-annex 3.20120123 released with [[!toggle text="these changes"]]
[[!toggleable text="""
* fsck --from: Fscking a remote is now supported. It's done by retrieving
the contents of the specified files from the remote, and checking them,
so can be an expensive operation. Still, if the remote is a special
remote, or a git repository that you cannot run fsck in locally, it's
nice to have the ability to fsck it.
* If you have any directory special remotes, now would be a good time to
fsck them, in case you were hit by the data loss bug fixed in the
previous release!
* fsck --from remote --fast: Avoids expensive file transfers, at the
expense of not checking file size and/or contents.
* Ssh connection caching is now enabled automatically by git-annex.
Only one ssh connection is made to each host per git-annex run, which
can speed some things up a lot, as well as avoiding repeated password
prompts. Concurrent git-annex processes also share ssh connections.
Cached ssh connections are shut down when git-annex exits.
* To disable the ssh caching (if for example you have your own broader
ssh caching configuration), set annex.sshcaching=false."""]]

12
doc/templates/bugtemplate.mdwn vendored Normal file
View file

@ -0,0 +1,12 @@
What steps will reproduce the problem?
What is the expected output? What do you see instead?
What version of git-annex are you using? On what operating system?
Please provide any additional information below.

View file

@ -0,0 +1,31 @@
[[!meta title="using assume-unstaged to speed up git with large trees of annexed files"]]
Git update-index's assume-unstaged feature can be used to speed
up `git status` and stuff by not statting the whole tree looking for changed
files.
This feature works quite well with git-annex. Since git-annex's files are
immutable, they aren't going to change out from under it, so this is a nice
fit. If you have a very large tree and `git status` is
annoyingly slow, you can turn it on:
git config core.ignoreStat true
When git mv and git rm are used, those changes *do* get noticed, even
on assume-unchanged files. When new files are added, eg by `git annex add`,
they are also noticed.
There are two gotchas. Both occur because `git add` does not stage
assume-unchanged files.
1. When an annexed file is moved to a different directory, it updates
the symlink, and runs `git add` on it. So the file will move,
but the changed symlink will not be noticed by git and it will commit a
dangling symlink.
2. When using `git annex migrate`, it changes the symlink and `git adds`
it. Again this won't be committed.
These can be worked around by running `git update-index --really-refresh`
after performing such operations. I hope that `git add` will be changed
to stage changes to assume-unchanged files, which would remove this
last complication. --[[Joey]]

View file

@ -8,10 +8,10 @@ The web can be used as a [[special_remote|special_remotes]] too.
Now the file is downloaded, and has been added to the annex like any other
file. So it can be renamed, copied to other repositories, and so on.
Note that git-annex assumes that, if the web site does not 404, the file is
still present on the web, and this counts as one [[copy|copies]] of the
file. So it will let you remove your last copy, trusting it can be
downloaded again:
Note that git-annex assumes that, if the web site does not 404, and has the
right file size, the file is still present on the web, and this counts as
one [[copy|copies]] of the file. So it will let you remove your last copy,
trusting it can be downloaded again:
# git annex drop example.com_video.mpeg
drop example.com_video.mpeg (checking http://example.com/video.mpeg) ok

View file

@ -9,3 +9,5 @@ copying the file, just dropping a symlink, etc.
The WORM backend doesn't care about file content, so it would be nice to
avoid transferring the content at all, and only send the size.
> [[done]] --[[Joey]]

View file

@ -1,25 +1,16 @@
short answer: no
Can it be built on Windows?
Long answer, quoting from a mail to someone else:
short answer: not yet
Well, I can tell you that it assumes a POSIX system, both in available
utilities and system calls, So you'd need to use cygwin or something
like that. (Perhaps you already are for git, I think git also assumes a
POSIX system.) So you need a Haskell that can target that. What this
page refers to as "GHC-Cygwin":
<http://www.haskell.org/ghc/docs/6.6/html/building/platforms.html>
I don't know where to get one. Did find this:
<http://copilotco.com/mail-archives/haskell-cafe.2007/msg00824.html>
First, you need to get some unix utilities for windows. Git of course.
Also rsync, and a `cp` command that understands at least `cp -p`, and
`uuid`, and `xargs` and `sha1sum`. Note that some of these could be
replaced with haskell libraries to some degree.
(There are probably also still some places where it assumes / as a path
separator, although I fixed some. Probably almost all are fixed now.)
There are probably still some places where it assumes / as a path
separator, although I have probably fixed almost all of them by now.
FWIW, git-annex works fine on OS X and other fine proprietary unixen. ;P
--[[Joey]]
----
Alternatively, windows versions of these functions could be found,
Then windows versions of these functions could be found,
which are all the ones that need POSIX, I think. A fair amount of this,
the stuff to do with signals and users, could be empty stubs in windows.
The file manipulation, particularly symlinks, would probably be the main
@ -63,3 +54,8 @@ sigCHLD
sigINT
unionFileModes
</pre>
A good starting point is
<http://hackage.haskell.org/package/unix-compat-0.3.0.1>. However, note
that its implementations of stuff like `createSymbolicLink` are stubs.
--[[Joey]]

View file

@ -31,7 +31,7 @@ Executable git-annex
Build-Depends: MissingH, hslogger, directory, filepath,
unix, containers, utf8-string, network, mtl, bytestring, old-locale, time,
pcre-light, extensible-exceptions, dataenc, SHA, process, json, HTTP,
base < 5, QuickCheck >= 2.1
base < 5, IfElse, QuickCheck >= 2.1
Executable git-annex-shell
Main-Is: git-annex-shell.hs

22
test.hs
View file

@ -11,10 +11,8 @@ import Test.QuickCheck
import System.Posix.Directory (changeWorkingDirectory)
import System.Posix.Files
import Control.Exception (bracket_, bracket, throw)
import System.IO.Error
import System.Posix.Env
import qualified Control.Exception.Extensible as E
import Control.Exception.Extensible
import qualified Data.Map as M
import System.IO.HVFS (SystemFS(..))
import Text.JSON
@ -131,7 +129,7 @@ test_init = "git-annex init" ~: TestCase $ innewrepo $ do
reponame = "test repo"
test_add :: Test
test_add = "git-annex add" ~: TestList [basic, sha1dup, subdirs]
test_add = "git-annex add" ~: TestList [basic, sha1dup, sha1unicode, subdirs]
where
-- this test case runs in the main repo, to set up a basic
-- annexed file that later tests will use
@ -158,6 +156,10 @@ test_add = "git-annex add" ~: TestList [basic, sha1dup, subdirs]
git_annex "add" [sha1annexedfiledup, "--backend=SHA1"] @? "add of second file with same SHA1 failed"
annexed_present sha1annexedfiledup
annexed_present sha1annexedfile
sha1unicode = TestCase $ intmpclonerepo $ do
writeFile sha1annexedfileunicode $ content sha1annexedfileunicode
git_annex "add" [sha1annexedfileunicode, "--backend=SHA1"] @? "add of unicode filename failed"
annexed_present sha1annexedfileunicode
subdirs = TestCase $ intmpclonerepo $ do
createDirectory "dir"
writeFile "dir/foo" $ content annexedfile
@ -691,7 +693,7 @@ test_crypto = "git-annex crypto" ~: intmpclonerepo $ when Build.SysConfig.gpg $
git_annex :: String -> [String] -> IO Bool
git_annex command params = do
-- catch all errors, including normally fatal errors
r <- E.try (run)::IO (Either E.SomeException ())
r <- try (run)::IO (Either SomeException ())
case r of
Right _ -> return True
Left _ -> return False
@ -757,7 +759,7 @@ indir dir a = do
-- any type of error and change back to cwd before
-- rethrowing.
r <- bracket_ (changeToTmpDir dir) (changeWorkingDirectory cwd)
(E.try (a)::IO (Either E.SomeException ()))
(try (a)::IO (Either SomeException ()))
case r of
Right () -> return ()
Left e -> throw e
@ -828,14 +830,14 @@ checkunwritable f = do
checkwritable :: FilePath -> Assertion
checkwritable f = do
r <- try $ writeFile f $ content f
r <- tryIO $ writeFile f $ content f
case r of
Left _ -> assertFailure $ "unable to modify " ++ f
Right _ -> return ()
checkdangling :: FilePath -> Assertion
checkdangling f = do
r <- try $ readFile f
r <- tryIO $ readFile f
case r of
Left _ -> return () -- expected; dangling link
Right _ -> assertFailure $ f ++ " was not a dangling link as expected"
@ -919,6 +921,9 @@ sha1annexedfile = "sha1foo"
sha1annexedfiledup :: String
sha1annexedfiledup = "sha1foodup"
sha1annexedfileunicode :: String
sha1annexedfileunicode = "foo¡"
ingitfile :: String
ingitfile = "bar"
@ -928,6 +933,7 @@ content f
| f == ingitfile = "normal file content"
| f == sha1annexedfile ="sha1 annexed file content"
| f == sha1annexedfiledup = content sha1annexedfile
| f == sha1annexedfileunicode ="sha1 annexed file content ¡ünicodé!"
| f == wormannexedfile = "worm annexed file content"
| otherwise = "unknown file " ++ f