rework git check-attr interface

Now gitattributes are looked up, efficiently, in only the places that
really need them, using the same approach used for cat-file.

The old CheckAttr code seemed very fragile, in the way it streamed files
through git check-attr.
I actually found that cad8824852
was still deadlocking with ghc 7.4, at the end of adding a lot of files.
This should fix that problem, and avoid future ones.

The best part is that this removes withAttrFilesInGit and withNumCopies,
which were complicated Seek methods, as well as simplfying the types
for several other Seek methods that had a Backend tupled in.
This commit is contained in:
Joey Hess 2012-02-13 23:42:44 -04:00
parent d35a8d85b5
commit cbaebf538a
16 changed files with 143 additions and 99 deletions

View file

@ -35,6 +35,7 @@ import Common
import qualified Git
import qualified Git.Config
import Git.CatFile
import Git.CheckAttr
import qualified Git.Queue
import Types.Backend
import qualified Types.Remote
@ -82,6 +83,7 @@ data AnnexState = AnnexState
, auto :: Bool
, branchstate :: BranchState
, catfilehandle :: Maybe CatFileHandle
, checkattrhandle :: Maybe CheckAttrHandle
, forcebackend :: Maybe String
, forcenumcopies :: Maybe Int
, limit :: Matcher (FilePath -> Annex Bool)
@ -105,6 +107,7 @@ newState gitrepo = AnnexState
, auto = False
, branchstate = startBranchState
, catfilehandle = Nothing
, checkattrhandle = Nothing
, forcebackend = Nothing
, forcenumcopies = Nothing
, limit = Left []

35
Annex/CheckAttr.hs Normal file
View file

@ -0,0 +1,35 @@
{- git check-attr interface, with handle automatically stored in the Annex monad
-
- Copyright 2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.CheckAttr (
checkAttr,
checkAttrHandle
) where
import Common.Annex
import qualified Git.CheckAttr as Git
import qualified Annex
{- All gitattributes used by git-annex. -}
annexAttrs :: [Git.Attr]
annexAttrs =
[ "annex.backend"
, "annex.numcopies"
]
checkAttr :: Git.Attr -> FilePath -> Annex String
checkAttr attr file = do
h <- checkAttrHandle
liftIO $ Git.checkAttr h attr file
checkAttrHandle :: Annex Git.CheckAttrHandle
checkAttrHandle = maybe startup return =<< Annex.getState Annex.checkattrhandle
where
startup = do
h <- inRepo $ Git.checkAttrStart annexAttrs
Annex.changeState $ \s -> s { Annex.checkattrhandle = Just h }
return h

View file

@ -6,12 +6,11 @@
-}
module Backend (
BackendFile,
list,
orderedList,
genKey,
lookupFile,
chooseBackends,
chooseBackend,
lookupBackendName,
maybeLookupBackendName
) where
@ -22,6 +21,7 @@ import Common.Annex
import qualified Git.Config
import qualified Git.CheckAttr
import qualified Annex
import Annex.CheckAttr
import Types.Key
import qualified Types.Backend as B
@ -93,20 +93,15 @@ lookupFile file = do
bname ++ ")"
return Nothing
type BackendFile = (Maybe Backend, FilePath)
{- Looks up the backends that should be used for each file in a list.
{- Looks up the backend that should be used for a file.
- That can be configured on a per-file basis in the gitattributes file.
-}
chooseBackends :: [FilePath] -> Annex [BackendFile]
chooseBackends fs = Annex.getState Annex.forcebackend >>= go
chooseBackend :: FilePath -> Annex (Maybe Backend)
chooseBackend f = Annex.getState Annex.forcebackend >>= go
where
go Nothing = do
pairs <- inRepo $ Git.CheckAttr.lookup "annex.backend" fs
return $ map (\(f,b) -> (maybeLookupBackendName b, f)) pairs
go (Just _) = do
l <- orderedList
return $ map (\f -> (Just $ Prelude.head l, f)) fs
go Nothing = maybeLookupBackendName <$>
checkAttr "annex.backend" f
go (Just _) = Just . Prelude.head <$> orderedList
{- Looks up a backend by name. May fail if unknown. -}
lookupBackendName :: String -> Backend

View file

@ -18,6 +18,7 @@ module Command (
ifAnnexed,
notBareRepo,
isBareRepo,
numCopies,
autoCopies,
module ReExported
) where
@ -34,6 +35,7 @@ import Checks as ReExported
import Usage as ReExported
import Logs.Trust
import Config
import Annex.CheckAttr
{- Generates a normal command -}
command :: String -> String -> [CommandSeek] -> String -> Command
@ -98,17 +100,22 @@ notBareRepo a = do
isBareRepo :: Annex Bool
isBareRepo = fromRepo Git.repoIsLocalBare
numCopies :: FilePath -> Annex (Maybe Int)
numCopies file = readish <$> checkAttr "annex.numcopies" file
{- Used for commands that have an auto mode that checks the number of known
- copies of a key.
-
- In auto mode, first checks that the number of known
- copies of the key is > or < than the numcopies setting, before running
- the action. -}
autoCopies :: Key -> (Int -> Int -> Bool) -> Maybe Int -> CommandStart -> CommandStart
autoCopies key vs numcopiesattr a = Annex.getState Annex.auto >>= auto
autoCopies :: FilePath -> Key -> (Int -> Int -> Bool) -> (Maybe Int -> CommandStart) -> CommandStart
autoCopies file key vs a = do
numcopiesattr <- numCopies file
Annex.getState Annex.auto >>= auto numcopiesattr
where
auto False = a
auto True = do
auto numcopiesattr False = a numcopiesattr
auto numcopiesattr True = do
needed <- getNumCopies numcopiesattr
(_, have) <- trustPartition UnTrusted =<< Remote.keyLocations key
if length have `vs` needed then a else stop
if length have `vs` needed then a numcopiesattr else stop

View file

@ -16,7 +16,6 @@ import qualified Backend
import Logs.Location
import Annex.Content
import Utility.Touch
import Backend
def :: [Command]
def = [command "add" paramPaths seek "add files to annex"]
@ -28,8 +27,8 @@ seek = [withFilesNotInGit start, withFilesUnlocked start]
{- The add subcommand annexes a file, storing it in a backend, and then
- moving it into the annex directory and setting up the symlink pointing
- to its content. -}
start :: BackendFile -> CommandStart
start p@(_, file) = notBareRepo $ ifAnnexed file fixup add
start :: FilePath -> CommandStart
start file = notBareRepo $ ifAnnexed file fixup add
where
add = do
s <- liftIO $ getSymbolicLinkStatus file
@ -37,7 +36,7 @@ start p@(_, file) = notBareRepo $ ifAnnexed file fixup add
then stop
else do
showStart "add" file
next $ perform p
next $ perform file
fixup (key, _) = do
-- fixup from an interrupted add; the symlink
-- is present but not yet added to git
@ -45,8 +44,10 @@ start p@(_, file) = notBareRepo $ ifAnnexed file fixup add
liftIO $ removeFile file
next $ next $ cleanup file key =<< inAnnex key
perform :: BackendFile -> CommandPerform
perform (backend, file) = Backend.genKey file backend >>= go
perform :: FilePath -> CommandPerform
perform file = do
backend <- Backend.chooseBackend file
Backend.genKey file backend >>= go
where
go Nothing = stop
go (Just (key, _)) = do

View file

@ -63,7 +63,7 @@ download url file = do
tmp <- fromRepo $ gitAnnexTmpLocation dummykey
liftIO $ createDirectoryIfMissing True (parentDir tmp)
stopUnless (downloadUrl [url] tmp) $ do
[(backend, _)] <- Backend.chooseBackends [file]
backend <- Backend.chooseBackend file
k <- Backend.genKey tmp backend
case k of
Nothing -> stop

View file

@ -19,10 +19,10 @@ def = [withOptions Command.Move.options $ command "copy" paramPaths seek
seek :: [CommandSeek]
seek = [withField Command.Move.toOption Remote.byName $ \to ->
withField Command.Move.fromOption Remote.byName $ \from ->
withNumCopies $ \n -> whenAnnexed $ start to from n]
withFilesInGit $ whenAnnexed $ start to from]
-- A copy is just a move that does not delete the source file.
-- However, --auto mode avoids unnecessary copies.
start :: Maybe Remote -> Maybe Remote -> Maybe Int -> FilePath -> (Key, Backend) -> CommandStart
start to from numcopies file (key, backend) = autoCopies key (<) numcopies $
start :: Maybe Remote -> Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart
start to from file (key, backend) = autoCopies file key (<) $ \_numcopies ->
Command.Move.start to from False file (key, backend)

View file

@ -26,11 +26,11 @@ fromOption :: Option
fromOption = Option.field ['f'] "from" paramRemote "drop content from a remote"
seek :: [CommandSeek]
seek = [withField fromOption Remote.byName $ \from -> withNumCopies $ \n ->
whenAnnexed $ start from n]
seek = [withField fromOption Remote.byName $ \from ->
withFilesInGit $ whenAnnexed $ start from]
start :: Maybe Remote -> Maybe Int -> FilePath -> (Key, Backend) -> CommandStart
start from numcopies file (key, _) = autoCopies key (>) numcopies $ do
start :: Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart
start from file (key, _) = autoCopies file key (>) $ \numcopies -> do
case from of
Nothing -> startLocal file numcopies key
Just remote -> do

View file

@ -36,12 +36,13 @@ options = [fromOption]
seek :: [CommandSeek]
seek =
[ withField fromOption Remote.byName $ \from ->
withNumCopies $ \n -> whenAnnexed $ start from n
withFilesInGit $ whenAnnexed $ start from
, withBarePresentKeys startBare
]
start :: Maybe Remote -> Maybe Int -> FilePath -> (Key, Backend) -> CommandStart
start from numcopies file (key, backend) = do
start :: Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart
start from file (key, backend) = do
numcopies <- numCopies file
showStart "fsck" file
case from of
Nothing -> next $ perform key file backend numcopies

View file

@ -19,11 +19,11 @@ def = [withOptions [Command.Move.fromOption] $ command "get" paramPaths seek
seek :: [CommandSeek]
seek = [withField Command.Move.fromOption Remote.byName $ \from ->
withNumCopies $ \n -> whenAnnexed $ start from n]
withFilesInGit $ whenAnnexed $ start from]
start :: Maybe Remote -> Maybe Int -> FilePath -> (Key, Backend) -> CommandStart
start from numcopies file (key, _) = stopUnless (not <$> inAnnex key) $
autoCopies key (<) numcopies $ do
start :: Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart
start from file (key, _) = stopUnless (not <$> inAnnex key) $
autoCopies file key (<) $ \_numcopies -> do
case from of
Nothing -> go $ perform key
Just src -> do

View file

@ -10,7 +10,6 @@ module Command.Lock where
import Common.Annex
import Command
import qualified Annex.Queue
import Backend
def :: [Command]
def = [command "lock" paramPaths seek "undo unlock command"]
@ -18,9 +17,8 @@ def = [command "lock" paramPaths seek "undo unlock command"]
seek :: [CommandSeek]
seek = [withFilesUnlocked start, withFilesUnlockedToBeCommitted start]
{- Undo unlock -}
start :: BackendFile -> CommandStart
start (_, file) = do
start :: FilePath -> CommandStart
start file = do
showStart "lock" file
next $ perform file

View file

@ -19,12 +19,12 @@ def :: [Command]
def = [command "migrate" paramPaths seek "switch data to different backend"]
seek :: [CommandSeek]
seek = [withBackendFilesInGit $ \(b, f) -> whenAnnexed (start b) f]
seek = [withFilesInGit $ whenAnnexed start]
start :: Maybe Backend -> FilePath -> (Key, Backend) -> CommandStart
start b file (key, oldbackend) = do
start :: FilePath -> (Key, Backend) -> CommandStart
start file (key, oldbackend) = do
exists <- inAnnex key
newbackend <- choosebackend b
newbackend <- choosebackend =<< Backend.chooseBackend file
if (newbackend /= oldbackend || upgradableKey key) && exists
then do
showStart "migrate" file

View file

@ -10,7 +10,6 @@ module Command.PreCommit where
import Command
import qualified Command.Add
import qualified Command.Fix
import Backend
def :: [Command]
def = [command "pre-commit" paramPaths seek "run by git pre-commit hook"]
@ -22,12 +21,12 @@ seek =
[ withFilesToBeCommitted $ whenAnnexed Command.Fix.start
, withFilesUnlockedToBeCommitted start]
start :: BackendFile -> CommandStart
start p = next $ perform p
start :: FilePath -> CommandStart
start file = next $ perform file
perform :: BackendFile -> CommandPerform
perform pair@(_, file) = do
ok <- doCommand $ Command.Add.start pair
perform :: FilePath -> CommandPerform
perform file = do
ok <- doCommand $ Command.Add.start file
if ok
then next $ return True
else error $ "failed to add " ++ file ++ "; canceling commit"

View file

@ -1,6 +1,6 @@
{- git check-attr interface
-
- Copyright 2010, 2011 Joey Hess <joey@kitenet.net>
- Copyright 2010-2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -12,20 +12,44 @@ import Git
import Git.Command
import qualified Git.Version
{- Efficiently looks up a gitattributes value for each file in a list. -}
lookup :: String -> [FilePath] -> Repo -> IO [(FilePath, String)]
lookup attr files repo = do
cwd <- getCurrentDirectory
(_, r) <- pipeBoth "git" (toCommand params) $
join "\0" $ input cwd
return $ zip files $ map attrvalue $ lines r
where
params = gitCommandLine
[ Param "check-attr"
, Param attr
, Params "-z --stdin"
] repo
type CheckAttrHandle = (PipeHandle, Handle, Handle, [Attr], String)
type Attr = String
{- Starts git check-attr running to look up the specified gitattributes
- values and return a handle. -}
checkAttrStart :: [Attr] -> Repo -> IO CheckAttrHandle
checkAttrStart attrs repo = do
cwd <- getCurrentDirectory
(pid, from, to) <- hPipeBoth "git" $ toCommand $
gitCommandLine params repo
return (pid, from, to, attrs, cwd)
where
params =
[ Param "check-attr" ]
++ map Param attrs ++
[ Params "-z --stdin" ]
{- Stops git check-attr. -}
checkAttrStop :: CheckAttrHandle -> IO ()
checkAttrStop (pid, from, to, _, _) = do
hClose to
hClose from
forceSuccess pid
{- Gets an attribute of a file. -}
checkAttr :: CheckAttrHandle -> Attr -> FilePath -> IO String
checkAttr (_, from, to, attrs, cwd) want file = do
hPutStr to $ file' ++ "\0"
hFlush to
pairs <- forM attrs $ \attr -> do
l <- hGetLine from
return (attr, attrvalue attr l)
let vals = map snd $ filter (\(attr, _) -> attr == want) pairs
case vals of
[v] -> return v
_ -> error $ "unable to determine " ++ want ++ " attribute of " ++ file
where
{- Before git 1.7.7, git check-attr worked best with
- absolute filenames; using them worked around some bugs
- with relative filenames.
@ -34,10 +58,10 @@ lookup attr files repo = do
- filenames, and the bugs that necessitated them were fixed,
- so use relative filenames. -}
oldgit = Git.Version.older "1.7.7"
input cwd
| oldgit = map (absPathFrom cwd) files
| otherwise = map (relPathDirToFile cwd . absPathFrom cwd) files
attrvalue l = end bits !! 0
file'
| oldgit = absPathFrom cwd file
| otherwise = relPathDirToFile cwd $ absPathFrom cwd file
attrvalue attr l = end bits !! 0
where
bits = split sep l
sep = ": " ++ attr ++ ": "

View file

@ -17,7 +17,7 @@ import Annex.UUID
import Config
import qualified Remote.Git
import qualified Remote.S3
--import qualified Remote.S3
import qualified Remote.Bup
import qualified Remote.Directory
import qualified Remote.Rsync
@ -27,7 +27,7 @@ import qualified Remote.Hook
remoteTypes :: [RemoteType]
remoteTypes =
[ Remote.Git.remote
, Remote.S3.remote
-- , Remote.S3.remote
, Remote.Bup.remote
, Remote.Directory.remote
, Remote.Rsync.remote

33
Seek.hs
View file

@ -14,11 +14,9 @@ module Seek where
import Common.Annex
import Types.Command
import Types.Key
import Backend
import qualified Annex
import qualified Git
import qualified Git.LsFiles as LsFiles
import qualified Git.CheckAttr
import qualified Limit
import qualified Option
@ -28,26 +26,12 @@ seekHelper a params = inRepo $ \g -> runPreserveOrder (`a` g) params
withFilesInGit :: (FilePath -> CommandStart) -> CommandSeek
withFilesInGit a params = prepFiltered a $ seekHelper LsFiles.inRepo params
withAttrFilesInGit :: String -> ((FilePath, String) -> CommandStart) -> CommandSeek
withAttrFilesInGit attr a params = do
files <- seekHelper LsFiles.inRepo params
prepFilteredGen a fst $ inRepo $ Git.CheckAttr.lookup attr files
withNumCopies :: (Maybe Int -> FilePath -> CommandStart) -> CommandSeek
withNumCopies a params = withAttrFilesInGit "annex.numcopies" go params
where
go (file, v) = a (readish v) file
withBackendFilesInGit :: (BackendFile -> CommandStart) -> CommandSeek
withBackendFilesInGit a params =
prepBackendPairs a =<< seekHelper LsFiles.inRepo params
withFilesNotInGit :: (BackendFile -> CommandStart) -> CommandSeek
withFilesNotInGit :: (FilePath -> CommandStart) -> CommandSeek
withFilesNotInGit a params = do
{- dotfiles are not acted on unless explicitly listed -}
files <- filter (not . dotfile) <$> seek ps
dotfiles <- if null dotps then return [] else seek dotps
prepBackendPairs a $ preserveOrder params (files++dotfiles)
prepFiltered a $ return $ preserveOrder params (files++dotfiles)
where
(dotps, ps) = partition dotfile params
seek l = do
@ -65,20 +49,20 @@ withFilesToBeCommitted :: (String -> CommandStart) -> CommandSeek
withFilesToBeCommitted a params = prepFiltered a $
seekHelper LsFiles.stagedNotDeleted params
withFilesUnlocked :: (BackendFile -> CommandStart) -> CommandSeek
withFilesUnlocked :: (FilePath -> CommandStart) -> CommandSeek
withFilesUnlocked = withFilesUnlocked' LsFiles.typeChanged
withFilesUnlockedToBeCommitted :: (BackendFile -> CommandStart) -> CommandSeek
withFilesUnlockedToBeCommitted :: (FilePath -> CommandStart) -> CommandSeek
withFilesUnlockedToBeCommitted = withFilesUnlocked' LsFiles.typeChangedStaged
withFilesUnlocked' :: ([FilePath] -> Git.Repo -> IO [FilePath]) -> (BackendFile -> CommandStart) -> CommandSeek
withFilesUnlocked' :: ([FilePath] -> Git.Repo -> IO [FilePath]) -> (FilePath -> CommandStart) -> CommandSeek
withFilesUnlocked' typechanged a params = do
-- unlocked files have changed type from a symlink to a regular file
top <- fromRepo Git.workTree
typechangedfiles <- seekHelper typechanged params
unlockedfiles <- liftIO $ filterM notSymlink $
let unlockedfiles = liftIO $ filterM notSymlink $
map (\f -> top ++ "/" ++ f) typechangedfiles
prepBackendPairs a unlockedfiles
prepFiltered a unlockedfiles
withKeys :: (Key -> CommandStart) -> CommandSeek
withKeys a params = return $ map (a . parse) params
@ -109,9 +93,6 @@ withNothing _ _ = error "This command takes no parameters."
prepFiltered :: (FilePath -> CommandStart) -> Annex [FilePath] -> Annex [CommandStart]
prepFiltered a = prepFilteredGen a id
prepBackendPairs :: (BackendFile -> CommandStart) -> CommandSeek
prepBackendPairs a fs = prepFilteredGen a snd (chooseBackends fs)
prepFilteredGen :: (b -> CommandStart) -> (b -> FilePath) -> Annex [b] -> Annex [CommandStart]
prepFilteredGen a d fs = do
matcher <- Limit.getMatcher