support pointer files

Backend.lookupFile is changed to always fall back to catKey when
operating on a file that's not a symlink.

catKey is changed to understand pointer files, as well as annex symlinks.

Before, catKey needed a file mode witness, to be sure it was looking at a
symlink. That was complicated stuff. Now, it doesn't actually care if a
file in git is a symlink or not; in either case asking git for the content
of the file will get the pointer to the key.

This does mean that git-annex will treat a link
foo -> WORM--bar as a git-annex file, and also treats
a regular file containing annex/objects/WORM--bar as a git-annex file.

Calling catKey could make git-annex commands need to do more work than
before. This would especially be the case if a repo contained many regular
files, and only a few annexed files, as now git-annex will need to ask
git about the contents of the regular files.
This commit is contained in:
Joey Hess 2015-12-07 15:22:01 -04:00
parent 2cbcb4f1a8
commit 664cc987e8
Failed to extract signature
9 changed files with 55 additions and 82 deletions

View file

@ -25,7 +25,6 @@ import qualified Git.Branch
import Git.Types (BlobType(..))
import Config
import Annex.ReplaceFile
import Git.FileMode
import Annex.VariantFile
import qualified Data.Set as S
@ -135,7 +134,7 @@ resolveMerge' (Just us) them u = do
| select (LsFiles.unmergedBlobType u) == Just SymlinkBlob =
case select' (LsFiles.unmergedSha u) of
Nothing -> return Nothing
Just sha -> catKey sha symLinkMode
Just sha -> catKey sha
| otherwise = return Nothing
makelink key = do
@ -174,7 +173,7 @@ resolveMerge' (Just us) them u = do
case select' (LsFiles.unmergedSha u) of
Nothing -> noop
Just sha -> do
link <- catLink True sha
link <- catSymLinkTarget sha
replacewithlink item link
resolveby a = do

View file

@ -1,6 +1,6 @@
{- git cat-file interface, with handle automatically stored in the Annex monad
-
- Copyright 2011-2013 Joey Hess <id@joeyh.name>
- Copyright 2011-2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -14,9 +14,10 @@ module Annex.CatFile (
catFileHandle,
catFileStop,
catKey,
parsePointer,
catKeyFile,
catKeyFileHEAD,
catLink,
catSymLinkTarget,
) where
import qualified Data.ByteString.Lazy as L
@ -29,8 +30,8 @@ import qualified Git.CatFile
import qualified Annex
import Git.Types
import Git.FilePath
import Git.FileMode
import qualified Git.Ref
import Types.Key
catFile :: Git.Branch -> FilePath -> Annex L.ByteString
catFile branch file = do
@ -80,52 +81,36 @@ catFileStop = do
(s { Annex.catfilehandles = M.empty }, Annex.catfilehandles s)
liftIO $ mapM_ Git.CatFile.catFileStop (M.elems m)
{- From the Sha or Ref of a symlink back to the key.
-
- Requires a mode witness, to guarantee that the file is a symlink.
-}
catKey :: Ref -> FileMode -> Annex (Maybe Key)
catKey = catKey' True
{- From ref to a symlink or a pointer file, get the key. -}
catKey :: Ref -> Annex (Maybe Key)
catKey ref = do
o <- catObject ref
if L.length o > maxsz
then return Nothing -- too big
else do
let l = decodeBS o
let l' = fromInternalGitPath l
return $ if isLinkToAnnex l'
then fileKey $ takeFileName l'
else parsePointer l
where
-- Want to avoid buffering really big files in git into memory.
-- 8192 bytes is plenty for a pointer to a key.
-- Pad some more to allow for any pointer files that might have
-- lines after the key explaining what the file is used for.
maxsz = 81920
catKey' :: Bool -> Sha -> FileMode -> Annex (Maybe Key)
catKey' modeguaranteed sha mode
| isSymLink mode = do
l <- catLink modeguaranteed sha
return $ if isLinkToAnnex l
then fileKey $ takeFileName l
else Nothing
| otherwise = return Nothing
{- Only look at the first line of a pointer file. -}
parsePointer :: String -> Maybe Key
parsePointer s = headMaybe (lines s) >>= file2key
{- Gets a symlink target. -}
catLink :: Bool -> Sha -> Annex String
catLink modeguaranteed sha = fromInternalGitPath . decodeBS <$> get
catSymLinkTarget :: Sha -> Annex String
catSymLinkTarget sha = fromInternalGitPath . decodeBS <$> get
where
-- If the mode is not guaranteed to be correct, avoid
-- buffering the whole file content, which might be large.
-- 8192 is enough if it really is a symlink.
get
| modeguaranteed = catObject sha
| otherwise = L.take 8192 <$> catObject sha
{- Looks up the key corresponding to the Ref using the running cat-file.
-
- Currently this always has to look in HEAD, because cat-file --batch
- does not offer a way to specify that we want to look up a tree object
- in the index. So if the index has a file staged not as a symlink,
- and it is a symlink in head, the wrong mode is gotten.
- Also, we have to assume the file is a symlink if it's not yet committed
- to HEAD. For these reasons, modeguaranteed is not set.
-}
catKeyChecked :: Bool -> Ref -> Annex (Maybe Key)
catKeyChecked needhead ref@(Ref r) =
catKey' False ref =<< findmode <$> catTree treeref
where
pathparts = split "/" r
dir = intercalate "/" $ take (length pathparts - 1) pathparts
file = fromMaybe "" $ lastMaybe pathparts
treeref = Ref $ if needhead then "HEAD" ++ dir ++ "/" else dir ++ "/"
findmode = fromMaybe symLinkMode . headMaybe .
map snd . filter (\p -> fst p == file)
-- Avoid buffering the whole file content, which might be large.
-- 8192 is enough if it really is a symlink or pointer file.
get = L.take 8192 <$> catObject sha
{- From a file in the repository back to the key.
-
@ -151,8 +136,8 @@ catKeyChecked needhead ref@(Ref r) =
catKeyFile :: FilePath -> Annex (Maybe Key)
catKeyFile f = ifM (Annex.getState Annex.daemon)
( catKeyFileHEAD f
, catKeyChecked True $ Git.Ref.fileRef f
, catKey $ Git.Ref.fileRef f
)
catKeyFileHEAD :: FilePath -> Annex (Maybe Key)
catKeyFileHEAD f = catKeyChecked False $ Git.Ref.fileFromRef Git.Ref.headRef f
catKeyFileHEAD f = catKey $ Git.Ref.fileFromRef Git.Ref.headRef f

View file

@ -53,8 +53,8 @@ stageDirect = do
{- Determine what kind of modified or deleted file this is, as
- efficiently as we can, by getting any key that's associated
- with it in git, as well as its stat info. -}
go (file, Just sha, Just mode) = withTSDelta $ \delta -> do
shakey <- catKey sha mode
go (file, Just sha, Just _mode) = withTSDelta $ \delta -> do
shakey <- catKey sha
mstat <- liftIO $ catchMaybeIO $ getSymbolicLinkStatus file
mcache <- liftIO $ maybe (pure Nothing) (toInodeCache delta file) mstat
filekey <- isAnnexLink file
@ -107,8 +107,8 @@ preCommitDirect = do
withkey (DiffTree.srcsha diff) (DiffTree.srcmode diff) removeAssociatedFile
withkey (DiffTree.dstsha diff) (DiffTree.dstmode diff) addAssociatedFile
where
withkey sha mode a = when (sha /= nullSha) $ do
k <- catKey sha mode
withkey sha _mode a = when (sha /= nullSha) $ do
k <- catKey sha
case k of
Nothing -> noop
Just key -> void $ a key $
@ -256,16 +256,16 @@ updateWorkTree d oldref force = do
makeabs <- flip fromTopFilePath <$> gitRepo
let fsitems = zip (map (makeabs . DiffTree.file) items) items
forM_ fsitems $
go makeabs DiffTree.srcsha DiffTree.srcmode moveout moveout_raw
go makeabs DiffTree.srcsha moveout moveout_raw
forM_ fsitems $
go makeabs DiffTree.dstsha DiffTree.dstmode movein movein_raw
go makeabs DiffTree.dstsha movein movein_raw
void $ liftIO cleanup
where
go makeabs getsha getmode a araw (f, item)
go makeabs getsha a araw (f, item)
| getsha item == nullSha = noop
| otherwise = void $
tryNonAsync . maybe (araw item makeabs f) (\k -> void $ a item makeabs k f)
=<< catKey (getsha item) (getmode item)
=<< catKey (getsha item)
moveout _ _ = removeDirect

View file

@ -413,13 +413,13 @@ withViewChanges addmeta removemeta = do
handleremovals item
| DiffTree.srcsha item /= nullSha =
handlechange item removemeta
=<< catKey (DiffTree.srcsha item) (DiffTree.srcmode item)
=<< catKey (DiffTree.srcsha item)
| otherwise = noop
handleadds makeabs item
| DiffTree.dstsha item /= nullSha =
handlechange item addmeta
=<< ifM isDirect
( catKey (DiffTree.dstsha item) (DiffTree.dstmode item)
( catKey (DiffTree.dstsha item)
-- optimisation
, isAnnexLink $ makeabs $ DiffTree.file item
)

View file

@ -26,7 +26,6 @@ import Annex.Link
import Types.Key
import Types.KeySource
import qualified Types.Backend as B
import Config
-- When adding a new backend, import it here and add it to the list.
import qualified Backend.Hash
@ -81,22 +80,17 @@ genKey' (b:bs) source = do
{- Looks up the key corresponding to an annexed file,
- by examining what the file links to.
-
- In direct mode, there is often no link on disk, in which case
- the symlink is looked up in git instead. However, a real link
- on disk still takes precedence over what was committed to git in direct
- mode.
- An unlocked file will not have a link on disk, so fall back to
- looking for a pointer to a key in git.
-}
lookupFile :: FilePath -> Annex (Maybe Key)
lookupFile file = do
mkey <- isAnnexLink file
case mkey of
Just key -> makeret key
Nothing -> ifM isDirect
( maybe (return Nothing) makeret =<< catKeyFile file
, return Nothing
)
Nothing -> maybe (return Nothing) makeret =<< catKeyFile file
where
makeret k = return $ Just k
makeret = return . Just
getBackend :: FilePath -> Key -> Annex (Maybe Backend)
getBackend file k = let bname = keyBackendName k in

View file

@ -80,7 +80,7 @@ withFilesInRefs a = mapM_ go
l <- inRepo $ LsTree.lsTree (Git.Ref r)
forM_ l $ \i -> do
let f = getTopFilePath $ LsTree.file i
v <- catKey (Git.Ref $ LsTree.sha i) (LsTree.mode i)
v <- catKey (Git.Ref $ LsTree.sha i)
case v of
Nothing -> noop
Just k -> whenM (matcher $ MatchingKey k) $

View file

@ -76,7 +76,7 @@ perform = do
return Nothing
| otherwise ->
maybe noop (fromdirect f)
=<< catKey sha mode
=<< catKey sha
_ -> noop
go _ = noop

View file

@ -11,6 +11,7 @@ import Common.Annex
import Command
import Types.Key
import Annex.Content
import Annex.CatFile
import Annex.MetaData
import Annex.FileMatcher
import Types.KeySource
@ -100,17 +101,11 @@ ingest file = do
=<< liftIO (getFileStatus file)
return k
-- Could add a newline and some text explaining this file is a pointer.
-- parsePointer only looks at the first line.
emitPointer :: Key -> IO ()
emitPointer = putStrLn . key2file
parsePointer :: String -> Maybe Key
parsePointer s
| length s' >= maxsz = Nothing -- too long to be a key pointer
| otherwise = headMaybe (lines s') >>= file2key
where
s' = take maxsz s
maxsz = 81920
updateAssociatedFiles :: Key -> FilePath -> Annex ()
updateAssociatedFiles k f = do
h <- AssociatedFiles.openDb

View file

@ -72,7 +72,7 @@ perform p = do
f <- mkrel di
whenM isDirect $
maybe noop (`removeDirect` f)
=<< catKey (srcsha di) (srcmode di)
=<< catKey (srcsha di)
liftIO $ nukeFile f
forM_ adds $ \di -> do
@ -80,6 +80,6 @@ perform p = do
inRepo $ Git.run [Param "checkout", Param "--", File f]
whenM isDirect $
maybe noop (`toDirect` f)
=<< catKey (dstsha di) (dstmode di)
=<< catKey (dstsha di)
next $ liftIO cleanup