712c9fc590
This removes ambiguity, because while someone might have "WORM--foo" in a
file that's not intended to be a git-annex pointer file,
"annex/objects/WORM--foo" is less likely.
Also, 664cc987e8
had a caveat about symlink
targets being parsed as pointer files, and now the same parser is used for
both.
I did not include any hash directories before the key in the pointer file,
as they're not needed. However, if they were included, the parser would
still work ok.
139 lines
4.5 KiB
Haskell
139 lines
4.5 KiB
Haskell
{- git cat-file interface, with handle automatically stored in the Annex monad
|
|
-
|
|
- Copyright 2011-2015 Joey Hess <id@joeyh.name>
|
|
-
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
-}
|
|
|
|
module Annex.CatFile (
|
|
catFile,
|
|
catFileDetails,
|
|
catObject,
|
|
catTree,
|
|
catObjectDetails,
|
|
catFileHandle,
|
|
catFileStop,
|
|
catKey,
|
|
parsePointer,
|
|
catKeyFile,
|
|
catKeyFileHEAD,
|
|
catSymLinkTarget,
|
|
) where
|
|
|
|
import qualified Data.ByteString.Lazy as L
|
|
import qualified Data.Map as M
|
|
import System.PosixCompat.Types
|
|
|
|
import Common.Annex
|
|
import qualified Git
|
|
import qualified Git.CatFile
|
|
import qualified Annex
|
|
import Git.Types
|
|
import Git.FilePath
|
|
import qualified Git.Ref
|
|
import Types.Key
|
|
|
|
catFile :: Git.Branch -> FilePath -> Annex L.ByteString
|
|
catFile branch file = do
|
|
h <- catFileHandle
|
|
liftIO $ Git.CatFile.catFile h branch file
|
|
|
|
catFileDetails :: Git.Branch -> FilePath -> Annex (Maybe (L.ByteString, Sha, ObjectType))
|
|
catFileDetails branch file = do
|
|
h <- catFileHandle
|
|
liftIO $ Git.CatFile.catFileDetails h branch file
|
|
|
|
catObject :: Git.Ref -> Annex L.ByteString
|
|
catObject ref = do
|
|
h <- catFileHandle
|
|
liftIO $ Git.CatFile.catObject h ref
|
|
|
|
catTree :: Git.Ref -> Annex [(FilePath, FileMode)]
|
|
catTree ref = do
|
|
h <- catFileHandle
|
|
liftIO $ Git.CatFile.catTree h ref
|
|
|
|
catObjectDetails :: Git.Ref -> Annex (Maybe (L.ByteString, Sha, ObjectType))
|
|
catObjectDetails ref = do
|
|
h <- catFileHandle
|
|
liftIO $ Git.CatFile.catObjectDetails h ref
|
|
|
|
{- There can be multiple index files, and a different cat-file is needed
|
|
- for each. This is selected by setting GIT_INDEX_FILE in the gitEnv. -}
|
|
catFileHandle :: Annex Git.CatFile.CatFileHandle
|
|
catFileHandle = do
|
|
m <- Annex.getState Annex.catfilehandles
|
|
indexfile <- fromMaybe "" . maybe Nothing (lookup "GIT_INDEX_FILE")
|
|
<$> fromRepo gitEnv
|
|
case M.lookup indexfile m of
|
|
Just h -> return h
|
|
Nothing -> do
|
|
h <- inRepo Git.CatFile.catFileStart
|
|
let m' = M.insert indexfile h m
|
|
Annex.changeState $ \s -> s { Annex.catfilehandles = m' }
|
|
return h
|
|
|
|
{- Stops all running cat-files. Should only be run when it's known that
|
|
- nothing is using the handles, eg at shutdown. -}
|
|
catFileStop :: Annex ()
|
|
catFileStop = do
|
|
m <- Annex.withState $ \s ->
|
|
(s { Annex.catfilehandles = M.empty }, Annex.catfilehandles s)
|
|
liftIO $ mapM_ Git.CatFile.catFileStop (M.elems m)
|
|
|
|
{- From ref to a symlink or a pointer file, get the key. -}
|
|
catKey :: Ref -> Annex (Maybe Key)
|
|
catKey ref = parsePointer . fromInternalGitPath . decodeBS . L.take maxsz
|
|
<$> catObject ref
|
|
where
|
|
-- Want to avoid buffering really big files in git into memory.
|
|
-- 8192 bytes is plenty for a pointer to a key.
|
|
-- Pad some more to allow for any pointer files that might have
|
|
-- lines after the key explaining what the file is used for.
|
|
maxsz = 81920
|
|
|
|
{- Only look at the first line of a pointer file. -}
|
|
parsePointer :: String -> Maybe Key
|
|
parsePointer s = headMaybe (lines s) >>= go
|
|
where
|
|
go l
|
|
| isLinkToAnnex l = file2key $ takeFileName l
|
|
| otherwise = Nothing
|
|
|
|
{- Gets a symlink target. -}
|
|
catSymLinkTarget :: Sha -> Annex String
|
|
catSymLinkTarget sha = fromInternalGitPath . decodeBS <$> get
|
|
where
|
|
-- Avoid buffering the whole file content, which might be large.
|
|
-- 8192 is enough if it really is a symlink or pointer file.
|
|
get = L.take 8192 <$> catObject sha
|
|
|
|
{- From a file in the repository back to the key.
|
|
-
|
|
- Ideally, this should reflect the key that's staged in the index,
|
|
- not the key that's committed to HEAD. Unfortunately, git cat-file
|
|
- does not refresh the index file after it's started up, so things
|
|
- newly staged in the index won't show up. It does, however, notice
|
|
- when branches change.
|
|
-
|
|
- For command-line git-annex use, that doesn't matter. It's perfectly
|
|
- reasonable for things staged in the index after the currently running
|
|
- git-annex process to not be noticed by it. However, we do want to see
|
|
- what's in the index, since it may have uncommitted changes not in HEAD
|
|
-
|
|
- For the assistant, this is much more of a problem, since it commits
|
|
- files and then needs to be able to immediately look up their keys.
|
|
- OTOH, the assistant doesn't keep changes staged in the index for very
|
|
- long at all before committing them -- and it won't look at the keys
|
|
- of files until after committing them.
|
|
-
|
|
- So, this gets info from the index, unless running as a daemon.
|
|
-}
|
|
catKeyFile :: FilePath -> Annex (Maybe Key)
|
|
catKeyFile f = ifM (Annex.getState Annex.daemon)
|
|
( catKeyFileHEAD f
|
|
, catKey $ Git.Ref.fileRef f
|
|
)
|
|
|
|
catKeyFileHEAD :: FilePath -> Annex (Maybe Key)
|
|
catKeyFileHEAD f = catKey $ Git.Ref.fileFromRef Git.Ref.headRef f
|