refactor and improve pointer file handling code

This commit is contained in:
Joey Hess 2015-12-09 14:25:33 -04:00
parent 37c9026c6e
commit 78a6b8ce05
Failed to extract signature
5 changed files with 44 additions and 35 deletions

View file

@ -14,7 +14,6 @@ module Annex.CatFile (
catFileHandle, catFileHandle,
catFileStop, catFileStop,
catKey, catKey,
parsePointer,
catKeyFile, catKeyFile,
catKeyFileHEAD, catKeyFileHEAD,
catSymLinkTarget, catSymLinkTarget,
@ -31,7 +30,7 @@ import qualified Annex
import Git.Types import Git.Types
import Git.FilePath import Git.FilePath
import qualified Git.Ref import qualified Git.Ref
import Types.Key import Annex.Link
catFile :: Git.Branch -> FilePath -> Annex L.ByteString catFile :: Git.Branch -> FilePath -> Annex L.ByteString
catFile branch file = do catFile branch file = do
@ -83,22 +82,7 @@ catFileStop = do
{- From ref to a symlink or a pointer file, get the key. -} {- From ref to a symlink or a pointer file, get the key. -}
catKey :: Ref -> Annex (Maybe Key) catKey :: Ref -> Annex (Maybe Key)
catKey ref = parsePointer . fromInternalGitPath . decodeBS . L.take maxsz catKey ref = parseLinkOrPointer <$> catObject ref
<$> catObject ref
where
-- Want to avoid buffering really big files in git into memory.
-- 8192 bytes is plenty for a pointer to a key.
-- Pad some more to allow for any pointer files that might have
-- lines after the key explaining what the file is used for.
maxsz = 81920
{- Only look at the first line of a pointer file. -}
parsePointer :: String -> Maybe Key
parsePointer s = headMaybe (lines s) >>= go
where
go l
| isLinkToAnnex l = file2key $ takeFileName l
| otherwise = Nothing
{- Gets a symlink target. -} {- Gets a symlink target. -}
catSymLinkTarget :: Sha -> Annex String catSymLinkTarget :: Sha -> Annex String

View file

@ -29,11 +29,11 @@ import Types.TrustLevel
import Annex.Version import Annex.Version
import Annex.Difference import Annex.Difference
import Annex.UUID import Annex.UUID
import Annex.Link
import Config import Config
import Annex.Direct import Annex.Direct
import Annex.Content.Direct import Annex.Content.Direct
import Annex.Environment import Annex.Environment
import Backend
import Annex.Hook import Annex.Hook
import Upgrade import Upgrade
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS

View file

@ -5,7 +5,9 @@
- On other filesystems, git instead stores the symlink target in a regular - On other filesystems, git instead stores the symlink target in a regular
- file. - file.
- -
- Copyright 2013 Joey Hess <id@joeyh.name> - Pointer files are used instead of symlinks for unlocked files.
-
- Copyright 2013-2015 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -19,6 +21,9 @@ import qualified Git.UpdateIndex
import qualified Annex.Queue import qualified Annex.Queue
import Git.Types import Git.Types
import Git.FilePath import Git.FilePath
import Types.Key
import qualified Data.ByteString.Lazy as L
type LinkTarget = String type LinkTarget = String
@ -110,3 +115,32 @@ stageSymlink :: FilePath -> Sha -> Annex ()
stageSymlink file sha = stageSymlink file sha =
Annex.Queue.addUpdateIndex =<< Annex.Queue.addUpdateIndex =<<
inRepo (Git.UpdateIndex.stageSymlink file sha) inRepo (Git.UpdateIndex.stageSymlink file sha)
{- Parses a symlink target or a pointer file to a Key.
- Only looks at the first line, as pointer files can have subsequent
- lines. -}
parseLinkOrPointer :: L.ByteString -> Maybe Key
parseLinkOrPointer = parseLinkOrPointer' . decodeBS . L.take maxsz
where
{- Want to avoid buffering really big files in git into
- memory when reading files that may be pointers.
-
- 8192 bytes is plenty for a pointer to a key.
- Pad some more to allow for any pointer files that might have
- lines after the key explaining what the file is used for. -}
maxsz = 81920
parseLinkOrPointer' :: String -> Maybe Key
parseLinkOrPointer' s = headMaybe (lines (fromInternalGitPath s)) >>= go
where
go l
| isLinkToAnnex l = file2key $ takeFileName l
| otherwise = Nothing
formatPointer :: Key -> String
formatPointer k = toInternalGitPath $ pathSeparator:objectDir </> key2file k
{- Checks if a file is a pointer to a key. -}
isPointerFile :: FilePath -> Annex (Maybe Key)
isPointerFile f = liftIO $ catchDefaultIO Nothing $
parseLinkOrPointer <$> L.readFile f

View file

@ -11,7 +11,6 @@ module Backend (
genKey, genKey,
lookupFile, lookupFile,
getBackend, getBackend,
isAnnexLink,
chooseBackend, chooseBackend,
lookupBackendName, lookupBackendName,
maybeLookupBackendName, maybeLookupBackendName,

View file

@ -9,16 +9,14 @@ module Command.Smudge where
import Common.Annex import Common.Annex
import Command import Command
import Types.Key
import Annex.Content import Annex.Content
import Annex.CatFile import Annex.Link
import Annex.MetaData import Annex.MetaData
import Annex.FileMatcher import Annex.FileMatcher
import Types.KeySource import Types.KeySource
import Backend import Backend
import Logs.Location import Logs.Location
import qualified Database.AssociatedFiles as AssociatedFiles import qualified Database.AssociatedFiles as AssociatedFiles
import Git.FilePath
import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy as B
@ -46,16 +44,13 @@ seek o = commandAction $
-- available annex object, should output its content. -- available annex object, should output its content.
smudge :: FilePath -> CommandStart smudge :: FilePath -> CommandStart
smudge file = do smudge file = do
liftIO $ fileEncoding stdin b <- liftIO $ B.hGetContents stdin
s <- liftIO $ hGetContents stdin case parseLinkOrPointer b of
case parsePointer s of Nothing -> liftIO $ B.putStr b
Nothing -> liftIO $ putStr s
Just k -> do Just k -> do
updateAssociatedFiles k file updateAssociatedFiles k file
content <- calcRepo (gitAnnexLocation k) content <- calcRepo (gitAnnexLocation k)
liftIO $ maybe liftIO $ B.hPut stdout . fromMaybe b
(putStr s)
(B.hPut stdout)
=<< catchMaybeIO (B.readFile content) =<< catchMaybeIO (B.readFile content)
stop stop
@ -102,11 +97,8 @@ ingest file = do
=<< liftIO (getFileStatus file) =<< liftIO (getFileStatus file)
return k return k
-- Could add a newline and some text explaining this file is a pointer.
-- parsePointer only looks at the first line.
emitPointer :: Key -> IO () emitPointer :: Key -> IO ()
emitPointer k = putStrLn $ toInternalGitPath $ emitPointer = putStrLn . formatPointer
pathSeparator:objectDir </> key2file k
updateAssociatedFiles :: Key -> FilePath -> Annex () updateAssociatedFiles :: Key -> FilePath -> Annex ()
updateAssociatedFiles k f = do updateAssociatedFiles k f = do