use ByteStrings when reading annex symlinks and pointers
Now there's a ByteString used all the way from disk to Key. The main complication in this conversion was the use of fromInternalGitPath in several places to munge things on Windows. The things that used that were changed to parse the ByteString using either path separator. Also some code that had read from files to a String lazily was changed to read a minimal strict ByteString.
This commit is contained in:
parent
0a8d93cb8a
commit
5d98cba923
10 changed files with 128 additions and 78 deletions
|
@ -105,7 +105,7 @@ catKey ref = go =<< catObjectMetaData ref
|
|||
-- Avoid catting large files, that cannot be symlinks or
|
||||
-- pointer files, which would require buffering their
|
||||
-- content in memory, as well as a lot of IO.
|
||||
| sz <= maxPointerSz = parseLinkOrPointer <$> catObject ref
|
||||
| sz <= maxPointerSz = parseLinkTargetOrPointer . L.toStrict <$> catObject ref
|
||||
go _ = return Nothing
|
||||
|
||||
{- Gets a symlink target. -}
|
||||
|
|
133
Annex/Link.hs
133
Annex/Link.hs
|
@ -7,7 +7,7 @@
|
|||
-
|
||||
- Pointer files are used instead of symlinks for unlocked files.
|
||||
-
|
||||
- Copyright 2013-2018 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2013-2019 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -33,14 +33,17 @@ import Utility.FileMode
|
|||
import Utility.InodeCache
|
||||
import Utility.Tmp.Dir
|
||||
import Utility.CopyFile
|
||||
import qualified Utility.RawFilePath as R
|
||||
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Char8 as S8
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
type LinkTarget = String
|
||||
|
||||
{- Checks if a file is a link to a key. -}
|
||||
isAnnexLink :: FilePath -> Annex (Maybe Key)
|
||||
isAnnexLink file = maybe Nothing (fileKey . takeFileName) <$> getAnnexLinkTarget file
|
||||
isAnnexLink file = maybe Nothing parseLinkTargetOrPointer <$> getAnnexLinkTarget file
|
||||
|
||||
{- Gets the link target of a symlink.
|
||||
-
|
||||
|
@ -50,40 +53,42 @@ isAnnexLink file = maybe Nothing (fileKey . takeFileName) <$> getAnnexLinkTarget
|
|||
- Returns Nothing if the file is not a symlink, or not a link to annex
|
||||
- content.
|
||||
-}
|
||||
getAnnexLinkTarget :: FilePath -> Annex (Maybe LinkTarget)
|
||||
getAnnexLinkTarget :: FilePath -> Annex (Maybe S.ByteString)
|
||||
getAnnexLinkTarget f = getAnnexLinkTarget' f
|
||||
=<< (coreSymlinks <$> Annex.getGitConfig)
|
||||
|
||||
{- Pass False to force looking inside file. -}
|
||||
getAnnexLinkTarget' :: FilePath -> Bool -> Annex (Maybe LinkTarget)
|
||||
{- Pass False to force looking inside file, for when git checks out
|
||||
- symlinks as plain files. -}
|
||||
getAnnexLinkTarget' :: FilePath -> Bool -> Annex (Maybe S.ByteString)
|
||||
getAnnexLinkTarget' file coresymlinks = if coresymlinks
|
||||
then check readSymbolicLink $
|
||||
then check probesymlink $
|
||||
return Nothing
|
||||
else check readSymbolicLink $
|
||||
else check probesymlink $
|
||||
check probefilecontent $
|
||||
return Nothing
|
||||
where
|
||||
check getlinktarget fallback =
|
||||
liftIO (catchMaybeIO $ getlinktarget file) >>= \case
|
||||
liftIO (catchMaybeIO getlinktarget) >>= \case
|
||||
Just l
|
||||
| isLinkToAnnex (fromInternalGitPath l) -> return (Just l)
|
||||
| isLinkToAnnex l -> return (Just l)
|
||||
| otherwise -> return Nothing
|
||||
Nothing -> fallback
|
||||
|
||||
probefilecontent f = withFile f ReadMode $ \h -> do
|
||||
-- The first 8k is more than enough to read; link
|
||||
-- files are small.
|
||||
s <- take 8192 <$> hGetContents h
|
||||
-- If we got the full 8k, the file is too large
|
||||
if length s == 8192
|
||||
then return ""
|
||||
probesymlink = R.readSymbolicLink $ toRawFilePath file
|
||||
|
||||
probefilecontent = withFile file ReadMode $ \h -> do
|
||||
s <- S.hGet h unpaddedMaxPointerSz
|
||||
-- If we got the full amount, the file is too large
|
||||
-- to be a symlink target.
|
||||
return $ if S.length s == unpaddedMaxPointerSz
|
||||
then mempty
|
||||
else
|
||||
-- If there are any NUL or newline
|
||||
-- characters, or whitespace, we
|
||||
-- certianly don't have a link to a
|
||||
-- certianly don't have a symlink to a
|
||||
-- git-annex key.
|
||||
return $ if any (`elem` s) "\0\n\r \t"
|
||||
then ""
|
||||
if any (`S8.elem` s) "\0\n\r \t"
|
||||
then mempty
|
||||
else s
|
||||
|
||||
makeAnnexLink :: LinkTarget -> FilePath -> Annex ()
|
||||
|
@ -122,7 +127,7 @@ stageSymlink file sha =
|
|||
|
||||
{- Injects a pointer file content into git, returning its Sha. -}
|
||||
hashPointerFile :: Key -> Annex Sha
|
||||
hashPointerFile key = hashBlob $ encodeBS $ formatPointer key
|
||||
hashPointerFile key = hashBlob $ formatPointer key
|
||||
|
||||
{- Stages a pointer file, using a Sha of its content -}
|
||||
stagePointerFile :: FilePath -> Maybe FileMode -> Sha -> Annex ()
|
||||
|
@ -136,7 +141,7 @@ stagePointerFile file mode sha =
|
|||
|
||||
writePointerFile :: FilePath -> Key -> Maybe FileMode -> IO ()
|
||||
writePointerFile file k mode = do
|
||||
writeFile file (formatPointer k)
|
||||
S.writeFile file (formatPointer k)
|
||||
maybe noop (setFileMode file) mode
|
||||
|
||||
newtype Restage = Restage Bool
|
||||
|
@ -222,49 +227,58 @@ unableToRestage mf = unwords
|
|||
, "git update-index -q --refresh " ++ fromMaybe "<file>" mf
|
||||
]
|
||||
|
||||
{- Parses a symlink target or a pointer file to a Key.
|
||||
- Only looks at the first line, as pointer files can have subsequent
|
||||
- lines. -}
|
||||
parseLinkOrPointer :: L.ByteString -> Maybe Key
|
||||
parseLinkOrPointer = parseLinkOrPointer'
|
||||
. decodeBL . L.take (fromIntegral maxPointerSz)
|
||||
{- Parses a symlink target or a pointer file to a Key. -}
|
||||
parseLinkTargetOrPointer :: S.ByteString -> Maybe Key
|
||||
parseLinkTargetOrPointer = parseLinkTarget . S8.takeWhile (not . lineend)
|
||||
where
|
||||
|
||||
{- Want to avoid buffering really big files in git into
|
||||
- memory when reading files that may be pointers.
|
||||
-
|
||||
- 8192 bytes is plenty for a pointer to a key.
|
||||
- Pad some more to allow for any pointer files that might have
|
||||
- lines after the key explaining what the file is used for. -}
|
||||
maxPointerSz :: Integer
|
||||
maxPointerSz = 81920
|
||||
|
||||
parseLinkOrPointer' :: String -> Maybe Key
|
||||
parseLinkOrPointer' = go . fromInternalGitPath . takeWhile (not . lineend)
|
||||
where
|
||||
go l
|
||||
| isLinkToAnnex l = fileKey $ takeFileName l
|
||||
| otherwise = Nothing
|
||||
lineend '\n' = True
|
||||
lineend '\r' = True
|
||||
lineend _ = False
|
||||
|
||||
formatPointer :: Key -> String
|
||||
formatPointer k =
|
||||
toInternalGitPath (pathSeparator:objectDir </> keyFile k) ++ "\n"
|
||||
{- Avoid looking at more of the lazy ByteString than necessary since it
|
||||
- could be reading from a large file that is not a pointer file. -}
|
||||
parseLinkTargetOrPointerLazy :: L.ByteString -> Maybe Key
|
||||
parseLinkTargetOrPointerLazy b =
|
||||
let b' = L.take (fromIntegral maxPointerSz) b
|
||||
in parseLinkTargetOrPointer (L.toStrict b')
|
||||
|
||||
{- Parses a symlink target to a Key. -}
|
||||
parseLinkTarget :: S.ByteString -> Maybe Key
|
||||
parseLinkTarget l
|
||||
| isLinkToAnnex l = fileKey' $ snd $ S8.breakEnd pathsep l
|
||||
| otherwise = Nothing
|
||||
where
|
||||
pathsep '/' = True
|
||||
#ifdef mingw32_HOST_OS
|
||||
pathsep '\\' = True
|
||||
#endif
|
||||
pathsep _ = False
|
||||
|
||||
formatPointer :: Key -> S.ByteString
|
||||
formatPointer k = prefix <> keyFile' k <> nl
|
||||
where
|
||||
prefix = toRawFilePath $ toInternalGitPath (pathSeparator:objectDir)
|
||||
nl = S8.singleton '\n'
|
||||
|
||||
{- Maximum size of a file that could be a pointer to a key.
|
||||
- Check to avoid buffering really big files in git into
|
||||
- memory when reading files that may be pointers.
|
||||
-
|
||||
- 8192 bytes is plenty for a pointer to a key. This adds some additional
|
||||
- padding to allow for any pointer files that might have
|
||||
- lines after the key explaining what the file is used for. -}
|
||||
maxPointerSz :: Integer
|
||||
maxPointerSz = 81920
|
||||
|
||||
unpaddedMaxPointerSz :: Int
|
||||
unpaddedMaxPointerSz = 8192
|
||||
|
||||
{- Checks if a worktree file is a pointer to a key.
|
||||
-
|
||||
- Unlocked files whose content is present are not detected by this. -}
|
||||
isPointerFile :: FilePath -> IO (Maybe Key)
|
||||
isPointerFile f = catchDefaultIO Nothing $ bracket open close $ \h -> do
|
||||
b <- take (fromIntegral maxPointerSz) <$> hGetContents h
|
||||
-- strict so it reads before the file handle is closed
|
||||
let !mk = parseLinkOrPointer' b
|
||||
return mk
|
||||
where
|
||||
open = openBinaryFile f ReadMode
|
||||
close = hClose
|
||||
isPointerFile f = catchDefaultIO Nothing $ withFile f ReadMode $ \h ->
|
||||
parseLinkTargetOrPointer <$> S.hGet h unpaddedMaxPointerSz
|
||||
|
||||
{- Checks a symlink target or pointer file first line to see if it
|
||||
- appears to point to annexed content.
|
||||
|
@ -273,10 +287,15 @@ isPointerFile f = catchDefaultIO Nothing $ bracket open close $ \h -> do
|
|||
- directory itself, because GIT_DIR may cause a directory name other
|
||||
- than .git to be used.
|
||||
-}
|
||||
isLinkToAnnex :: FilePath -> Bool
|
||||
isLinkToAnnex s = (pathSeparator:objectDir) `isInfixOf` s
|
||||
isLinkToAnnex :: S.ByteString -> Bool
|
||||
isLinkToAnnex s = p `S.isInfixOf` s
|
||||
#ifdef mingw32_HOST_OS
|
||||
-- '/' is still used inside pointer files on Windows, not the native
|
||||
-- '\'
|
||||
|| ('/':objectDir) `isInfixOf` s
|
||||
|| p' `S.isInfixOf` s
|
||||
#endif
|
||||
where
|
||||
p = toRawFilePath (pathSeparator:objectDir)
|
||||
#ifdef mingw32_HOST_OS
|
||||
p' = toRawFilePath ('/':objectDir)
|
||||
#endif
|
||||
|
|
|
@ -302,11 +302,11 @@ onAddUnlocked' isdirect contentchanged addassociatedfile addlink samefilestatus
|
|||
case linktarget of
|
||||
Nothing -> a
|
||||
Just lt -> do
|
||||
case fileKey $ takeFileName lt of
|
||||
case parseLinkTarget lt of
|
||||
Nothing -> noop
|
||||
Just key -> liftAnnex $
|
||||
addassociatedfile key file
|
||||
onAddSymlink' linktarget mk isdirect file fs
|
||||
onAddSymlink' (Just $ fromRawFilePath lt) mk isdirect file fs
|
||||
|
||||
{- A symlink might be an arbitrary symlink, which is just added.
|
||||
- Or, if it is a git-annex symlink, ensure it points to the content
|
||||
|
|
|
@ -88,7 +88,7 @@ fixupReq req@(Req {}) =
|
|||
check getfile getmode setfile r = case readTreeItemType (getmode r) of
|
||||
Just TreeSymlink -> do
|
||||
v <- getAnnexLinkTarget' (getfile r) False
|
||||
case fileKey . takeFileName =<< v of
|
||||
case parseLinkTargetOrPointer =<< v of
|
||||
Nothing -> return r
|
||||
Just k -> setfile r <$>
|
||||
withObjectLoc k
|
||||
|
|
|
@ -205,8 +205,7 @@ performKey key backend numcopies = do
|
|||
check :: [Annex Bool] -> Annex Bool
|
||||
check cs = and <$> sequence cs
|
||||
|
||||
{- Checks that symlinks points correctly to the annexed content.
|
||||
-}
|
||||
{- Checks that symlinks points correctly to the annexed content. -}
|
||||
fixLink :: Key -> FilePath -> Annex Bool
|
||||
fixLink key file = do
|
||||
want <- calcRepo $ gitAnnexLink file key
|
||||
|
@ -215,7 +214,7 @@ fixLink key file = do
|
|||
return True
|
||||
where
|
||||
go want have
|
||||
| want /= fromInternalGitPath have = do
|
||||
| want /= fromInternalGitPath (fromRawFilePath have) = do
|
||||
showNote "fixing link"
|
||||
liftIO $ createDirectoryIfMissing True (parentDir file)
|
||||
liftIO $ removeFile file
|
||||
|
@ -562,7 +561,7 @@ badContentDirect file key = do
|
|||
badContentRemote :: Remote -> FilePath -> Key -> Annex String
|
||||
badContentRemote remote localcopy key = do
|
||||
bad <- fromRepo gitAnnexBadDir
|
||||
let destbad = bad </> fileKey key
|
||||
let destbad = bad </> keyFile key
|
||||
movedbad <- ifM (inAnnex key <||> liftIO (doesFileExist destbad))
|
||||
( return False
|
||||
, do
|
||||
|
|
|
@ -22,7 +22,8 @@ import qualified Git
|
|||
import qualified Git.Ref
|
||||
import Backend
|
||||
|
||||
import qualified Data.ByteString.Lazy as B
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
cmd :: Command
|
||||
cmd = noCommit $ noMessages $
|
||||
|
@ -62,14 +63,14 @@ seek UpdateOption = commandAction update
|
|||
-- smudge filter in memory, which is a problem with large files.
|
||||
smudge :: FilePath -> CommandStart
|
||||
smudge file = do
|
||||
b <- liftIO $ B.hGetContents stdin
|
||||
case parseLinkOrPointer b of
|
||||
b <- liftIO $ L.hGetContents stdin
|
||||
case parseLinkTargetOrPointerLazy b of
|
||||
Nothing -> noop
|
||||
Just k -> do
|
||||
topfile <- inRepo (toTopFilePath file)
|
||||
Database.Keys.addAssociatedFile k topfile
|
||||
void $ smudgeLog k topfile
|
||||
liftIO $ B.putStr b
|
||||
liftIO $ L.putStr b
|
||||
stop
|
||||
|
||||
-- Clean filter is fed file content on stdin, decides if a file
|
||||
|
@ -77,13 +78,13 @@ smudge file = do
|
|||
-- injested content if so. Otherwise, the original content.
|
||||
clean :: FilePath -> CommandStart
|
||||
clean file = do
|
||||
b <- liftIO $ B.hGetContents stdin
|
||||
b <- liftIO $ L.hGetContents stdin
|
||||
ifM fileoutsiderepo
|
||||
( liftIO $ B.hPut stdout b
|
||||
, case parseLinkOrPointer b of
|
||||
( liftIO $ L.hPut stdout b
|
||||
, case parseLinkTargetOrPointerLazy b of
|
||||
Just k -> do
|
||||
getMoveRaceRecovery k file
|
||||
liftIO $ B.hPut stdout b
|
||||
liftIO $ L.hPut stdout b
|
||||
Nothing -> go b =<< catKeyFile file
|
||||
)
|
||||
stop
|
||||
|
@ -97,7 +98,7 @@ clean file = do
|
|||
-- to free memory when sending the file, so the
|
||||
-- less we let it send, the less memory it will waste.)
|
||||
if Git.BuildVersion.older "2.5"
|
||||
then B.length b `seq` return ()
|
||||
then L.length b `seq` return ()
|
||||
else liftIO $ hClose stdin
|
||||
|
||||
-- Optimization for the case when the file is already
|
||||
|
@ -108,7 +109,7 @@ clean file = do
|
|||
( liftIO $ emitPointer ko
|
||||
, doingest oldkey
|
||||
)
|
||||
, liftIO $ B.hPut stdout b
|
||||
, liftIO $ L.hPut stdout b
|
||||
)
|
||||
|
||||
doingest oldkey = do
|
||||
|
@ -158,7 +159,7 @@ shouldAnnex file moldkey = do
|
|||
Nothing -> isNothing <$> catObjectMetaData (Git.Ref.fileRef file)
|
||||
|
||||
emitPointer :: Key -> IO ()
|
||||
emitPointer = putStr . formatPointer
|
||||
emitPointer = S.putStr . formatPointer
|
||||
|
||||
-- Recover from a previous race between eg git mv and git-annex get.
|
||||
-- That could result in the file remaining a pointer file, while
|
||||
|
|
|
@ -197,7 +197,7 @@ lookupFile1 file = do
|
|||
makekey l = case maybeLookupBackendVariety (keyVariety k) of
|
||||
Nothing -> do
|
||||
unless (null kname || null bname ||
|
||||
not (isLinkToAnnex l)) $
|
||||
not (isLinkToAnnex (toRawFilePath l))) $
|
||||
warning skip
|
||||
return Nothing
|
||||
Just backend -> return $ Just (k, backend)
|
||||
|
|
|
@ -29,6 +29,8 @@ import Git.Ref
|
|||
import Utility.InodeCache
|
||||
import Annex.AdjustedBranch
|
||||
|
||||
import qualified Data.ByteString as S
|
||||
|
||||
upgrade :: Bool -> Annex Bool
|
||||
upgrade automatic = do
|
||||
unless automatic $
|
||||
|
@ -117,7 +119,7 @@ upgradeDirectWorkTree = do
|
|||
void $ linkToAnnex k f ic
|
||||
writepointer f k = liftIO $ do
|
||||
nukeFile f
|
||||
writeFile f (formatPointer k)
|
||||
S.writeFile f (formatPointer k)
|
||||
|
||||
{- Remove all direct mode bookkeeping files. -}
|
||||
removeDirectCruft :: Annex ()
|
||||
|
|
28
Utility/RawFilePath.hs
Normal file
28
Utility/RawFilePath.hs
Normal file
|
@ -0,0 +1,28 @@
|
|||
{- Portability shim around System.Posix.Files.ByteString
|
||||
-
|
||||
- Copyright 2019 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- License: BSD-2-clause
|
||||
-}
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
|
||||
module Utility.RawFilePath (
|
||||
RawFilePath,
|
||||
readSymbolicLink,
|
||||
) where
|
||||
|
||||
#ifndef mingw32_HOST_OS
|
||||
import System.Posix.Files.ByteString
|
||||
import System.Posix.ByteString.FilePath
|
||||
#else
|
||||
import qualified Data.ByteString as B
|
||||
import System.IO.Error
|
||||
|
||||
type RawFilePath = B.ByteString
|
||||
|
||||
readSymbolicLink :: RawFilePath -> IO RawFilePath
|
||||
readSymbolicLink _ = ioError $ mkIOError illegalOperationErrorType x Nothing Nothing
|
||||
where
|
||||
x = "Utility.RawFilePath.readSymbolicLink: not supported"
|
||||
#endif
|
|
@ -1057,6 +1057,7 @@ Executable git-annex
|
|||
Utility.Process.Shim
|
||||
Utility.Process.Transcript
|
||||
Utility.QuickCheck
|
||||
Utility.RawFilePath
|
||||
Utility.Rsync
|
||||
Utility.SafeCommand
|
||||
Utility.Scheduled
|
||||
|
|
Loading…
Reference in a new issue