use filepath-bytestring for annex object manipulations

git-annex find is now RawFilePath end to end, no string conversions.
So is git-annex get when it does not need to get anything.
So this is a major milestone on optimisation.

Benchmarks indicate around 30% speedup in both commands.

Probably many other performance improvements. All or nearly all places
where a file is statted use RawFilePath now.
This commit is contained in:
Joey Hess 2019-12-11 14:12:22 -04:00
parent bdec7fed9c
commit c19211774f
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
53 changed files with 324 additions and 234 deletions

View file

@ -89,17 +89,18 @@ import Annex.Content.LowLevel
import Annex.Content.PointerFile
import Annex.Concurrent
import Types.WorkerPool
import qualified Utility.RawFilePath as R
{- Checks if a given key's content is currently present. -}
inAnnex :: Key -> Annex Bool
inAnnex key = inAnnexCheck key $ liftIO . doesFileExist
inAnnex key = inAnnexCheck key $ liftIO . R.doesPathExist
{- Runs an arbitrary check on a key's content. -}
inAnnexCheck :: Key -> (FilePath -> Annex Bool) -> Annex Bool
inAnnexCheck :: Key -> (RawFilePath -> Annex Bool) -> Annex Bool
inAnnexCheck key check = inAnnex' id False check key
{- inAnnex that performs an arbitrary check of the key's content. -}
inAnnex' :: (a -> Bool) -> a -> (FilePath -> Annex a) -> Key -> Annex a
inAnnex' :: (a -> Bool) -> a -> (RawFilePath -> Annex a) -> Key -> Annex a
inAnnex' isgood bad check key = withObjectLoc key $ \loc -> do
r <- check loc
if isgood r
@ -120,12 +121,15 @@ inAnnex' isgood bad check key = withObjectLoc key $ \loc -> do
{- Like inAnnex, checks if the object file for a key exists,
- but there are no guarantees it has the right content. -}
objectFileExists :: Key -> Annex Bool
objectFileExists key = calcRepo (gitAnnexLocation key) >>= liftIO . doesFileExist
objectFileExists key =
calcRepo (gitAnnexLocation key)
>>= liftIO . R.doesPathExist
{- A safer check; the key's content must not only be present, but
- is not in the process of being removed. -}
inAnnexSafe :: Key -> Annex (Maybe Bool)
inAnnexSafe key = inAnnex' (fromMaybe True) (Just False) go key
inAnnexSafe key =
inAnnex' (fromMaybe True) (Just False) (go . fromRawFilePath) key
where
is_locked = Nothing
is_unlocked = Just True
@ -246,7 +250,7 @@ winLocker _ _ Nothing = return Nothing
lockContentUsing :: ContentLocker -> Key -> Annex a -> Annex a
lockContentUsing locker key a = do
contentfile <- calcRepo $ gitAnnexLocation key
contentfile <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
lockfile <- contentLockFile key
bracket
(lock contentfile lockfile)
@ -474,18 +478,20 @@ moveAnnex key src = ifM (checkSecureHashes key)
, return False
)
where
storeobject dest = ifM (liftIO $ doesFileExist dest)
storeobject dest = ifM (liftIO $ R.doesPathExist dest)
( alreadyhave
, modifyContent dest $ do
, modifyContent dest' $ do
freezeContent src
liftIO $ moveFile src dest
liftIO $ moveFile src dest'
g <- Annex.gitRepo
fs <- map (`fromTopFilePath` g)
<$> Database.Keys.getAssociatedFiles key
unless (null fs) $ do
ics <- mapM (populatePointerFile (Restage True) key (toRawFilePath dest)) fs
ics <- mapM (populatePointerFile (Restage True) key dest) fs
Database.Keys.storeInodeCaches' key [dest] (catMaybes ics)
)
where
dest' = fromRawFilePath dest
alreadyhave = liftIO $ removeFile src
checkSecureHashes :: Key -> Annex Bool
@ -505,7 +511,7 @@ data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop
linkToAnnex :: Key -> FilePath -> Maybe InodeCache -> Annex LinkAnnexResult
linkToAnnex key src srcic = ifM (checkSecureHashes key)
( do
dest <- calcRepo (gitAnnexLocation key)
dest <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
modifyContent dest $ linkAnnex To key src srcic dest Nothing
, return LinkAnnexFailed
)
@ -515,7 +521,7 @@ linkFromAnnex :: Key -> FilePath -> Maybe FileMode -> Annex LinkAnnexResult
linkFromAnnex key dest destmode = do
src <- calcRepo (gitAnnexLocation key)
srcic <- withTSDelta (liftIO . genInodeCache src)
linkAnnex From key src srcic dest destmode
linkAnnex From key (fromRawFilePath src) srcic dest destmode
data FromTo = From | To
@ -534,7 +540,7 @@ data FromTo = From | To
linkAnnex :: FromTo -> Key -> FilePath -> Maybe InodeCache -> FilePath -> Maybe FileMode -> Annex LinkAnnexResult
linkAnnex _ _ _ Nothing _ _ = return LinkAnnexFailed
linkAnnex fromto key src (Just srcic) dest destmode =
withTSDelta (liftIO . genInodeCache dest) >>= \case
withTSDelta (liftIO . genInodeCache dest') >>= \case
Just destic -> do
cs <- Database.Keys.getInodeCaches key
if null cs
@ -551,12 +557,13 @@ linkAnnex fromto key src (Just srcic) dest destmode =
Linked -> noop
checksrcunchanged
where
dest' = toRawFilePath dest
failed = do
Database.Keys.addInodeCaches key [srcic]
return LinkAnnexFailed
checksrcunchanged = withTSDelta (liftIO . genInodeCache src) >>= \case
checksrcunchanged = withTSDelta (liftIO . genInodeCache (toRawFilePath src)) >>= \case
Just srcic' | compareStrong srcic srcic' -> do
destic <- withTSDelta (liftIO . genInodeCache dest)
destic <- withTSDelta (liftIO . genInodeCache dest')
Database.Keys.addInodeCaches key $
catMaybes [destic, Just srcic]
return LinkAnnexOk
@ -567,7 +574,7 @@ linkAnnex fromto key src (Just srcic) dest destmode =
{- Removes the annex object file for a key. Lowlevel. -}
unlinkAnnex :: Key -> Annex ()
unlinkAnnex key = do
obj <- calcRepo $ gitAnnexLocation key
obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
modifyContent obj $ do
secureErase obj
liftIO $ nukeFile obj
@ -616,15 +623,15 @@ prepSendAnnex key = withObjectLoc key $ \f -> do
else pure cache
return $ if null cache'
then Nothing
else Just (f, sameInodeCache f cache')
else Just (fromRawFilePath f, sameInodeCache f cache')
{- Performs an action, passing it the location to use for a key's content. -}
withObjectLoc :: Key -> (FilePath -> Annex a) -> Annex a
withObjectLoc :: Key -> (RawFilePath -> Annex a) -> Annex a
withObjectLoc key a = a =<< calcRepo (gitAnnexLocation key)
cleanObjectLoc :: Key -> Annex () -> Annex ()
cleanObjectLoc key cleaner = do
file <- calcRepo $ gitAnnexLocation key
file <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
void $ tryIO $ thawContentDir file
cleaner
liftIO $ removeparents file (3 :: Int)
@ -640,22 +647,23 @@ cleanObjectLoc key cleaner = do
removeAnnex :: ContentRemovalLock -> Annex ()
removeAnnex (ContentRemovalLock key) = withObjectLoc key $ \file ->
cleanObjectLoc key $ do
secureErase file
liftIO $ nukeFile file
let file' = fromRawFilePath file
secureErase file'
liftIO $ nukeFile file'
g <- Annex.gitRepo
mapM_ (\f -> void $ tryIO $ resetpointer $ fromRawFilePath $ fromTopFilePath f g)
mapM_ (\f -> void $ tryIO $ resetpointer $ fromTopFilePath f g)
=<< Database.Keys.getAssociatedFiles key
Database.Keys.removeInodeCaches key
where
-- Check associated pointer file for modifications, and reset if
-- it's unmodified.
resetpointer file = ifM (isUnmodified key file)
( depopulatePointerFile key (toRawFilePath file)
( depopulatePointerFile key file
-- Modified file, so leave it alone.
-- If it was a hard link to the annex object,
-- that object might have been frozen as part of the
-- removal process, so thaw it.
, void $ tryIO $ thawContent file
, void $ tryIO $ thawContent $ fromRawFilePath file
)
{- Check if a file contains the unmodified content of the key.
@ -663,12 +671,12 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key $ \file ->
- The expensive way to tell is to do a verification of its content.
- The cheaper way is to see if the InodeCache for the key matches the
- file. -}
isUnmodified :: Key -> FilePath -> Annex Bool
isUnmodified :: Key -> RawFilePath -> Annex Bool
isUnmodified key f = go =<< geti
where
go Nothing = return False
go (Just fc) = isUnmodifiedCheap' key fc <||> expensivecheck fc
expensivecheck fc = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key f)
expensivecheck fc = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key (fromRawFilePath f))
( do
-- The file could have been modified while it was
-- being verified. Detect that.
@ -691,7 +699,7 @@ isUnmodified key f = go =<< geti
- this may report a false positive when repeated edits are made to a file
- within a small time window (eg 1 second).
-}
isUnmodifiedCheap :: Key -> FilePath -> Annex Bool
isUnmodifiedCheap :: Key -> RawFilePath -> Annex Bool
isUnmodifiedCheap key f = maybe (return False) (isUnmodifiedCheap' key)
=<< withTSDelta (liftIO . genInodeCache f)
@ -703,7 +711,7 @@ isUnmodifiedCheap' key fc =
- returns the file it was moved to. -}
moveBad :: Key -> Annex FilePath
moveBad key = do
src <- calcRepo $ gitAnnexLocation key
src <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
bad <- fromRepo gitAnnexBadDir
let dest = bad </> takeFileName src
createAnnexDirectory (parentDir dest)
@ -791,7 +799,7 @@ preseedTmp key file = go =<< inAnnex key
copy = ifM (liftIO $ doesFileExist file)
( return True
, do
s <- calcRepo $ gitAnnexLocation key
s <- fromRawFilePath <$> (calcRepo $ gitAnnexLocation key)
liftIO $ ifM (doesFileExist s)
( copyFileExternal CopyTimeStamps s file
, return False