use filepath-bytestring for annex object manipulations

git-annex find is now RawFilePath end to end, no string conversions.
So is git-annex get when it does not need to get anything.
So this is a major milestone on optimisation.

Benchmarks indicate around 30% speedup in both commands.

Probably many other performance improvements. All or nearly all places
where a file is statted use RawFilePath now.
This commit is contained in:
Joey Hess 2019-12-11 14:12:22 -04:00
parent bdec7fed9c
commit c19211774f
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
53 changed files with 324 additions and 234 deletions

View file

@ -9,6 +9,9 @@ module Command.ContentLocation where
import Command
import Annex.Content
import qualified Utility.RawFilePath as R
import qualified Data.ByteString.Char8 as B8
cmd :: Command
cmd = noCommit $ noMessages $
@ -20,10 +23,10 @@ cmd = noCommit $ noMessages $
run :: () -> String -> Annex Bool
run _ p = do
let k = fromMaybe (giveup "bad key") $ deserializeKey p
maybe (return False) (\f -> liftIO (putStrLn f) >> return True)
maybe (return False) (\f -> liftIO (B8.putStrLn f) >> return True)
=<< inAnnex' (pure True) Nothing check k
where
check f = ifM (liftIO (doesFileExist f))
check f = ifM (liftIO (R.doesPathExist f))
( return (Just f)
, return Nothing
)

View file

@ -90,7 +90,8 @@ fixupReq req@(Req {}) =
v <- getAnnexLinkTarget' (toRawFilePath (getfile r)) False
case parseLinkTargetOrPointer =<< v of
Nothing -> return r
Just k -> withObjectLoc k (pure . setfile r)
Just k -> withObjectLoc k $
pure . setfile r . fromRawFilePath
_ -> return r
externalDiffer :: String -> [String] -> Differ

View file

@ -93,8 +93,8 @@ keyVars key =
, ("bytesize", size show)
, ("humansize", size $ roughSize storageUnits True)
, ("keyname", decodeBS $ fromKey keyName key)
, ("hashdirlower", hashDirLower def key)
, ("hashdirmixed", hashDirMixed def key)
, ("hashdirlower", fromRawFilePath $ hashDirLower def key)
, ("hashdirmixed", fromRawFilePath $ hashDirMixed def key)
, ("mtime", whenavail show $ fromKey keyMtime key)
]
where

View file

@ -53,11 +53,11 @@ start fixwhat file key = do
where
fixby = starting "fix" (mkActionItem (key, file))
fixthin = do
obj <- calcRepo $ gitAnnexLocation key
stopUnless (isUnmodified key (fromRawFilePath file) <&&> isUnmodified key obj) $ do
obj <- calcRepo (gitAnnexLocation key)
stopUnless (isUnmodified key file <&&> isUnmodified key obj) $ do
thin <- annexThin <$> Annex.getGitConfig
fs <- liftIO $ catchMaybeIO $ R.getFileStatus file
os <- liftIO $ catchMaybeIO $ getFileStatus obj
os <- liftIO $ catchMaybeIO $ R.getFileStatus obj
case (linkCount <$> fs, linkCount <$> os, thin) of
(Just 1, Just 1, True) ->
fixby $ makeHardLink file key
@ -65,15 +65,16 @@ start fixwhat file key = do
fixby $ breakHardLink file key obj
_ -> stop
breakHardLink :: RawFilePath -> Key -> FilePath -> CommandPerform
breakHardLink :: RawFilePath -> Key -> RawFilePath -> CommandPerform
breakHardLink file key obj = do
replaceFile (fromRawFilePath file) $ \tmp -> do
mode <- liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus file
unlessM (checkedCopyFile key obj tmp mode) $
let obj' = fromRawFilePath obj
unlessM (checkedCopyFile key obj' tmp mode) $
error "unable to break hard link"
thawContent tmp
modifyContent obj $ freezeContent obj
Database.Keys.storeInodeCaches key [fromRawFilePath file]
modifyContent obj' $ freezeContent obj'
Database.Keys.storeInodeCaches key [file]
next $ return True
makeHardLink :: RawFilePath -> Key -> CommandPerform

View file

@ -223,7 +223,7 @@ fixLink key file = do
- in this repository only. -}
verifyLocationLog :: Key -> KeyStatus -> ActionItem -> Annex Bool
verifyLocationLog key keystatus ai = do
obj <- calcRepo $ gitAnnexLocation key
obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
present <- if isKeyUnlockedThin keystatus
then liftIO (doesFileExist obj)
else inAnnex key
@ -332,11 +332,11 @@ verifyWorkTree key file = do
ifM (annexThin <$> Annex.getGitConfig)
( void $ linkFromAnnex key tmp mode
, do
obj <- calcRepo $ gitAnnexLocation key
obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
void $ checkedCopyFile key obj tmp mode
thawContent tmp
)
Database.Keys.storeInodeCaches key [fromRawFilePath file]
Database.Keys.storeInodeCaches key [file]
_ -> return ()
return True
@ -349,8 +349,8 @@ checkKeySize :: Key -> KeyStatus -> ActionItem -> Annex Bool
checkKeySize _ KeyUnlockedThin _ = return True
checkKeySize key _ ai = do
file <- calcRepo $ gitAnnexLocation key
ifM (liftIO $ doesFileExist file)
( checkKeySizeOr badContent key file ai
ifM (liftIO $ R.doesPathExist file)
( checkKeySizeOr badContent key (fromRawFilePath file) ai
, return True
)
@ -417,10 +417,10 @@ checkKeyUpgrade _ _ _ (AssociatedFile Nothing) =
-}
checkBackend :: Backend -> Key -> KeyStatus -> AssociatedFile -> Annex Bool
checkBackend backend key keystatus afile = do
content <- calcRepo $ gitAnnexLocation key
content <- calcRepo (gitAnnexLocation key)
ifM (pure (isKeyUnlockedThin keystatus) <&&> (not <$> isUnmodified key content))
( nocheck
, checkBackendOr badContent backend key content ai
, checkBackendOr badContent backend key (fromRawFilePath content) ai
)
where
nocheck = return True
@ -670,8 +670,8 @@ isKeyUnlockedThin KeyMissing = False
getKeyStatus :: Key -> Annex KeyStatus
getKeyStatus key = catchDefaultIO KeyMissing $ do
afs <- not . null <$> Database.Keys.getAssociatedFiles key
obj <- calcRepo $ gitAnnexLocation key
multilink <- ((> 1) . linkCount <$> liftIO (getFileStatus obj))
obj <- calcRepo (gitAnnexLocation key)
multilink <- ((> 1) . linkCount <$> liftIO (R.getFileStatus obj))
return $ if multilink && afs
then KeyUnlockedThin
else KeyPresent

View file

@ -181,7 +181,7 @@ startLocal largematcher mode (srcfile, destfile) =
-- weakly the same as the origianlly locked down file's
-- inode cache. (Since the file may have been copied,
-- its inodes may not be the same.)
newcache <- withTSDelta $ liftIO . genInodeCache destfile
newcache <- withTSDelta $ liftIO . genInodeCache (toRawFilePath destfile)
let unchanged = case (newcache, inodeCache (keySource ld)) of
(_, Nothing) -> True
(Just newc, Just c) | compareWeak c newc -> True

View file

@ -20,6 +20,7 @@ import qualified Database.Keys
import Annex.Ingest
import Logs.Location
import Git.FilePath
import qualified Utility.RawFilePath as R
cmd :: Command
cmd = withGlobalOptions [jsonOptions, annexedMatchingOptions] $
@ -43,7 +44,7 @@ startNew file key = ifM (isJust <$> isAnnexLink file)
| key' == key = cont
| otherwise = errorModified
go Nothing =
ifM (isUnmodified key (fromRawFilePath file))
ifM (isUnmodified key file)
( cont
, ifM (Annex.getState Annex.force)
( cont
@ -56,37 +57,38 @@ performNew :: RawFilePath -> Key -> CommandPerform
performNew file key = do
lockdown =<< calcRepo (gitAnnexLocation key)
addLink (fromRawFilePath file) key
=<< withTSDelta (liftIO . genInodeCache' file)
=<< withTSDelta (liftIO . genInodeCache file)
next $ cleanupNew file key
where
lockdown obj = do
ifM (isUnmodified key obj)
( breakhardlink obj
, repopulate obj
, repopulate (fromRawFilePath obj)
)
whenM (liftIO $ doesFileExist obj) $
freezeContent obj
whenM (liftIO $ R.doesPathExist obj) $
freezeContent $ fromRawFilePath obj
-- It's ok if the file is hard linked to obj, but if some other
-- associated file is, we need to break that link to lock down obj.
breakhardlink obj = whenM (catchBoolIO $ (> 1) . linkCount <$> liftIO (getFileStatus obj)) $ do
mfc <- withTSDelta (liftIO . genInodeCache' file)
breakhardlink obj = whenM (catchBoolIO $ (> 1) . linkCount <$> liftIO (R.getFileStatus obj)) $ do
mfc <- withTSDelta (liftIO . genInodeCache file)
unlessM (sameInodeCache obj (maybeToList mfc)) $ do
modifyContent obj $ replaceFile obj $ \tmp -> do
unlessM (checkedCopyFile key obj tmp Nothing) $
let obj' = fromRawFilePath obj
modifyContent obj' $ replaceFile obj' $ \tmp -> do
unlessM (checkedCopyFile key obj' tmp Nothing) $
giveup "unable to lock file"
Database.Keys.storeInodeCaches key [obj]
-- Try to repopulate obj from an unmodified associated file.
repopulate obj = modifyContent obj $ do
g <- Annex.gitRepo
fs <- map fromRawFilePath . map (`fromTopFilePath` g)
fs <- map (`fromTopFilePath` g)
<$> Database.Keys.getAssociatedFiles key
mfile <- firstM (isUnmodified key) fs
liftIO $ nukeFile obj
case mfile of
Just unmodified ->
unlessM (checkedCopyFile key unmodified obj Nothing)
unlessM (checkedCopyFile key (fromRawFilePath unmodified) obj Nothing)
lostcontent
Nothing -> lostcontent

View file

@ -86,7 +86,7 @@ perform file oldkey oldbackend newbackend = go =<< genkey (fastMigrate oldbacken
content <- calcRepo $ gitAnnexLocation oldkey
let source = KeySource
{ keyFilename = fromRawFilePath file
, contentLocation = content
, contentLocation = fromRawFilePath content
, inodeCache = Nothing
}
v <- genKey source nullMeterUpdate (Just newbackend)

View file

@ -137,7 +137,8 @@ send ups fs = do
mk <- lookupFile f
case mk of
Nothing -> noop
Just k -> withObjectLoc k (addlist f)
Just k -> withObjectLoc k $
addlist f . fromRawFilePath
liftIO $ hClose h
serverkey <- uftpKey

View file

@ -83,12 +83,12 @@ linkKey file oldkey newkey = ifM (isJust <$> isAnnexLink file)
- unlocked file, which would leave the new key unlocked
- and vulnerable to corruption. -}
( getViaTmpFromDisk RetrievalAllKeysSecure DefaultVerify newkey $ \tmp -> unVerified $ do
oldobj <- calcRepo (gitAnnexLocation oldkey)
oldobj <- fromRawFilePath <$> calcRepo (gitAnnexLocation oldkey)
isJust <$> linkOrCopy' (return True) newkey oldobj tmp Nothing
, do
{- The file being rekeyed is itself an unlocked file; if
- it's hard linked to the old key, that link must be broken. -}
oldobj <- calcRepo (gitAnnexLocation oldkey)
oldobj <- fromRawFilePath <$> calcRepo (gitAnnexLocation oldkey)
v <- tryNonAsync $ do
st <- liftIO $ R.getFileStatus file
when (linkCount st > 1) $ do
@ -97,7 +97,7 @@ linkKey file oldkey newkey = ifM (isJust <$> isAnnexLink file)
unlessM (checkedCopyFile oldkey oldobj tmp Nothing) $
error "can't lock old key"
thawContent tmp
ic <- withTSDelta (liftIO . genInodeCache' file)
ic <- withTSDelta (liftIO . genInodeCache file)
case v of
Left e -> do
warning (show e)

View file

@ -108,7 +108,7 @@ clean file = do
-- annexed and is unmodified.
case oldkey of
Nothing -> doingest oldkey
Just ko -> ifM (isUnmodifiedCheap ko file)
Just ko -> ifM (isUnmodifiedCheap ko (toRawFilePath file))
( liftIO $ emitPointer ko
, doingest oldkey
)
@ -174,7 +174,7 @@ shouldAnnex file moldkey = ifM (annexGitAddToAnnex <$> Annex.getGitConfig)
Just _ -> return True
Nothing -> checkknowninode
checkknowninode = withTSDelta (liftIO . genInodeCache file) >>= \case
checkknowninode = withTSDelta (liftIO . genInodeCache (toRawFilePath file)) >>= \case
Nothing -> pure False
Just ic -> Database.Keys.isInodeKnown ic =<< sentinalStatus
@ -191,7 +191,7 @@ emitPointer = S.putStr . formatPointer
getMoveRaceRecovery :: Key -> RawFilePath -> Annex ()
getMoveRaceRecovery k file = void $ tryNonAsync $
whenM (inAnnex k) $ do
obj <- toRawFilePath <$> calcRepo (gitAnnexLocation k)
obj <- calcRepo (gitAnnexLocation k)
-- Cannot restage because git add is running and has
-- the index locked.
populatePointerFile (Restage False) k obj file >>= \case
@ -207,7 +207,7 @@ updateSmudged :: Restage -> Annex ()
updateSmudged restage = streamSmudged $ \k topf -> do
f <- fromRepo (fromTopFilePath topf)
whenM (inAnnex k) $ do
obj <- toRawFilePath <$> calcRepo (gitAnnexLocation k)
obj <- calcRepo (gitAnnexLocation k)
unlessM (isJust <$> populatePointerFile restage k obj f) $
liftIO (isPointerFile f) >>= \case
Just k' | k' == k -> toplevelWarning False $

View file

@ -168,7 +168,7 @@ test st r k = catMaybes
get
, Just $ check "fsck downloaded object" fsck
, Just $ check "retrieveKeyFile resume from 33%" $ do
loc <- Annex.calcRepo (gitAnnexLocation k)
loc <- fromRawFilePath <$> Annex.calcRepo (gitAnnexLocation k)
tmp <- prepTmp k
partial <- liftIO $ bracket (openBinaryFile loc ReadMode) hClose $ \h -> do
sz <- hFileSize h
@ -184,7 +184,7 @@ test st r k = catMaybes
get
, Just $ check "fsck downloaded object" fsck
, Just $ check "retrieveKeyFile resume from end" $ do
loc <- Annex.calcRepo (gitAnnexLocation k)
loc <- fromRawFilePath <$> Annex.calcRepo (gitAnnexLocation k)
tmp <- prepTmp k
void $ liftIO $ copyFileExternal CopyAllMetaData loc tmp
lockContentForRemoval k removeAnnex
@ -240,7 +240,7 @@ testExportTree st (Just _) ea k1 k2 =
check desc a = testCase desc $
Annex.eval st (Annex.setOutput QuietOutput >> a) @? "failed"
storeexport k = do
loc <- Annex.calcRepo (gitAnnexLocation k)
loc <- fromRawFilePath <$> Annex.calcRepo (gitAnnexLocation k)
Remote.storeExport ea loc k testexportlocation nullMeterUpdate
retrieveexport k = withTmpFile "exported" $ \tmp h -> do
liftIO $ hClose h

View file

@ -46,7 +46,7 @@ perform file key = do
cleanup :: RawFilePath -> Key -> CommandCleanup
cleanup file key = do
Database.Keys.removeAssociatedFile key =<< inRepo (toTopFilePath file)
src <- calcRepo $ gitAnnexLocation key
src <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
ifM (Annex.getState Annex.fast)
( do
-- Only make a hard link if the annexed file does not

View file

@ -17,6 +17,7 @@ import qualified Database.Keys
import Annex.Content
import Annex.Init
import Utility.FileMode
import qualified Utility.RawFilePath as R
cmd :: Command
cmd = addCheck check $
@ -117,5 +118,5 @@ removeUnannexed = go []
, go (k:c) ks
)
enoughlinks f = catchBoolIO $ do
s <- getFileStatus f
s <- R.getFileStatus f
return $ linkCount s > 1

View file

@ -283,7 +283,7 @@ associatedFilesFilter = filterM go
checkunmodified _ [] = return True
checkunmodified cs (f:fs) = do
relf <- fromRepo $ fromTopFilePath f
ifM (sameInodeCache (fromRawFilePath relf) cs)
ifM (sameInodeCache relf cs)
( return False
, checkunmodified cs fs
)