use filepath-bytestring for annex object manipulations
git-annex find is now RawFilePath end to end, no string conversions. So is git-annex get when it does not need to get anything. So this is a major milestone on optimisation. Benchmarks indicate around 30% speedup in both commands. Probably many other performance improvements. All or nearly all places where a file is statted use RawFilePath now.
This commit is contained in:
parent
bdec7fed9c
commit
c19211774f
53 changed files with 324 additions and 234 deletions
|
@ -9,6 +9,9 @@ module Command.ContentLocation where
|
|||
|
||||
import Command
|
||||
import Annex.Content
|
||||
import qualified Utility.RawFilePath as R
|
||||
|
||||
import qualified Data.ByteString.Char8 as B8
|
||||
|
||||
cmd :: Command
|
||||
cmd = noCommit $ noMessages $
|
||||
|
@ -20,10 +23,10 @@ cmd = noCommit $ noMessages $
|
|||
run :: () -> String -> Annex Bool
|
||||
run _ p = do
|
||||
let k = fromMaybe (giveup "bad key") $ deserializeKey p
|
||||
maybe (return False) (\f -> liftIO (putStrLn f) >> return True)
|
||||
maybe (return False) (\f -> liftIO (B8.putStrLn f) >> return True)
|
||||
=<< inAnnex' (pure True) Nothing check k
|
||||
where
|
||||
check f = ifM (liftIO (doesFileExist f))
|
||||
check f = ifM (liftIO (R.doesPathExist f))
|
||||
( return (Just f)
|
||||
, return Nothing
|
||||
)
|
||||
|
|
|
@ -90,7 +90,8 @@ fixupReq req@(Req {}) =
|
|||
v <- getAnnexLinkTarget' (toRawFilePath (getfile r)) False
|
||||
case parseLinkTargetOrPointer =<< v of
|
||||
Nothing -> return r
|
||||
Just k -> withObjectLoc k (pure . setfile r)
|
||||
Just k -> withObjectLoc k $
|
||||
pure . setfile r . fromRawFilePath
|
||||
_ -> return r
|
||||
|
||||
externalDiffer :: String -> [String] -> Differ
|
||||
|
|
|
@ -93,8 +93,8 @@ keyVars key =
|
|||
, ("bytesize", size show)
|
||||
, ("humansize", size $ roughSize storageUnits True)
|
||||
, ("keyname", decodeBS $ fromKey keyName key)
|
||||
, ("hashdirlower", hashDirLower def key)
|
||||
, ("hashdirmixed", hashDirMixed def key)
|
||||
, ("hashdirlower", fromRawFilePath $ hashDirLower def key)
|
||||
, ("hashdirmixed", fromRawFilePath $ hashDirMixed def key)
|
||||
, ("mtime", whenavail show $ fromKey keyMtime key)
|
||||
]
|
||||
where
|
||||
|
|
|
@ -53,11 +53,11 @@ start fixwhat file key = do
|
|||
where
|
||||
fixby = starting "fix" (mkActionItem (key, file))
|
||||
fixthin = do
|
||||
obj <- calcRepo $ gitAnnexLocation key
|
||||
stopUnless (isUnmodified key (fromRawFilePath file) <&&> isUnmodified key obj) $ do
|
||||
obj <- calcRepo (gitAnnexLocation key)
|
||||
stopUnless (isUnmodified key file <&&> isUnmodified key obj) $ do
|
||||
thin <- annexThin <$> Annex.getGitConfig
|
||||
fs <- liftIO $ catchMaybeIO $ R.getFileStatus file
|
||||
os <- liftIO $ catchMaybeIO $ getFileStatus obj
|
||||
os <- liftIO $ catchMaybeIO $ R.getFileStatus obj
|
||||
case (linkCount <$> fs, linkCount <$> os, thin) of
|
||||
(Just 1, Just 1, True) ->
|
||||
fixby $ makeHardLink file key
|
||||
|
@ -65,15 +65,16 @@ start fixwhat file key = do
|
|||
fixby $ breakHardLink file key obj
|
||||
_ -> stop
|
||||
|
||||
breakHardLink :: RawFilePath -> Key -> FilePath -> CommandPerform
|
||||
breakHardLink :: RawFilePath -> Key -> RawFilePath -> CommandPerform
|
||||
breakHardLink file key obj = do
|
||||
replaceFile (fromRawFilePath file) $ \tmp -> do
|
||||
mode <- liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus file
|
||||
unlessM (checkedCopyFile key obj tmp mode) $
|
||||
let obj' = fromRawFilePath obj
|
||||
unlessM (checkedCopyFile key obj' tmp mode) $
|
||||
error "unable to break hard link"
|
||||
thawContent tmp
|
||||
modifyContent obj $ freezeContent obj
|
||||
Database.Keys.storeInodeCaches key [fromRawFilePath file]
|
||||
modifyContent obj' $ freezeContent obj'
|
||||
Database.Keys.storeInodeCaches key [file]
|
||||
next $ return True
|
||||
|
||||
makeHardLink :: RawFilePath -> Key -> CommandPerform
|
||||
|
|
|
@ -223,7 +223,7 @@ fixLink key file = do
|
|||
- in this repository only. -}
|
||||
verifyLocationLog :: Key -> KeyStatus -> ActionItem -> Annex Bool
|
||||
verifyLocationLog key keystatus ai = do
|
||||
obj <- calcRepo $ gitAnnexLocation key
|
||||
obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
|
||||
present <- if isKeyUnlockedThin keystatus
|
||||
then liftIO (doesFileExist obj)
|
||||
else inAnnex key
|
||||
|
@ -332,11 +332,11 @@ verifyWorkTree key file = do
|
|||
ifM (annexThin <$> Annex.getGitConfig)
|
||||
( void $ linkFromAnnex key tmp mode
|
||||
, do
|
||||
obj <- calcRepo $ gitAnnexLocation key
|
||||
obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
|
||||
void $ checkedCopyFile key obj tmp mode
|
||||
thawContent tmp
|
||||
)
|
||||
Database.Keys.storeInodeCaches key [fromRawFilePath file]
|
||||
Database.Keys.storeInodeCaches key [file]
|
||||
_ -> return ()
|
||||
return True
|
||||
|
||||
|
@ -349,8 +349,8 @@ checkKeySize :: Key -> KeyStatus -> ActionItem -> Annex Bool
|
|||
checkKeySize _ KeyUnlockedThin _ = return True
|
||||
checkKeySize key _ ai = do
|
||||
file <- calcRepo $ gitAnnexLocation key
|
||||
ifM (liftIO $ doesFileExist file)
|
||||
( checkKeySizeOr badContent key file ai
|
||||
ifM (liftIO $ R.doesPathExist file)
|
||||
( checkKeySizeOr badContent key (fromRawFilePath file) ai
|
||||
, return True
|
||||
)
|
||||
|
||||
|
@ -417,10 +417,10 @@ checkKeyUpgrade _ _ _ (AssociatedFile Nothing) =
|
|||
-}
|
||||
checkBackend :: Backend -> Key -> KeyStatus -> AssociatedFile -> Annex Bool
|
||||
checkBackend backend key keystatus afile = do
|
||||
content <- calcRepo $ gitAnnexLocation key
|
||||
content <- calcRepo (gitAnnexLocation key)
|
||||
ifM (pure (isKeyUnlockedThin keystatus) <&&> (not <$> isUnmodified key content))
|
||||
( nocheck
|
||||
, checkBackendOr badContent backend key content ai
|
||||
, checkBackendOr badContent backend key (fromRawFilePath content) ai
|
||||
)
|
||||
where
|
||||
nocheck = return True
|
||||
|
@ -670,8 +670,8 @@ isKeyUnlockedThin KeyMissing = False
|
|||
getKeyStatus :: Key -> Annex KeyStatus
|
||||
getKeyStatus key = catchDefaultIO KeyMissing $ do
|
||||
afs <- not . null <$> Database.Keys.getAssociatedFiles key
|
||||
obj <- calcRepo $ gitAnnexLocation key
|
||||
multilink <- ((> 1) . linkCount <$> liftIO (getFileStatus obj))
|
||||
obj <- calcRepo (gitAnnexLocation key)
|
||||
multilink <- ((> 1) . linkCount <$> liftIO (R.getFileStatus obj))
|
||||
return $ if multilink && afs
|
||||
then KeyUnlockedThin
|
||||
else KeyPresent
|
||||
|
|
|
@ -181,7 +181,7 @@ startLocal largematcher mode (srcfile, destfile) =
|
|||
-- weakly the same as the origianlly locked down file's
|
||||
-- inode cache. (Since the file may have been copied,
|
||||
-- its inodes may not be the same.)
|
||||
newcache <- withTSDelta $ liftIO . genInodeCache destfile
|
||||
newcache <- withTSDelta $ liftIO . genInodeCache (toRawFilePath destfile)
|
||||
let unchanged = case (newcache, inodeCache (keySource ld)) of
|
||||
(_, Nothing) -> True
|
||||
(Just newc, Just c) | compareWeak c newc -> True
|
||||
|
|
|
@ -20,6 +20,7 @@ import qualified Database.Keys
|
|||
import Annex.Ingest
|
||||
import Logs.Location
|
||||
import Git.FilePath
|
||||
import qualified Utility.RawFilePath as R
|
||||
|
||||
cmd :: Command
|
||||
cmd = withGlobalOptions [jsonOptions, annexedMatchingOptions] $
|
||||
|
@ -43,7 +44,7 @@ startNew file key = ifM (isJust <$> isAnnexLink file)
|
|||
| key' == key = cont
|
||||
| otherwise = errorModified
|
||||
go Nothing =
|
||||
ifM (isUnmodified key (fromRawFilePath file))
|
||||
ifM (isUnmodified key file)
|
||||
( cont
|
||||
, ifM (Annex.getState Annex.force)
|
||||
( cont
|
||||
|
@ -56,37 +57,38 @@ performNew :: RawFilePath -> Key -> CommandPerform
|
|||
performNew file key = do
|
||||
lockdown =<< calcRepo (gitAnnexLocation key)
|
||||
addLink (fromRawFilePath file) key
|
||||
=<< withTSDelta (liftIO . genInodeCache' file)
|
||||
=<< withTSDelta (liftIO . genInodeCache file)
|
||||
next $ cleanupNew file key
|
||||
where
|
||||
lockdown obj = do
|
||||
ifM (isUnmodified key obj)
|
||||
( breakhardlink obj
|
||||
, repopulate obj
|
||||
, repopulate (fromRawFilePath obj)
|
||||
)
|
||||
whenM (liftIO $ doesFileExist obj) $
|
||||
freezeContent obj
|
||||
whenM (liftIO $ R.doesPathExist obj) $
|
||||
freezeContent $ fromRawFilePath obj
|
||||
|
||||
-- It's ok if the file is hard linked to obj, but if some other
|
||||
-- associated file is, we need to break that link to lock down obj.
|
||||
breakhardlink obj = whenM (catchBoolIO $ (> 1) . linkCount <$> liftIO (getFileStatus obj)) $ do
|
||||
mfc <- withTSDelta (liftIO . genInodeCache' file)
|
||||
breakhardlink obj = whenM (catchBoolIO $ (> 1) . linkCount <$> liftIO (R.getFileStatus obj)) $ do
|
||||
mfc <- withTSDelta (liftIO . genInodeCache file)
|
||||
unlessM (sameInodeCache obj (maybeToList mfc)) $ do
|
||||
modifyContent obj $ replaceFile obj $ \tmp -> do
|
||||
unlessM (checkedCopyFile key obj tmp Nothing) $
|
||||
let obj' = fromRawFilePath obj
|
||||
modifyContent obj' $ replaceFile obj' $ \tmp -> do
|
||||
unlessM (checkedCopyFile key obj' tmp Nothing) $
|
||||
giveup "unable to lock file"
|
||||
Database.Keys.storeInodeCaches key [obj]
|
||||
|
||||
-- Try to repopulate obj from an unmodified associated file.
|
||||
repopulate obj = modifyContent obj $ do
|
||||
g <- Annex.gitRepo
|
||||
fs <- map fromRawFilePath . map (`fromTopFilePath` g)
|
||||
fs <- map (`fromTopFilePath` g)
|
||||
<$> Database.Keys.getAssociatedFiles key
|
||||
mfile <- firstM (isUnmodified key) fs
|
||||
liftIO $ nukeFile obj
|
||||
case mfile of
|
||||
Just unmodified ->
|
||||
unlessM (checkedCopyFile key unmodified obj Nothing)
|
||||
unlessM (checkedCopyFile key (fromRawFilePath unmodified) obj Nothing)
|
||||
lostcontent
|
||||
Nothing -> lostcontent
|
||||
|
||||
|
|
|
@ -86,7 +86,7 @@ perform file oldkey oldbackend newbackend = go =<< genkey (fastMigrate oldbacken
|
|||
content <- calcRepo $ gitAnnexLocation oldkey
|
||||
let source = KeySource
|
||||
{ keyFilename = fromRawFilePath file
|
||||
, contentLocation = content
|
||||
, contentLocation = fromRawFilePath content
|
||||
, inodeCache = Nothing
|
||||
}
|
||||
v <- genKey source nullMeterUpdate (Just newbackend)
|
||||
|
|
|
@ -137,7 +137,8 @@ send ups fs = do
|
|||
mk <- lookupFile f
|
||||
case mk of
|
||||
Nothing -> noop
|
||||
Just k -> withObjectLoc k (addlist f)
|
||||
Just k -> withObjectLoc k $
|
||||
addlist f . fromRawFilePath
|
||||
liftIO $ hClose h
|
||||
|
||||
serverkey <- uftpKey
|
||||
|
|
|
@ -83,12 +83,12 @@ linkKey file oldkey newkey = ifM (isJust <$> isAnnexLink file)
|
|||
- unlocked file, which would leave the new key unlocked
|
||||
- and vulnerable to corruption. -}
|
||||
( getViaTmpFromDisk RetrievalAllKeysSecure DefaultVerify newkey $ \tmp -> unVerified $ do
|
||||
oldobj <- calcRepo (gitAnnexLocation oldkey)
|
||||
oldobj <- fromRawFilePath <$> calcRepo (gitAnnexLocation oldkey)
|
||||
isJust <$> linkOrCopy' (return True) newkey oldobj tmp Nothing
|
||||
, do
|
||||
{- The file being rekeyed is itself an unlocked file; if
|
||||
- it's hard linked to the old key, that link must be broken. -}
|
||||
oldobj <- calcRepo (gitAnnexLocation oldkey)
|
||||
oldobj <- fromRawFilePath <$> calcRepo (gitAnnexLocation oldkey)
|
||||
v <- tryNonAsync $ do
|
||||
st <- liftIO $ R.getFileStatus file
|
||||
when (linkCount st > 1) $ do
|
||||
|
@ -97,7 +97,7 @@ linkKey file oldkey newkey = ifM (isJust <$> isAnnexLink file)
|
|||
unlessM (checkedCopyFile oldkey oldobj tmp Nothing) $
|
||||
error "can't lock old key"
|
||||
thawContent tmp
|
||||
ic <- withTSDelta (liftIO . genInodeCache' file)
|
||||
ic <- withTSDelta (liftIO . genInodeCache file)
|
||||
case v of
|
||||
Left e -> do
|
||||
warning (show e)
|
||||
|
|
|
@ -108,7 +108,7 @@ clean file = do
|
|||
-- annexed and is unmodified.
|
||||
case oldkey of
|
||||
Nothing -> doingest oldkey
|
||||
Just ko -> ifM (isUnmodifiedCheap ko file)
|
||||
Just ko -> ifM (isUnmodifiedCheap ko (toRawFilePath file))
|
||||
( liftIO $ emitPointer ko
|
||||
, doingest oldkey
|
||||
)
|
||||
|
@ -174,7 +174,7 @@ shouldAnnex file moldkey = ifM (annexGitAddToAnnex <$> Annex.getGitConfig)
|
|||
Just _ -> return True
|
||||
Nothing -> checkknowninode
|
||||
|
||||
checkknowninode = withTSDelta (liftIO . genInodeCache file) >>= \case
|
||||
checkknowninode = withTSDelta (liftIO . genInodeCache (toRawFilePath file)) >>= \case
|
||||
Nothing -> pure False
|
||||
Just ic -> Database.Keys.isInodeKnown ic =<< sentinalStatus
|
||||
|
||||
|
@ -191,7 +191,7 @@ emitPointer = S.putStr . formatPointer
|
|||
getMoveRaceRecovery :: Key -> RawFilePath -> Annex ()
|
||||
getMoveRaceRecovery k file = void $ tryNonAsync $
|
||||
whenM (inAnnex k) $ do
|
||||
obj <- toRawFilePath <$> calcRepo (gitAnnexLocation k)
|
||||
obj <- calcRepo (gitAnnexLocation k)
|
||||
-- Cannot restage because git add is running and has
|
||||
-- the index locked.
|
||||
populatePointerFile (Restage False) k obj file >>= \case
|
||||
|
@ -207,7 +207,7 @@ updateSmudged :: Restage -> Annex ()
|
|||
updateSmudged restage = streamSmudged $ \k topf -> do
|
||||
f <- fromRepo (fromTopFilePath topf)
|
||||
whenM (inAnnex k) $ do
|
||||
obj <- toRawFilePath <$> calcRepo (gitAnnexLocation k)
|
||||
obj <- calcRepo (gitAnnexLocation k)
|
||||
unlessM (isJust <$> populatePointerFile restage k obj f) $
|
||||
liftIO (isPointerFile f) >>= \case
|
||||
Just k' | k' == k -> toplevelWarning False $
|
||||
|
|
|
@ -168,7 +168,7 @@ test st r k = catMaybes
|
|||
get
|
||||
, Just $ check "fsck downloaded object" fsck
|
||||
, Just $ check "retrieveKeyFile resume from 33%" $ do
|
||||
loc <- Annex.calcRepo (gitAnnexLocation k)
|
||||
loc <- fromRawFilePath <$> Annex.calcRepo (gitAnnexLocation k)
|
||||
tmp <- prepTmp k
|
||||
partial <- liftIO $ bracket (openBinaryFile loc ReadMode) hClose $ \h -> do
|
||||
sz <- hFileSize h
|
||||
|
@ -184,7 +184,7 @@ test st r k = catMaybes
|
|||
get
|
||||
, Just $ check "fsck downloaded object" fsck
|
||||
, Just $ check "retrieveKeyFile resume from end" $ do
|
||||
loc <- Annex.calcRepo (gitAnnexLocation k)
|
||||
loc <- fromRawFilePath <$> Annex.calcRepo (gitAnnexLocation k)
|
||||
tmp <- prepTmp k
|
||||
void $ liftIO $ copyFileExternal CopyAllMetaData loc tmp
|
||||
lockContentForRemoval k removeAnnex
|
||||
|
@ -240,7 +240,7 @@ testExportTree st (Just _) ea k1 k2 =
|
|||
check desc a = testCase desc $
|
||||
Annex.eval st (Annex.setOutput QuietOutput >> a) @? "failed"
|
||||
storeexport k = do
|
||||
loc <- Annex.calcRepo (gitAnnexLocation k)
|
||||
loc <- fromRawFilePath <$> Annex.calcRepo (gitAnnexLocation k)
|
||||
Remote.storeExport ea loc k testexportlocation nullMeterUpdate
|
||||
retrieveexport k = withTmpFile "exported" $ \tmp h -> do
|
||||
liftIO $ hClose h
|
||||
|
|
|
@ -46,7 +46,7 @@ perform file key = do
|
|||
cleanup :: RawFilePath -> Key -> CommandCleanup
|
||||
cleanup file key = do
|
||||
Database.Keys.removeAssociatedFile key =<< inRepo (toTopFilePath file)
|
||||
src <- calcRepo $ gitAnnexLocation key
|
||||
src <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
|
||||
ifM (Annex.getState Annex.fast)
|
||||
( do
|
||||
-- Only make a hard link if the annexed file does not
|
||||
|
|
|
@ -17,6 +17,7 @@ import qualified Database.Keys
|
|||
import Annex.Content
|
||||
import Annex.Init
|
||||
import Utility.FileMode
|
||||
import qualified Utility.RawFilePath as R
|
||||
|
||||
cmd :: Command
|
||||
cmd = addCheck check $
|
||||
|
@ -117,5 +118,5 @@ removeUnannexed = go []
|
|||
, go (k:c) ks
|
||||
)
|
||||
enoughlinks f = catchBoolIO $ do
|
||||
s <- getFileStatus f
|
||||
s <- R.getFileStatus f
|
||||
return $ linkCount s > 1
|
||||
|
|
|
@ -283,7 +283,7 @@ associatedFilesFilter = filterM go
|
|||
checkunmodified _ [] = return True
|
||||
checkunmodified cs (f:fs) = do
|
||||
relf <- fromRepo $ fromTopFilePath f
|
||||
ifM (sameInodeCache (fromRawFilePath relf) cs)
|
||||
ifM (sameInodeCache relf cs)
|
||||
( return False
|
||||
, checkunmodified cs fs
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue