use filepath-bytestring for annex object manipulations

git-annex find is now RawFilePath end to end, no string conversions.
So is git-annex get when it does not need to get anything.
So this is a major milestone on optimisation.

Benchmarks indicate around 30% speedup in both commands.

Probably many other performance improvements. All or nearly all places
where a file is statted use RawFilePath now.
This commit is contained in:
Joey Hess 2019-12-11 14:12:22 -04:00
parent bdec7fed9c
commit c19211774f
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
53 changed files with 324 additions and 234 deletions

View file

@ -334,10 +334,9 @@ inodeMap :: Annex ([RawFilePath], IO Bool) -> Annex InodeMap
inodeMap getfiles = do
(fs, cleanup) <- getfiles
fsis <- forM fs $ \f -> do
let f' = fromRawFilePath f
mi <- withTSDelta (liftIO . genInodeCache f')
mi <- withTSDelta (liftIO . genInodeCache f)
return $ case mi of
Nothing -> Nothing
Just i -> Just (inodeCacheToKey Strongly i, f')
Just i -> Just (inodeCacheToKey Strongly i, fromRawFilePath f)
void $ liftIO cleanup
return $ M.fromList $ catMaybes fsis

View file

@ -482,7 +482,7 @@ stageJournal jl commitindex = withIndex $ withOtherTmp $ \tmpdir -> do
sha <- Git.HashObject.hashFile h path
hPutStrLn jlogh file
streamer $ Git.UpdateIndex.updateIndexLine
sha TreeFile (asTopFilePath $ toRawFilePath $ fileJournal file)
sha TreeFile (asTopFilePath $ fileJournal $ toRawFilePath file)
genstream dir h jh jlogh streamer
-- Clean up the staged files, as listed in the temp log file.
-- The temp file is used to avoid needing to buffer all the

View file

@ -89,17 +89,18 @@ import Annex.Content.LowLevel
import Annex.Content.PointerFile
import Annex.Concurrent
import Types.WorkerPool
import qualified Utility.RawFilePath as R
{- Checks if a given key's content is currently present. -}
inAnnex :: Key -> Annex Bool
inAnnex key = inAnnexCheck key $ liftIO . doesFileExist
inAnnex key = inAnnexCheck key $ liftIO . R.doesPathExist
{- Runs an arbitrary check on a key's content. -}
inAnnexCheck :: Key -> (FilePath -> Annex Bool) -> Annex Bool
inAnnexCheck :: Key -> (RawFilePath -> Annex Bool) -> Annex Bool
inAnnexCheck key check = inAnnex' id False check key
{- inAnnex that performs an arbitrary check of the key's content. -}
inAnnex' :: (a -> Bool) -> a -> (FilePath -> Annex a) -> Key -> Annex a
inAnnex' :: (a -> Bool) -> a -> (RawFilePath -> Annex a) -> Key -> Annex a
inAnnex' isgood bad check key = withObjectLoc key $ \loc -> do
r <- check loc
if isgood r
@ -120,12 +121,15 @@ inAnnex' isgood bad check key = withObjectLoc key $ \loc -> do
{- Like inAnnex, checks if the object file for a key exists,
- but there are no guarantees it has the right content. -}
objectFileExists :: Key -> Annex Bool
objectFileExists key = calcRepo (gitAnnexLocation key) >>= liftIO . doesFileExist
objectFileExists key =
calcRepo (gitAnnexLocation key)
>>= liftIO . R.doesPathExist
{- A safer check; the key's content must not only be present, but
- is not in the process of being removed. -}
inAnnexSafe :: Key -> Annex (Maybe Bool)
inAnnexSafe key = inAnnex' (fromMaybe True) (Just False) go key
inAnnexSafe key =
inAnnex' (fromMaybe True) (Just False) (go . fromRawFilePath) key
where
is_locked = Nothing
is_unlocked = Just True
@ -246,7 +250,7 @@ winLocker _ _ Nothing = return Nothing
lockContentUsing :: ContentLocker -> Key -> Annex a -> Annex a
lockContentUsing locker key a = do
contentfile <- calcRepo $ gitAnnexLocation key
contentfile <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
lockfile <- contentLockFile key
bracket
(lock contentfile lockfile)
@ -474,18 +478,20 @@ moveAnnex key src = ifM (checkSecureHashes key)
, return False
)
where
storeobject dest = ifM (liftIO $ doesFileExist dest)
storeobject dest = ifM (liftIO $ R.doesPathExist dest)
( alreadyhave
, modifyContent dest $ do
, modifyContent dest' $ do
freezeContent src
liftIO $ moveFile src dest
liftIO $ moveFile src dest'
g <- Annex.gitRepo
fs <- map (`fromTopFilePath` g)
<$> Database.Keys.getAssociatedFiles key
unless (null fs) $ do
ics <- mapM (populatePointerFile (Restage True) key (toRawFilePath dest)) fs
ics <- mapM (populatePointerFile (Restage True) key dest) fs
Database.Keys.storeInodeCaches' key [dest] (catMaybes ics)
)
where
dest' = fromRawFilePath dest
alreadyhave = liftIO $ removeFile src
checkSecureHashes :: Key -> Annex Bool
@ -505,7 +511,7 @@ data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop
linkToAnnex :: Key -> FilePath -> Maybe InodeCache -> Annex LinkAnnexResult
linkToAnnex key src srcic = ifM (checkSecureHashes key)
( do
dest <- calcRepo (gitAnnexLocation key)
dest <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
modifyContent dest $ linkAnnex To key src srcic dest Nothing
, return LinkAnnexFailed
)
@ -515,7 +521,7 @@ linkFromAnnex :: Key -> FilePath -> Maybe FileMode -> Annex LinkAnnexResult
linkFromAnnex key dest destmode = do
src <- calcRepo (gitAnnexLocation key)
srcic <- withTSDelta (liftIO . genInodeCache src)
linkAnnex From key src srcic dest destmode
linkAnnex From key (fromRawFilePath src) srcic dest destmode
data FromTo = From | To
@ -534,7 +540,7 @@ data FromTo = From | To
linkAnnex :: FromTo -> Key -> FilePath -> Maybe InodeCache -> FilePath -> Maybe FileMode -> Annex LinkAnnexResult
linkAnnex _ _ _ Nothing _ _ = return LinkAnnexFailed
linkAnnex fromto key src (Just srcic) dest destmode =
withTSDelta (liftIO . genInodeCache dest) >>= \case
withTSDelta (liftIO . genInodeCache dest') >>= \case
Just destic -> do
cs <- Database.Keys.getInodeCaches key
if null cs
@ -551,12 +557,13 @@ linkAnnex fromto key src (Just srcic) dest destmode =
Linked -> noop
checksrcunchanged
where
dest' = toRawFilePath dest
failed = do
Database.Keys.addInodeCaches key [srcic]
return LinkAnnexFailed
checksrcunchanged = withTSDelta (liftIO . genInodeCache src) >>= \case
checksrcunchanged = withTSDelta (liftIO . genInodeCache (toRawFilePath src)) >>= \case
Just srcic' | compareStrong srcic srcic' -> do
destic <- withTSDelta (liftIO . genInodeCache dest)
destic <- withTSDelta (liftIO . genInodeCache dest')
Database.Keys.addInodeCaches key $
catMaybes [destic, Just srcic]
return LinkAnnexOk
@ -567,7 +574,7 @@ linkAnnex fromto key src (Just srcic) dest destmode =
{- Removes the annex object file for a key. Lowlevel. -}
unlinkAnnex :: Key -> Annex ()
unlinkAnnex key = do
obj <- calcRepo $ gitAnnexLocation key
obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
modifyContent obj $ do
secureErase obj
liftIO $ nukeFile obj
@ -616,15 +623,15 @@ prepSendAnnex key = withObjectLoc key $ \f -> do
else pure cache
return $ if null cache'
then Nothing
else Just (f, sameInodeCache f cache')
else Just (fromRawFilePath f, sameInodeCache f cache')
{- Performs an action, passing it the location to use for a key's content. -}
withObjectLoc :: Key -> (FilePath -> Annex a) -> Annex a
withObjectLoc :: Key -> (RawFilePath -> Annex a) -> Annex a
withObjectLoc key a = a =<< calcRepo (gitAnnexLocation key)
cleanObjectLoc :: Key -> Annex () -> Annex ()
cleanObjectLoc key cleaner = do
file <- calcRepo $ gitAnnexLocation key
file <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
void $ tryIO $ thawContentDir file
cleaner
liftIO $ removeparents file (3 :: Int)
@ -640,22 +647,23 @@ cleanObjectLoc key cleaner = do
removeAnnex :: ContentRemovalLock -> Annex ()
removeAnnex (ContentRemovalLock key) = withObjectLoc key $ \file ->
cleanObjectLoc key $ do
secureErase file
liftIO $ nukeFile file
let file' = fromRawFilePath file
secureErase file'
liftIO $ nukeFile file'
g <- Annex.gitRepo
mapM_ (\f -> void $ tryIO $ resetpointer $ fromRawFilePath $ fromTopFilePath f g)
mapM_ (\f -> void $ tryIO $ resetpointer $ fromTopFilePath f g)
=<< Database.Keys.getAssociatedFiles key
Database.Keys.removeInodeCaches key
where
-- Check associated pointer file for modifications, and reset if
-- it's unmodified.
resetpointer file = ifM (isUnmodified key file)
( depopulatePointerFile key (toRawFilePath file)
( depopulatePointerFile key file
-- Modified file, so leave it alone.
-- If it was a hard link to the annex object,
-- that object might have been frozen as part of the
-- removal process, so thaw it.
, void $ tryIO $ thawContent file
, void $ tryIO $ thawContent $ fromRawFilePath file
)
{- Check if a file contains the unmodified content of the key.
@ -663,12 +671,12 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key $ \file ->
- The expensive way to tell is to do a verification of its content.
- The cheaper way is to see if the InodeCache for the key matches the
- file. -}
isUnmodified :: Key -> FilePath -> Annex Bool
isUnmodified :: Key -> RawFilePath -> Annex Bool
isUnmodified key f = go =<< geti
where
go Nothing = return False
go (Just fc) = isUnmodifiedCheap' key fc <||> expensivecheck fc
expensivecheck fc = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key f)
expensivecheck fc = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key (fromRawFilePath f))
( do
-- The file could have been modified while it was
-- being verified. Detect that.
@ -691,7 +699,7 @@ isUnmodified key f = go =<< geti
- this may report a false positive when repeated edits are made to a file
- within a small time window (eg 1 second).
-}
isUnmodifiedCheap :: Key -> FilePath -> Annex Bool
isUnmodifiedCheap :: Key -> RawFilePath -> Annex Bool
isUnmodifiedCheap key f = maybe (return False) (isUnmodifiedCheap' key)
=<< withTSDelta (liftIO . genInodeCache f)
@ -703,7 +711,7 @@ isUnmodifiedCheap' key fc =
- returns the file it was moved to. -}
moveBad :: Key -> Annex FilePath
moveBad key = do
src <- calcRepo $ gitAnnexLocation key
src <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
bad <- fromRepo gitAnnexBadDir
let dest = bad </> takeFileName src
createAnnexDirectory (parentDir dest)
@ -791,7 +799,7 @@ preseedTmp key file = go =<< inAnnex key
copy = ifM (liftIO $ doesFileExist file)
( return True
, do
s <- calcRepo $ gitAnnexLocation key
s <- fromRawFilePath <$> (calcRepo $ gitAnnexLocation key)
liftIO $ ifM (doesFileExist s)
( copyFileExternal CopyTimeStamps s file
, return False

View file

@ -38,10 +38,11 @@ populatePointerFile restage k obj f = go =<< liftIO (isPointerFile f)
destmode <- liftIO $ catchMaybeIO $ fileMode <$> getFileStatus f'
liftIO $ nukeFile f'
(ic, populated) <- replaceFile f' $ \tmp -> do
let tmp' = toRawFilePath tmp
ok <- linkOrCopy k (fromRawFilePath obj) tmp destmode >>= \case
Just _ -> thawContent tmp >> return True
Nothing -> liftIO (writePointerFile (toRawFilePath tmp) k destmode) >> return False
ic <- withTSDelta (liftIO . genInodeCache tmp)
Nothing -> liftIO (writePointerFile tmp' k destmode) >> return False
ic <- withTSDelta (liftIO . genInodeCache tmp')
return (ic, ok)
maybe noop (restagePointerFile restage f) ic
if populated
@ -68,5 +69,5 @@ depopulatePointerFile key file = do
(\t -> touch tmp t False)
(fmap modificationTimeHiRes st)
#endif
withTSDelta (liftIO . genInodeCache tmp)
withTSDelta (liftIO . genInodeCache (toRawFilePath tmp))
maybe noop (restagePointerFile (Restage True) file) ic

View file

@ -1,6 +1,6 @@
{- git-annex file locations
-
- Copyright 2010-2017 Joey Hess <id@joeyh.name>
- Copyright 2010-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -19,7 +19,10 @@ module Annex.DirHashes (
import Data.Default
import Data.Bits
import qualified Data.ByteArray
import qualified Data.ByteArray as BA
import qualified Data.ByteArray.Encoding as BA
import qualified Data.ByteString as S
import qualified System.FilePath.ByteString as P
import Common
import Key
@ -28,7 +31,7 @@ import Types.Difference
import Utility.Hash
import Utility.MD5
type Hasher = Key -> FilePath
type Hasher = Key -> RawFilePath
-- Number of hash levels to use. 2 is the default.
newtype HashLevels = HashLevels Int
@ -47,7 +50,7 @@ configHashLevels d config
| hasDifference d (annexDifferences config) = HashLevels 1
| otherwise = def
branchHashDir :: GitConfig -> Key -> String
branchHashDir :: GitConfig -> Key -> S.ByteString
branchHashDir = hashDirLower . branchHashLevels
{- Two different directory hashes may be used. The mixed case hash
@ -60,19 +63,26 @@ branchHashDir = hashDirLower . branchHashLevels
dirHashes :: [HashLevels -> Hasher]
dirHashes = [hashDirLower, hashDirMixed]
hashDirs :: HashLevels -> Int -> String -> FilePath
hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s
hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s
hashDirs :: HashLevels -> Int -> S.ByteString -> RawFilePath
hashDirs (HashLevels 1) sz s = P.addTrailingPathSeparator $ S.take sz s
hashDirs _ sz s = P.addTrailingPathSeparator $ h P.</> t
where
(h, t) = S.splitAt sz s
hashDirLower :: HashLevels -> Hasher
hashDirLower n k = hashDirs n 3 $ take 6 $ show $ md5s $ serializeKey' $ nonChunkKey k
hashDirLower n k = hashDirs n 3 $ S.pack $ take 6 $ conv $
md5s $ serializeKey' $ nonChunkKey k
where
conv v = BA.unpack $
(BA.convertToBase BA.Base16 v :: BA.Bytes)
{- This was originally using Data.Hash.MD5 from MissingH. This new version
- is faster, but ugly as it has to replicate the 4 Word32's that produced. -}
hashDirMixed :: HashLevels -> Hasher
hashDirMixed n k = hashDirs n 2 $ take 4 $ concatMap display_32bits_as_dir $
encodeWord32 $ map fromIntegral $ Data.ByteArray.unpack $
Utility.Hash.md5s $ serializeKey' $ nonChunkKey k
hashDirMixed n k = hashDirs n 2 $ S.pack $ take 4 $
concatMap display_32bits_as_dir $
encodeWord32 $ map fromIntegral $ BA.unpack $
Utility.Hash.md5s $ serializeKey' $ nonChunkKey k
where
encodeWord32 (b1:b2:b3:b4:rest) =
(shiftL b4 24 .|. shiftL b3 16 .|. shiftL b2 8 .|. b1)

View file

@ -92,7 +92,7 @@ lockDown' cfg file = tryIO $ ifM crippledFileSystem
nohardlink = withTSDelta $ liftIO . nohardlink'
nohardlink' delta = do
cache <- genInodeCache file delta
cache <- genInodeCache (toRawFilePath file) delta
return $ LockedDown cfg $ KeySource
{ keyFilename = file
, contentLocation = file
@ -112,7 +112,7 @@ lockDown' cfg file = tryIO $ ifM crippledFileSystem
withhardlink' delta tmpfile = do
createLink file tmpfile
cache <- genInodeCache tmpfile delta
cache <- genInodeCache (toRawFilePath tmpfile) delta
return $ LockedDown cfg $ KeySource
{ keyFilename = file
, contentLocation = tmpfile
@ -209,7 +209,7 @@ finishIngestUnlocked' key source restage = do
{- Copy to any other locations using the same key. -}
populateAssociatedFiles :: Key -> KeySource -> Restage -> Annex ()
populateAssociatedFiles key source restage = do
obj <- toRawFilePath <$> calcRepo (gitAnnexLocation key)
obj <- calcRepo (gitAnnexLocation key)
g <- Annex.gitRepo
ingestedf <- flip fromTopFilePath g
<$> inRepo (toTopFilePath (toRawFilePath (keyFilename source)))
@ -235,8 +235,7 @@ cleanOldKeys file newkey = do
unlessM (isUnmodified key =<< calcRepo (gitAnnexLocation key)) $ do
caches <- Database.Keys.getInodeCaches key
unlinkAnnex key
fs <- map fromRawFilePath
. filter (/= ingestedf)
fs <- filter (/= ingestedf)
. map (`fromTopFilePath` g)
<$> Database.Keys.getAssociatedFiles key
filterM (`sameInodeCache` caches) fs >>= \case
@ -245,7 +244,7 @@ cleanOldKeys file newkey = do
-- so no need for any recovery.
(f:_) -> do
ic <- withTSDelta (liftIO . genInodeCache f)
void $ linkToAnnex key f ic
void $ linkToAnnex key (fromRawFilePath f) ic
_ -> logStatus key InfoMissing
{- On error, put the file back so it doesn't seem to have vanished.
@ -256,7 +255,7 @@ restoreFile file key e = do
liftIO $ nukeFile file
-- The key could be used by other files too, so leave the
-- content in the annex, and make a copy back to the file.
obj <- calcRepo $ gitAnnexLocation key
obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
unlessM (liftIO $ copyFileExternal CopyTimeStamps obj file) $
warning $ "Unable to restore content of " ++ file ++ "; it should be located in " ++ obj
thawContent file

View file

@ -29,7 +29,7 @@ compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly )
{- Checks if one of the provided old InodeCache matches the current
- version of a file. -}
sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool
sameInodeCache :: RawFilePath -> [InodeCache] -> Annex Bool
sameInodeCache _ [] = return False
sameInodeCache file old = go =<< withTSDelta (liftIO . genInodeCache file)
where
@ -78,7 +78,7 @@ createInodeSentinalFile :: Bool -> Annex ()
createInodeSentinalFile evenwithobjects =
unlessM (alreadyexists <||> hasobjects) $ do
s <- annexSentinalFile
createAnnexDirectory (parentDir (sentinalFile s))
createAnnexDirectory (parentDir (fromRawFilePath (sentinalFile s)))
liftIO $ writeSentinalFile s
where
alreadyexists = liftIO. sentinalFileExists =<< annexSentinalFile

View file

@ -20,7 +20,9 @@ import Utility.Directory.Stream
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as S
import qualified System.FilePath.ByteString as P
import Data.ByteString.Builder
import Data.Char
class Journalable t where
writeJournalHandle :: Handle -> t -> IO ()
@ -48,7 +50,7 @@ setJournalFile :: Journalable content => JournalLocked -> RawFilePath -> content
setJournalFile _jl file content = withOtherTmp $ \tmp -> do
createAnnexDirectory =<< fromRepo gitAnnexJournalDir
-- journal file is written atomically
jfile <- fromRepo $ journalFile $ fromRawFilePath file
jfile <- fromRawFilePath <$> fromRepo (journalFile file)
let tmpfile = tmp </> takeFileName jfile
liftIO $ do
withFile tmpfile WriteMode $ \h -> writeJournalHandle h content
@ -71,7 +73,7 @@ getJournalFile _jl = getJournalFileStale
-}
getJournalFileStale :: RawFilePath -> Annex (Maybe L.ByteString)
getJournalFileStale file = inRepo $ \g -> catchMaybeIO $
L.fromStrict <$> S.readFile (journalFile (fromRawFilePath file) g)
L.fromStrict <$> S.readFile (fromRawFilePath $ journalFile file g)
{- List of existing journal files, but without locking, may miss new ones
- just being added, or may have false positives if the journal is staged
@ -81,7 +83,8 @@ getJournalledFilesStale = do
g <- gitRepo
fs <- liftIO $ catchDefaultIO [] $
getDirectoryContents $ gitAnnexJournalDir g
return $ filter (`notElem` [".", ".."]) $ map fileJournal fs
return $ filter (`notElem` [".", ".."]) $
map (fromRawFilePath . fileJournal . toRawFilePath) fs
withJournalHandle :: (DirectoryHandle -> IO a) -> Annex a
withJournalHandle a = do
@ -97,24 +100,29 @@ journalDirty = do
`catchIO` (const $ doesDirectoryExist d)
{- Produces a filename to use in the journal for a file on the branch.
-
- The input filename is assumed to not contain any '_' character,
- since path separators are replaced with that.
-
- The journal typically won't have a lot of files in it, so the hashing
- used in the branch is not necessary, and all the files are put directly
- in the journal directory.
-}
journalFile :: FilePath -> Git.Repo -> FilePath
journalFile file repo = gitAnnexJournalDir repo </> concatMap mangle file
journalFile :: RawFilePath -> Git.Repo -> RawFilePath
journalFile file repo = gitAnnexJournalDir' repo P.</> S.map mangle file
where
mangle c
| c == pathSeparator = "_"
| c == '_' = "__"
| otherwise = [c]
| c == P.pathSeparator = fromIntegral (ord '_')
| otherwise = c
{- Converts a journal file (relative to the journal dir) back to the
- filename on the branch. -}
fileJournal :: FilePath -> FilePath
fileJournal = replace [pathSeparator, pathSeparator] "_" .
replace "_" [pathSeparator]
fileJournal :: RawFilePath -> RawFilePath
fileJournal = S.map unmangle
where
unmangle c
| c == fromIntegral (ord '_') = P.pathSeparator
| otherwise = c
{- Sentinal value, only produced by lockJournal; required
- as a parameter by things that need to ensure the journal is

View file

@ -39,6 +39,7 @@ import qualified Utility.RawFilePath as R
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L
import qualified System.FilePath.ByteString as P
type LinkTarget = String
@ -182,7 +183,7 @@ restagePointerFile (Restage True) f orig = withTSDelta $ \tsd -> do
absf <- liftIO $ absPath $ fromRawFilePath f
Annex.Queue.addInternalAction runner [(absf, isunmodified tsd)]
where
isunmodified tsd = genInodeCache' f tsd >>= return . \case
isunmodified tsd = genInodeCache f tsd >>= return . \case
Nothing -> False
Just new -> compareStrong orig new
@ -301,8 +302,7 @@ isLinkToAnnex s = p `S.isInfixOf` s
|| p' `S.isInfixOf` s
#endif
where
sp = (pathSeparator:objectDir)
p = toRawFilePath sp
p = P.pathSeparator `S.cons` objectDir'
#ifdef mingw32_HOST_OS
p' = toRawFilePath (toInternalGitPath sp)
p' = toInternalGitPath p
#endif

View file

@ -16,6 +16,7 @@ module Annex.Locations (
keyPath,
annexDir,
objectDir,
objectDir',
gitAnnexLocation,
gitAnnexLocationDepth,
gitAnnexLink,
@ -62,6 +63,7 @@ module Annex.Locations (
gitAnnexFeedState,
gitAnnexMergeDir,
gitAnnexJournalDir,
gitAnnexJournalDir',
gitAnnexJournalLock,
gitAnnexGitQueueLock,
gitAnnexPreCommitLock,
@ -105,6 +107,7 @@ import qualified Git.Types as Git
import Git.FilePath
import Annex.DirHashes
import Annex.Fixup
import qualified Utility.RawFilePath as R
{- Conventions:
-
@ -124,21 +127,27 @@ import Annex.Fixup
annexDir :: FilePath
annexDir = addTrailingPathSeparator "annex"
annexDir' :: RawFilePath
annexDir' = P.addTrailingPathSeparator "annex"
{- The directory git annex uses for locally available object content,
- relative to the .git directory -}
objectDir :: FilePath
objectDir = addTrailingPathSeparator $ annexDir </> "objects"
objectDir' :: RawFilePath
objectDir' = P.addTrailingPathSeparator $ annexDir' P.</> "objects"
{- Annexed file's possible locations relative to the .git directory.
- There are two different possibilities, using different hashes.
-
- Also, some repositories have a Difference in hash directory depth.
-}
annexLocations :: GitConfig -> Key -> [FilePath]
annexLocations :: GitConfig -> Key -> [RawFilePath]
annexLocations config key = map (annexLocation config key) dirHashes
annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> FilePath
annexLocation config key hasher = objectDir </> keyPath key (hasher $ objectHashLevels config)
annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> RawFilePath
annexLocation config key hasher = objectDir' P.</> keyPath key (hasher $ objectHashLevels config)
{- Number of subdirectories from the gitAnnexObjectDir
- to the gitAnnexLocation. -}
@ -158,14 +167,14 @@ gitAnnexLocationDepth config = hashlevels + 1
- This does not take direct mode into account, so in direct mode it is not
- the actual location of the file's content.
-}
gitAnnexLocation :: Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexLocation :: Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexLocation key r config = gitAnnexLocation' key r config
(annexCrippledFileSystem config)
(coreSymlinks config)
doesFileExist
(fromRawFilePath (Git.localGitDir r))
R.doesPathExist
(Git.localGitDir r)
gitAnnexLocation' :: Key -> Git.Repo -> GitConfig -> Bool -> Bool -> (FilePath -> IO Bool) -> FilePath -> IO FilePath
gitAnnexLocation' :: Key -> Git.Repo -> GitConfig -> Bool -> Bool -> (RawFilePath -> IO Bool) -> RawFilePath -> IO RawFilePath
gitAnnexLocation' key r config crippled symlinkssupported checker gitdir
{- Bare repositories default to hashDirLower for new
- content, as it's more portable. But check all locations. -}
@ -187,7 +196,7 @@ gitAnnexLocation' key r config crippled symlinkssupported checker gitdir
only = return . inrepo . annexLocation config key
checkall = check $ map inrepo $ annexLocations config key
inrepo d = gitdir </> d
inrepo d = gitdir P.</> d
check locs@(l:_) = fromMaybe l <$> firstM checker locs
check [] = error "internal"
@ -199,16 +208,17 @@ gitAnnexLink file key r config = do
let gitdir = getgitdir currdir
loc <- gitAnnexLocation' key r config False False (\_ -> return True) gitdir
fromRawFilePath . toInternalGitPath . toRawFilePath
<$> relPathDirToFile (parentDir absfile) loc
<$> relPathDirToFile (parentDir absfile) (fromRawFilePath loc)
where
getgitdir currdir
{- This special case is for git submodules on filesystems not
- supporting symlinks; generate link target that will
- work portably. -}
| not (coreSymlinks config) && needsSubmoduleFixup r =
absNormPathUnix currdir $ fromRawFilePath $
Git.repoPath r P.</> ".git"
| otherwise = fromRawFilePath $ Git.localGitDir r
toRawFilePath $
absNormPathUnix currdir $ fromRawFilePath $
Git.repoPath r P.</> ".git"
| otherwise = Git.localGitDir r
absNormPathUnix d p = fromRawFilePath $ toInternalGitPath $ toRawFilePath $
absPathFrom
(fromRawFilePath $ toInternalGitPath $ toRawFilePath d)
@ -232,33 +242,36 @@ gitAnnexLinkCanonical file key r config = gitAnnexLink file key r' config'
gitAnnexContentLock :: Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexContentLock key r config = do
loc <- gitAnnexLocation key r config
return $ loc ++ ".lck"
return $ fromRawFilePath loc ++ ".lck"
{- File that maps from a key to the file(s) in the git repository.
- Used in direct mode. -}
gitAnnexMapping :: Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexMapping key r config = do
loc <- gitAnnexLocation key r config
return $ loc ++ ".map"
return $ fromRawFilePath loc ++ ".map"
{- File that caches information about a key's content, used to determine
- if a file has changed.
- Used in direct mode. -}
gitAnnexInodeCache :: Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexInodeCache key r config = do
gitAnnexInodeCache key r config = do
loc <- gitAnnexLocation key r config
return $ loc ++ ".cache"
return $ fromRawFilePath loc ++ ".cache"
gitAnnexInodeSentinal :: Git.Repo -> FilePath
gitAnnexInodeSentinal r = gitAnnexDir r </> "sentinal"
gitAnnexInodeSentinal :: Git.Repo -> RawFilePath
gitAnnexInodeSentinal r = gitAnnexDir' r P.</> "sentinal"
gitAnnexInodeSentinalCache :: Git.Repo -> FilePath
gitAnnexInodeSentinalCache r = gitAnnexInodeSentinal r ++ ".cache"
gitAnnexInodeSentinalCache :: Git.Repo -> RawFilePath
gitAnnexInodeSentinalCache r = gitAnnexInodeSentinal r <> ".cache"
{- The annex directory of a repository. -}
gitAnnexDir :: Git.Repo -> FilePath
gitAnnexDir r = addTrailingPathSeparator $ fromRawFilePath (Git.localGitDir r) </> annexDir
gitAnnexDir' :: Git.Repo -> RawFilePath
gitAnnexDir' r = P.addTrailingPathSeparator $ Git.localGitDir r P.</> annexDir'
{- The part of the annex directory where file contents are stored. -}
gitAnnexObjectDir :: Git.Repo -> FilePath
gitAnnexObjectDir r = addTrailingPathSeparator $ fromRawFilePath (Git.localGitDir r) </> objectDir
@ -428,6 +441,9 @@ gitAnnexTransferDir r = addTrailingPathSeparator $ gitAnnexDir r </> "transfer"
gitAnnexJournalDir :: Git.Repo -> FilePath
gitAnnexJournalDir r = addTrailingPathSeparator $ gitAnnexDir r </> "journal"
gitAnnexJournalDir' :: Git.Repo -> RawFilePath
gitAnnexJournalDir' r = P.addTrailingPathSeparator $ gitAnnexDir' r P.</> "journal"
{- Lock file for the journal. -}
gitAnnexJournalLock :: Git.Repo -> FilePath
gitAnnexJournalLock r = gitAnnexDir r </> "journal.lck"
@ -609,10 +625,10 @@ fileKey' = deserializeKey' . S8.intercalate "/" . map go . S8.split '%'
- The file is put in a directory with the same name, this allows
- write-protecting the directory to avoid accidental deletion of the file.
-}
keyPath :: Key -> Hasher -> FilePath
keyPath key hasher = hasher key </> f </> f
keyPath :: Key -> Hasher -> RawFilePath
keyPath key hasher = hasher key P.</> f P.</> f
where
f = keyFile key
f = keyFile' key
{- All possibile locations to store a key in a special remote
- using different directory hashes.
@ -620,5 +636,5 @@ keyPath key hasher = hasher key </> f </> f
- This is compatible with the annexLocations, for interoperability between
- special remotes and git-annex repos.
-}
keyPaths :: Key -> [FilePath]
keyPaths :: Key -> [RawFilePath]
keyPaths key = map (\h -> keyPath key (h def)) dirHashes

View file

@ -101,13 +101,14 @@ scanUnlockedFiles = whenM (inRepo Git.Ref.headExists <&&> not <$> isBareRepo) $
Just k' | k' == k -> do
destmode <- liftIO $ catchMaybeIO $
fileMode <$> R.getFileStatus f
ic <- replaceFile (fromRawFilePath f) $ \tmp ->
ic <- replaceFile (fromRawFilePath f) $ \tmp -> do
let tmp' = toRawFilePath tmp
linkFromAnnex k tmp destmode >>= \case
LinkAnnexOk ->
withTSDelta (liftIO . genInodeCache tmp)
withTSDelta (liftIO . genInodeCache tmp')
LinkAnnexNoop -> return Nothing
LinkAnnexFailed -> liftIO $ do
writePointerFile (toRawFilePath tmp) k destmode
writePointerFile tmp' k destmode
return Nothing
maybe noop (restagePointerFile (Restage True) f) ic
_ -> noop

View file

@ -308,7 +308,7 @@ handleAdds lockdowndir havelsof delayadd cs = returnWhen (null incomplete) $ do
if M.null m
then forM toadd (add cfg)
else forM toadd $ \c -> do
mcache <- liftIO $ genInodeCache (changeFile c) delta
mcache <- liftIO $ genInodeCache (toRawFilePath (changeFile c)) delta
case mcache of
Nothing -> add cfg c
Just cache ->

View file

@ -113,7 +113,7 @@ distributionDownloadComplete d dest cleanup t
| transferDirection t == Download = do
debug ["finished downloading git-annex distribution"]
maybe (failedupgrade "bad download") go
=<< liftAnnex (withObjectLoc k fsckit)
=<< liftAnnex (withObjectLoc k (fsckit . fromRawFilePath))
| otherwise = cleanup
where
k = mkKey $ const $ distributionKey d

View file

@ -1,13 +1,9 @@
git-annex (7.20191115) UNRELEASED; urgency=medium
* Sped up many git-annex commands that operate on many files, by
using ByteStrings. Some commands like find got up to 60% faster.
* Sped up many git-annex commands that operate on many files, by
avoiding reserialization of keys.
find got 7% faster; whereis 3% faster; and git-annex get when
all files are already present got 5% faster
* Sped up many git-annex commands that query the git-annex branch.
In particular whereis got 1.5% faster.
* Optimised processing of many files, especially by commands like find
and whereis that only report on the state of the repository. Commands
like get also sped up in cases where they have to check a lot of
files but only transfer a few files. Speedups range from 30-100%.
* Stop displaying rsync progress, and use git-annex's own progress display
for local-to-local repo transfers.
* git-lfs: The url provided to initremote/enableremote will now be

View file

@ -131,7 +131,7 @@ withUnmodifiedUnlockedPointers a l = seekActions $
isUnmodifiedUnlocked :: RawFilePath -> Annex Bool
isUnmodifiedUnlocked f = catKeyFile f >>= \case
Nothing -> return False
Just k -> sameInodeCache (fromRawFilePath f) =<< Database.Keys.getInodeCaches k
Just k -> sameInodeCache f =<< Database.Keys.getInodeCaches k
{- Finds files that may be modified. -}
withFilesMaybeModified :: (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek

View file

@ -9,6 +9,9 @@ module Command.ContentLocation where
import Command
import Annex.Content
import qualified Utility.RawFilePath as R
import qualified Data.ByteString.Char8 as B8
cmd :: Command
cmd = noCommit $ noMessages $
@ -20,10 +23,10 @@ cmd = noCommit $ noMessages $
run :: () -> String -> Annex Bool
run _ p = do
let k = fromMaybe (giveup "bad key") $ deserializeKey p
maybe (return False) (\f -> liftIO (putStrLn f) >> return True)
maybe (return False) (\f -> liftIO (B8.putStrLn f) >> return True)
=<< inAnnex' (pure True) Nothing check k
where
check f = ifM (liftIO (doesFileExist f))
check f = ifM (liftIO (R.doesPathExist f))
( return (Just f)
, return Nothing
)

View file

@ -90,7 +90,8 @@ fixupReq req@(Req {}) =
v <- getAnnexLinkTarget' (toRawFilePath (getfile r)) False
case parseLinkTargetOrPointer =<< v of
Nothing -> return r
Just k -> withObjectLoc k (pure . setfile r)
Just k -> withObjectLoc k $
pure . setfile r . fromRawFilePath
_ -> return r
externalDiffer :: String -> [String] -> Differ

View file

@ -93,8 +93,8 @@ keyVars key =
, ("bytesize", size show)
, ("humansize", size $ roughSize storageUnits True)
, ("keyname", decodeBS $ fromKey keyName key)
, ("hashdirlower", hashDirLower def key)
, ("hashdirmixed", hashDirMixed def key)
, ("hashdirlower", fromRawFilePath $ hashDirLower def key)
, ("hashdirmixed", fromRawFilePath $ hashDirMixed def key)
, ("mtime", whenavail show $ fromKey keyMtime key)
]
where

View file

@ -53,11 +53,11 @@ start fixwhat file key = do
where
fixby = starting "fix" (mkActionItem (key, file))
fixthin = do
obj <- calcRepo $ gitAnnexLocation key
stopUnless (isUnmodified key (fromRawFilePath file) <&&> isUnmodified key obj) $ do
obj <- calcRepo (gitAnnexLocation key)
stopUnless (isUnmodified key file <&&> isUnmodified key obj) $ do
thin <- annexThin <$> Annex.getGitConfig
fs <- liftIO $ catchMaybeIO $ R.getFileStatus file
os <- liftIO $ catchMaybeIO $ getFileStatus obj
os <- liftIO $ catchMaybeIO $ R.getFileStatus obj
case (linkCount <$> fs, linkCount <$> os, thin) of
(Just 1, Just 1, True) ->
fixby $ makeHardLink file key
@ -65,15 +65,16 @@ start fixwhat file key = do
fixby $ breakHardLink file key obj
_ -> stop
breakHardLink :: RawFilePath -> Key -> FilePath -> CommandPerform
breakHardLink :: RawFilePath -> Key -> RawFilePath -> CommandPerform
breakHardLink file key obj = do
replaceFile (fromRawFilePath file) $ \tmp -> do
mode <- liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus file
unlessM (checkedCopyFile key obj tmp mode) $
let obj' = fromRawFilePath obj
unlessM (checkedCopyFile key obj' tmp mode) $
error "unable to break hard link"
thawContent tmp
modifyContent obj $ freezeContent obj
Database.Keys.storeInodeCaches key [fromRawFilePath file]
modifyContent obj' $ freezeContent obj'
Database.Keys.storeInodeCaches key [file]
next $ return True
makeHardLink :: RawFilePath -> Key -> CommandPerform

View file

@ -223,7 +223,7 @@ fixLink key file = do
- in this repository only. -}
verifyLocationLog :: Key -> KeyStatus -> ActionItem -> Annex Bool
verifyLocationLog key keystatus ai = do
obj <- calcRepo $ gitAnnexLocation key
obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
present <- if isKeyUnlockedThin keystatus
then liftIO (doesFileExist obj)
else inAnnex key
@ -332,11 +332,11 @@ verifyWorkTree key file = do
ifM (annexThin <$> Annex.getGitConfig)
( void $ linkFromAnnex key tmp mode
, do
obj <- calcRepo $ gitAnnexLocation key
obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
void $ checkedCopyFile key obj tmp mode
thawContent tmp
)
Database.Keys.storeInodeCaches key [fromRawFilePath file]
Database.Keys.storeInodeCaches key [file]
_ -> return ()
return True
@ -349,8 +349,8 @@ checkKeySize :: Key -> KeyStatus -> ActionItem -> Annex Bool
checkKeySize _ KeyUnlockedThin _ = return True
checkKeySize key _ ai = do
file <- calcRepo $ gitAnnexLocation key
ifM (liftIO $ doesFileExist file)
( checkKeySizeOr badContent key file ai
ifM (liftIO $ R.doesPathExist file)
( checkKeySizeOr badContent key (fromRawFilePath file) ai
, return True
)
@ -417,10 +417,10 @@ checkKeyUpgrade _ _ _ (AssociatedFile Nothing) =
-}
checkBackend :: Backend -> Key -> KeyStatus -> AssociatedFile -> Annex Bool
checkBackend backend key keystatus afile = do
content <- calcRepo $ gitAnnexLocation key
content <- calcRepo (gitAnnexLocation key)
ifM (pure (isKeyUnlockedThin keystatus) <&&> (not <$> isUnmodified key content))
( nocheck
, checkBackendOr badContent backend key content ai
, checkBackendOr badContent backend key (fromRawFilePath content) ai
)
where
nocheck = return True
@ -670,8 +670,8 @@ isKeyUnlockedThin KeyMissing = False
getKeyStatus :: Key -> Annex KeyStatus
getKeyStatus key = catchDefaultIO KeyMissing $ do
afs <- not . null <$> Database.Keys.getAssociatedFiles key
obj <- calcRepo $ gitAnnexLocation key
multilink <- ((> 1) . linkCount <$> liftIO (getFileStatus obj))
obj <- calcRepo (gitAnnexLocation key)
multilink <- ((> 1) . linkCount <$> liftIO (R.getFileStatus obj))
return $ if multilink && afs
then KeyUnlockedThin
else KeyPresent

View file

@ -181,7 +181,7 @@ startLocal largematcher mode (srcfile, destfile) =
-- weakly the same as the origianlly locked down file's
-- inode cache. (Since the file may have been copied,
-- its inodes may not be the same.)
newcache <- withTSDelta $ liftIO . genInodeCache destfile
newcache <- withTSDelta $ liftIO . genInodeCache (toRawFilePath destfile)
let unchanged = case (newcache, inodeCache (keySource ld)) of
(_, Nothing) -> True
(Just newc, Just c) | compareWeak c newc -> True

View file

@ -20,6 +20,7 @@ import qualified Database.Keys
import Annex.Ingest
import Logs.Location
import Git.FilePath
import qualified Utility.RawFilePath as R
cmd :: Command
cmd = withGlobalOptions [jsonOptions, annexedMatchingOptions] $
@ -43,7 +44,7 @@ startNew file key = ifM (isJust <$> isAnnexLink file)
| key' == key = cont
| otherwise = errorModified
go Nothing =
ifM (isUnmodified key (fromRawFilePath file))
ifM (isUnmodified key file)
( cont
, ifM (Annex.getState Annex.force)
( cont
@ -56,37 +57,38 @@ performNew :: RawFilePath -> Key -> CommandPerform
performNew file key = do
lockdown =<< calcRepo (gitAnnexLocation key)
addLink (fromRawFilePath file) key
=<< withTSDelta (liftIO . genInodeCache' file)
=<< withTSDelta (liftIO . genInodeCache file)
next $ cleanupNew file key
where
lockdown obj = do
ifM (isUnmodified key obj)
( breakhardlink obj
, repopulate obj
, repopulate (fromRawFilePath obj)
)
whenM (liftIO $ doesFileExist obj) $
freezeContent obj
whenM (liftIO $ R.doesPathExist obj) $
freezeContent $ fromRawFilePath obj
-- It's ok if the file is hard linked to obj, but if some other
-- associated file is, we need to break that link to lock down obj.
breakhardlink obj = whenM (catchBoolIO $ (> 1) . linkCount <$> liftIO (getFileStatus obj)) $ do
mfc <- withTSDelta (liftIO . genInodeCache' file)
breakhardlink obj = whenM (catchBoolIO $ (> 1) . linkCount <$> liftIO (R.getFileStatus obj)) $ do
mfc <- withTSDelta (liftIO . genInodeCache file)
unlessM (sameInodeCache obj (maybeToList mfc)) $ do
modifyContent obj $ replaceFile obj $ \tmp -> do
unlessM (checkedCopyFile key obj tmp Nothing) $
let obj' = fromRawFilePath obj
modifyContent obj' $ replaceFile obj' $ \tmp -> do
unlessM (checkedCopyFile key obj' tmp Nothing) $
giveup "unable to lock file"
Database.Keys.storeInodeCaches key [obj]
-- Try to repopulate obj from an unmodified associated file.
repopulate obj = modifyContent obj $ do
g <- Annex.gitRepo
fs <- map fromRawFilePath . map (`fromTopFilePath` g)
fs <- map (`fromTopFilePath` g)
<$> Database.Keys.getAssociatedFiles key
mfile <- firstM (isUnmodified key) fs
liftIO $ nukeFile obj
case mfile of
Just unmodified ->
unlessM (checkedCopyFile key unmodified obj Nothing)
unlessM (checkedCopyFile key (fromRawFilePath unmodified) obj Nothing)
lostcontent
Nothing -> lostcontent

View file

@ -86,7 +86,7 @@ perform file oldkey oldbackend newbackend = go =<< genkey (fastMigrate oldbacken
content <- calcRepo $ gitAnnexLocation oldkey
let source = KeySource
{ keyFilename = fromRawFilePath file
, contentLocation = content
, contentLocation = fromRawFilePath content
, inodeCache = Nothing
}
v <- genKey source nullMeterUpdate (Just newbackend)

View file

@ -137,7 +137,8 @@ send ups fs = do
mk <- lookupFile f
case mk of
Nothing -> noop
Just k -> withObjectLoc k (addlist f)
Just k -> withObjectLoc k $
addlist f . fromRawFilePath
liftIO $ hClose h
serverkey <- uftpKey

View file

@ -83,12 +83,12 @@ linkKey file oldkey newkey = ifM (isJust <$> isAnnexLink file)
- unlocked file, which would leave the new key unlocked
- and vulnerable to corruption. -}
( getViaTmpFromDisk RetrievalAllKeysSecure DefaultVerify newkey $ \tmp -> unVerified $ do
oldobj <- calcRepo (gitAnnexLocation oldkey)
oldobj <- fromRawFilePath <$> calcRepo (gitAnnexLocation oldkey)
isJust <$> linkOrCopy' (return True) newkey oldobj tmp Nothing
, do
{- The file being rekeyed is itself an unlocked file; if
- it's hard linked to the old key, that link must be broken. -}
oldobj <- calcRepo (gitAnnexLocation oldkey)
oldobj <- fromRawFilePath <$> calcRepo (gitAnnexLocation oldkey)
v <- tryNonAsync $ do
st <- liftIO $ R.getFileStatus file
when (linkCount st > 1) $ do
@ -97,7 +97,7 @@ linkKey file oldkey newkey = ifM (isJust <$> isAnnexLink file)
unlessM (checkedCopyFile oldkey oldobj tmp Nothing) $
error "can't lock old key"
thawContent tmp
ic <- withTSDelta (liftIO . genInodeCache' file)
ic <- withTSDelta (liftIO . genInodeCache file)
case v of
Left e -> do
warning (show e)

View file

@ -108,7 +108,7 @@ clean file = do
-- annexed and is unmodified.
case oldkey of
Nothing -> doingest oldkey
Just ko -> ifM (isUnmodifiedCheap ko file)
Just ko -> ifM (isUnmodifiedCheap ko (toRawFilePath file))
( liftIO $ emitPointer ko
, doingest oldkey
)
@ -174,7 +174,7 @@ shouldAnnex file moldkey = ifM (annexGitAddToAnnex <$> Annex.getGitConfig)
Just _ -> return True
Nothing -> checkknowninode
checkknowninode = withTSDelta (liftIO . genInodeCache file) >>= \case
checkknowninode = withTSDelta (liftIO . genInodeCache (toRawFilePath file)) >>= \case
Nothing -> pure False
Just ic -> Database.Keys.isInodeKnown ic =<< sentinalStatus
@ -191,7 +191,7 @@ emitPointer = S.putStr . formatPointer
getMoveRaceRecovery :: Key -> RawFilePath -> Annex ()
getMoveRaceRecovery k file = void $ tryNonAsync $
whenM (inAnnex k) $ do
obj <- toRawFilePath <$> calcRepo (gitAnnexLocation k)
obj <- calcRepo (gitAnnexLocation k)
-- Cannot restage because git add is running and has
-- the index locked.
populatePointerFile (Restage False) k obj file >>= \case
@ -207,7 +207,7 @@ updateSmudged :: Restage -> Annex ()
updateSmudged restage = streamSmudged $ \k topf -> do
f <- fromRepo (fromTopFilePath topf)
whenM (inAnnex k) $ do
obj <- toRawFilePath <$> calcRepo (gitAnnexLocation k)
obj <- calcRepo (gitAnnexLocation k)
unlessM (isJust <$> populatePointerFile restage k obj f) $
liftIO (isPointerFile f) >>= \case
Just k' | k' == k -> toplevelWarning False $

View file

@ -168,7 +168,7 @@ test st r k = catMaybes
get
, Just $ check "fsck downloaded object" fsck
, Just $ check "retrieveKeyFile resume from 33%" $ do
loc <- Annex.calcRepo (gitAnnexLocation k)
loc <- fromRawFilePath <$> Annex.calcRepo (gitAnnexLocation k)
tmp <- prepTmp k
partial <- liftIO $ bracket (openBinaryFile loc ReadMode) hClose $ \h -> do
sz <- hFileSize h
@ -184,7 +184,7 @@ test st r k = catMaybes
get
, Just $ check "fsck downloaded object" fsck
, Just $ check "retrieveKeyFile resume from end" $ do
loc <- Annex.calcRepo (gitAnnexLocation k)
loc <- fromRawFilePath <$> Annex.calcRepo (gitAnnexLocation k)
tmp <- prepTmp k
void $ liftIO $ copyFileExternal CopyAllMetaData loc tmp
lockContentForRemoval k removeAnnex
@ -240,7 +240,7 @@ testExportTree st (Just _) ea k1 k2 =
check desc a = testCase desc $
Annex.eval st (Annex.setOutput QuietOutput >> a) @? "failed"
storeexport k = do
loc <- Annex.calcRepo (gitAnnexLocation k)
loc <- fromRawFilePath <$> Annex.calcRepo (gitAnnexLocation k)
Remote.storeExport ea loc k testexportlocation nullMeterUpdate
retrieveexport k = withTmpFile "exported" $ \tmp h -> do
liftIO $ hClose h

View file

@ -46,7 +46,7 @@ perform file key = do
cleanup :: RawFilePath -> Key -> CommandCleanup
cleanup file key = do
Database.Keys.removeAssociatedFile key =<< inRepo (toTopFilePath file)
src <- calcRepo $ gitAnnexLocation key
src <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
ifM (Annex.getState Annex.fast)
( do
-- Only make a hard link if the annexed file does not

View file

@ -17,6 +17,7 @@ import qualified Database.Keys
import Annex.Content
import Annex.Init
import Utility.FileMode
import qualified Utility.RawFilePath as R
cmd :: Command
cmd = addCheck check $
@ -117,5 +118,5 @@ removeUnannexed = go []
, go (k:c) ks
)
enoughlinks f = catchBoolIO $ do
s <- getFileStatus f
s <- R.getFileStatus f
return $ linkCount s > 1

View file

@ -283,7 +283,7 @@ associatedFilesFilter = filterM go
checkunmodified _ [] = return True
checkunmodified cs (f:fs) = do
relf <- fromRepo $ fromTopFilePath f
ifM (sameInodeCache (fromRawFilePath relf) cs)
ifM (sameInodeCache relf cs)
( return False
, checkunmodified cs fs
)

View file

@ -169,13 +169,13 @@ removeAssociatedFile :: Key -> TopFilePath -> Annex ()
removeAssociatedFile k = runWriterIO . SQL.removeAssociatedFile (toIKey k)
{- Stats the files, and stores their InodeCaches. -}
storeInodeCaches :: Key -> [FilePath] -> Annex ()
storeInodeCaches :: Key -> [RawFilePath] -> Annex ()
storeInodeCaches k fs = storeInodeCaches' k fs []
storeInodeCaches' :: Key -> [FilePath] -> [InodeCache] -> Annex ()
storeInodeCaches' :: Key -> [RawFilePath] -> [InodeCache] -> Annex ()
storeInodeCaches' k fs ics = withTSDelta $ \d ->
addInodeCaches k . (++ ics) . catMaybes
=<< liftIO (mapM (`genInodeCache` d) fs)
=<< liftIO (mapM (\f -> genInodeCache f d) fs)
addInodeCaches :: Key -> [InodeCache] -> Annex ()
addInodeCaches k is = runWriterIO $ SQL.addInodeCaches (toIKey k) is
@ -223,7 +223,7 @@ reconcileStaged :: H.DbQueue -> Annex ()
reconcileStaged qh = do
gitindex <- inRepo currentIndexFile
indexcache <- fromRepo gitAnnexKeysDbIndexCache
withTSDelta (liftIO . genInodeCache gitindex) >>= \case
withTSDelta (liftIO . genInodeCache (toRawFilePath gitindex)) >>= \case
Just cur ->
liftIO (maybe Nothing readInodeCache <$> catchMaybeIO (readFile indexcache)) >>= \case
Nothing -> go cur indexcache
@ -295,10 +295,10 @@ reconcileStaged qh = do
keyloc <- calcRepo (gitAnnexLocation key)
keypopulated <- sameInodeCache keyloc caches
p <- fromRepo $ fromTopFilePath file
filepopulated <- sameInodeCache (fromRawFilePath p) caches
filepopulated <- sameInodeCache p caches
case (keypopulated, filepopulated) of
(True, False) ->
populatePointerFile (Restage True) key (toRawFilePath keyloc) p >>= \case
populatePointerFile (Restage True) key keyloc p >>= \case
Nothing -> return ()
Just ic -> liftIO $
SQL.addInodeCaches ikey [ic] (SQL.WriteHandle qh)

View file

@ -33,6 +33,7 @@ import Git.Types (RefDate(..))
import Utility.Glob
import Utility.HumanTime
import Utility.DataUnits
import qualified Utility.RawFilePath as R
import Data.Time.Clock.POSIX
import qualified Data.Set as S
@ -117,7 +118,8 @@ addMagicLimit limitname querymagic selectprovidedinfo glob = do
-- When the file is an annex symlink, get magic of the
-- object file.
Nothing -> isAnnexLink (toRawFilePath f) >>= \case
Just k -> withObjectLoc k $ querymagic magic
Just k -> withObjectLoc k $
querymagic magic . fromRawFilePath
Nothing -> querymagic magic f
matchMagic :: String -> (Magic -> FilePath -> Annex (Maybe String)) -> (ProvidedInfo -> OptInfo String) -> Maybe Magic -> MkLimit Annex
@ -363,7 +365,7 @@ addAccessedWithin duration = do
where
check now k = inAnnexCheck k $ \f ->
liftIO $ catchDefaultIO False $ do
s <- getFileStatus f
s <- R.getFileStatus f
let accessed = realToFrac (accessTime s)
let delta = now - accessed
return $ delta <= secs

25
Logs.hs
View file

@ -13,6 +13,7 @@ import Annex.Common
import Annex.DirHashes
import qualified Data.ByteString as S
import qualified System.FilePath.ByteString as P
{- There are several varieties of log file formats. -}
data LogVariety
@ -117,19 +118,19 @@ exportLog = "export.log"
{- The pathname of the location log file for a given key. -}
locationLogFile :: GitConfig -> Key -> RawFilePath
locationLogFile config key = toRawFilePath $
branchHashDir config key </> keyFile key ++ ".log"
locationLogFile config key =
branchHashDir config key P.</> keyFile' key <> ".log"
{- The filename of the url log for a given key. -}
urlLogFile :: GitConfig -> Key -> RawFilePath
urlLogFile config key = toRawFilePath $
branchHashDir config key </> keyFile key ++ decodeBS' urlLogExt
urlLogFile config key =
branchHashDir config key P.</> keyFile' key <> urlLogExt
{- Old versions stored the urls elsewhere. -}
oldurlLogs :: GitConfig -> Key -> [RawFilePath]
oldurlLogs config key = map toRawFilePath
[ "remote/web" </> hdir </> serializeKey key ++ ".log"
, "remote/web" </> hdir </> keyFile key ++ ".log"
oldurlLogs config key =
[ "remote/web" P.</> hdir P.</> serializeKey' key <> ".log"
, "remote/web" P.</> hdir P.</> keyFile' key <> ".log"
]
where
hdir = branchHashDir config key
@ -144,7 +145,7 @@ isUrlLog file = urlLogExt `S.isSuffixOf` file
{- The filename of the remote state log for a given key. -}
remoteStateLogFile :: GitConfig -> Key -> RawFilePath
remoteStateLogFile config key =
toRawFilePath (branchHashDir config key </> keyFile key)
(branchHashDir config key P.</> keyFile' key)
<> remoteStateLogExt
remoteStateLogExt :: S.ByteString
@ -156,7 +157,7 @@ isRemoteStateLog path = remoteStateLogExt `S.isSuffixOf` path
{- The filename of the chunk log for a given key. -}
chunkLogFile :: GitConfig -> Key -> RawFilePath
chunkLogFile config key =
toRawFilePath (branchHashDir config key </> keyFile key)
(branchHashDir config key P.</> keyFile' key)
<> chunkLogExt
chunkLogExt :: S.ByteString
@ -168,7 +169,7 @@ isChunkLog path = chunkLogExt `S.isSuffixOf` path
{- The filename of the metadata log for a given key. -}
metaDataLogFile :: GitConfig -> Key -> RawFilePath
metaDataLogFile config key =
toRawFilePath (branchHashDir config key </> keyFile key)
(branchHashDir config key P.</> keyFile' key)
<> metaDataLogExt
metaDataLogExt :: S.ByteString
@ -180,7 +181,7 @@ isMetaDataLog path = metaDataLogExt `S.isSuffixOf` path
{- The filename of the remote metadata log for a given key. -}
remoteMetaDataLogFile :: GitConfig -> Key -> RawFilePath
remoteMetaDataLogFile config key =
toRawFilePath (branchHashDir config key </> keyFile key)
(branchHashDir config key P.</> keyFile' key)
<> remoteMetaDataLogExt
remoteMetaDataLogExt :: S.ByteString
@ -192,7 +193,7 @@ isRemoteMetaDataLog path = remoteMetaDataLogExt `S.isSuffixOf` path
{- The filename of the remote content identifier log for a given key. -}
remoteContentIdentifierLogFile :: GitConfig -> Key -> RawFilePath
remoteContentIdentifierLogFile config key =
toRawFilePath (branchHashDir config key </> keyFile key)
(branchHashDir config key P.</> keyFile' key)
<> remoteContentIdentifierExt
remoteContentIdentifierExt :: S.ByteString

View file

@ -47,7 +47,7 @@ runLocal runst runner a = case a of
size <- liftIO $ catchDefaultIO 0 $ getFileSize f
runner (next (Len size))
ContentSize k next -> do
let getsize = liftIO . catchMaybeIO . getFileSize
let getsize = liftIO . catchMaybeIO . getFileSize . fromRawFilePath
size <- inAnnex' isJust Nothing getsize k
runner (next (Len <$> size))
ReadContent k af o sender next -> do

View file

@ -212,7 +212,7 @@ androidHashDir :: AndroidPath -> Key -> AndroidPath
androidHashDir adir k = AndroidPath $
fromAndroidPath adir ++ "/" ++ hdir
where
hdir = replace [pathSeparator] "/" (hashDirLower def k)
hdir = replace [pathSeparator] "/" (fromRawFilePath (hashDirLower def k))
storeExportM :: AndroidSerial -> AndroidPath -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool
storeExportM serial adir src _k loc _p = store' serial dest src

View file

@ -127,7 +127,7 @@ directorySetup _ mu _ c gc = do
- We try more than one since we used to write to different hash
- directories. -}
locations :: FilePath -> Key -> [FilePath]
locations d k = map (d </>) (keyPaths k)
locations d k = map (\f -> d </> fromRawFilePath f) (keyPaths k)
{- Returns the location off a Key in the directory. If the key is
- present, returns the location that is actually used, otherwise
@ -139,7 +139,8 @@ getLocation d k = do
{- Directory where the file(s) for a key are stored. -}
storeDir :: FilePath -> Key -> FilePath
storeDir d k = addTrailingPathSeparator $ d </> hashDirLower def k </> keyFile k
storeDir d k = addTrailingPathSeparator $
d </> fromRawFilePath (hashDirLower def k) </> keyFile k
{- Check if there is enough free disk space in the remote's directory to
- store the key. Note that the unencrypted key size is checked. -}

View file

@ -383,9 +383,9 @@ handleRequest' st external req mp responsehandler
handleRemoteRequest (PROGRESS bytesprocessed) =
maybe noop (\a -> liftIO $ a bytesprocessed) mp
handleRemoteRequest (DIRHASH k) =
send $ VALUE $ hashDirMixed def k
send $ VALUE $ fromRawFilePath $ hashDirMixed def k
handleRemoteRequest (DIRHASH_LOWER k) =
send $ VALUE $ hashDirLower def k
send $ VALUE $ fromRawFilePath $ hashDirLower def k
handleRemoteRequest (SETCONFIG setting value) =
liftIO $ atomically $ modifyTVar' (externalConfig st) $
M.insert setting value

View file

@ -422,7 +422,8 @@ checkKey' repo r rsyncopts k
{- Annexed objects are hashed using lower-case directories for max
- portability. -}
gCryptLocation :: Git.Repo -> Key -> FilePath
gCryptLocation repo key = Git.repoLocation repo </> objectDir </> keyPath key (hashDirLower def)
gCryptLocation repo key = Git.repoLocation repo </> objectDir
</> fromRawFilePath (keyPath key (hashDirLower def))
data AccessMethod = AccessDirect | AccessShell

View file

@ -61,6 +61,7 @@ import Creds
import Types.NumCopies
import Annex.Action
import Messages.Progress
import qualified Utility.RawFilePath as R
#ifndef mingw32_HOST_OS
import Utility.FileMode
@ -393,9 +394,9 @@ keyUrls gc repo r key = map tourl locs'
| remoteAnnexBare remoteconfig == Just False = reverse (annexLocations gc key)
| otherwise = annexLocations gc key
#ifndef mingw32_HOST_OS
locs' = locs
locs' = map fromRawFilePath locs
#else
locs' = map (replace "\\" "/") locs
locs' = map (replace "\\" "/" . fromRawFilePath) locs
#endif
remoteconfig = gitconfig r
@ -599,9 +600,9 @@ copyFromRemoteCheap' repo r st key af file
| not $ Git.repoIsUrl repo = guardUsable repo (return False) $ do
gc <- getGitConfigFromState st
loc <- liftIO $ gitAnnexLocation key repo gc
liftIO $ ifM (doesFileExist loc)
liftIO $ ifM (R.doesPathExist loc)
( do
absloc <- absPath loc
absloc <- absPath (fromRawFilePath loc)
catchBoolIO $ do
createSymbolicLink absloc file
return True

View file

@ -104,7 +104,8 @@ hookEnv action k f = Just <$> mergeenv (fileenv f ++ keyenv)
]
fileenv Nothing = []
fileenv (Just file) = [envvar "FILE" file]
hashbits = map takeDirectory $ splitPath $ hashDirMixed def k
hashbits = map takeDirectory $ splitPath $
fromRawFilePath $ hashDirMixed def k
lookupHook :: HookName -> Action -> Annex (Maybe String)
lookupHook hookname action = do

View file

@ -183,7 +183,7 @@ rsyncSetup _ mu _ c gc = do
store :: RsyncOpts -> Key -> FilePath -> MeterUpdate -> Annex Bool
store o k src meterupdate = storeGeneric o meterupdate basedest populatedest
where
basedest = Prelude.head (keyPaths k)
basedest = fromRawFilePath $ Prelude.head (keyPaths k)
populatedest dest = liftIO $ if canrename
then do
rename src dest
@ -222,7 +222,7 @@ remove :: RsyncOpts -> Remover
remove o k = removeGeneric o includes
where
includes = concatMap use dirHashes
use h = let dir = h def k in
use h = let dir = fromRawFilePath (h def k) in
[ parentDir dir
, dir
-- match content directory and anything in it

View file

@ -13,13 +13,14 @@ import Types
import Annex.Locations
import Utility.Rsync
import Utility.SafeCommand
import Data.Default
import System.FilePath.Posix
import Utility.FileSystemEncoding
import Annex.DirHashes
#ifdef mingw32_HOST_OS
import Utility.Split
#endif
import Annex.DirHashes
import Data.Default
import System.FilePath.Posix
type RsyncUrl = String
@ -42,7 +43,7 @@ mkRsyncUrl o f = rsyncUrl o </> rsyncEscape o f
rsyncUrls :: RsyncOpts -> Key -> [RsyncUrl]
rsyncUrls o k = map use dirHashes
where
use h = rsyncUrl o </> hash h </> rsyncEscape o (f </> f)
use h = rsyncUrl o </> fromRawFilePath (hash h) </> rsyncEscape o (f </> f)
f = keyFile k
#ifndef mingw32_HOST_OS
hash h = h def k

View file

@ -39,9 +39,9 @@ keyDir :: Key -> DavLocation
keyDir k = addTrailingPathSeparator $ hashdir </> keyFile k
where
#ifndef mingw32_HOST_OS
hashdir = hashDirLower def k
hashdir = fromRawFilePath $ hashDirLower def k
#else
hashdir = replace "\\" "/" (hashDirLower def k)
hashdir = replace "\\" "/" (fromRawFilePath $ hashDirLower def k)
#endif
keyLocation :: Key -> DavLocation

View file

@ -1638,7 +1638,8 @@ test_crypto = do
checkFile mvariant filename =
Utility.Gpg.checkEncryptionFile gpgcmd filename $
if mvariant == Just Types.Crypto.PubKey then ks else Nothing
serializeKeys cipher = Annex.Locations.keyPaths .
serializeKeys cipher = map fromRawFilePath .
Annex.Locations.keyPaths .
Crypto.encryptKey Types.Crypto.HmacSha1 cipher
#else
test_crypto = putStrLn "gpg testing not implemented on Windows"

View file

@ -236,9 +236,9 @@ logFile1 repo key = Upgrade.V2.gitStateDir repo ++ keyFile1 key ++ ".log"
logFile2 :: Key -> Git.Repo -> String
logFile2 = logFile' (hashDirLower def)
logFile' :: (Key -> FilePath) -> Key -> Git.Repo -> String
logFile' :: (Key -> RawFilePath) -> Key -> Git.Repo -> String
logFile' hasher key repo =
gitStateDir repo ++ hasher key ++ keyFile key ++ ".log"
gitStateDir repo ++ fromRawFilePath (hasher key) ++ keyFile key ++ ".log"
stateDir :: FilePath
stateDir = addTrailingPathSeparator ".git-annex"

View file

@ -135,7 +135,7 @@ upgradeDirectWorkTree = do
-- is just not populated with it. Since the work tree
-- file is recorded as an associated file, things will
-- still work that way, it's just not ideal.
ic <- withTSDelta (liftIO . genInodeCache f)
ic <- withTSDelta (liftIO . genInodeCache (toRawFilePath f))
void $ Content.linkToAnnex k f ic
, unlessM (Content.inAnnex k) $ do
-- Worktree file was deleted or modified;

View file

@ -107,7 +107,9 @@ removeAssociatedFiles key = do
- expected mtime and inode.
-}
goodContent :: Key -> FilePath -> Annex Bool
goodContent key file = sameInodeCache file =<< recordedInodeCache key
goodContent key file =
sameInodeCache (toRawFilePath file)
=<< recordedInodeCache key
{- Gets the recorded inode cache for a key.
-

View file

@ -22,7 +22,6 @@ module Utility.InodeCache (
readInodeCache,
showInodeCache,
genInodeCache,
genInodeCache',
toInodeCache,
likeInodeCacheWeak,
@ -182,12 +181,8 @@ readInodeCache s = case words s of
return $ InodeCache $ InodeCachePrim i sz (MTimeHighRes t)
_ -> Nothing
genInodeCache :: FilePath -> TSDelta -> IO (Maybe InodeCache)
genInodeCache :: RawFilePath -> TSDelta -> IO (Maybe InodeCache)
genInodeCache f delta = catchDefaultIO Nothing $
toInodeCache delta f =<< getFileStatus f
genInodeCache' :: RawFilePath -> TSDelta -> IO (Maybe InodeCache)
genInodeCache' f delta = catchDefaultIO Nothing $
toInodeCache delta (fromRawFilePath f) =<< R.getFileStatus f
toInodeCache :: TSDelta -> FilePath -> FileStatus -> IO (Maybe InodeCache)
@ -208,8 +203,8 @@ toInodeCache (TSDelta getdelta) f s
- Its InodeCache at the time of its creation is written to the cache file,
- so changes can later be detected. -}
data SentinalFile = SentinalFile
{ sentinalFile :: FilePath
, sentinalCacheFile :: FilePath
{ sentinalFile :: RawFilePath
, sentinalCacheFile :: RawFilePath
}
deriving (Show)
@ -226,8 +221,8 @@ noTSDelta = TSDelta (pure 0)
writeSentinalFile :: SentinalFile -> IO ()
writeSentinalFile s = do
writeFile (sentinalFile s) ""
maybe noop (writeFile (sentinalCacheFile s) . showInodeCache)
writeFile (fromRawFilePath (sentinalFile s)) ""
maybe noop (writeFile (fromRawFilePath (sentinalCacheFile s)) . showInodeCache)
=<< genInodeCache (sentinalFile s) noTSDelta
data SentinalStatus = SentinalStatus
@ -256,7 +251,7 @@ checkSentinalFile s = do
Just new -> return $ calc old new
where
loadoldcache = catchDefaultIO Nothing $
readInodeCache <$> readFile (sentinalCacheFile s)
readInodeCache <$> readFile (fromRawFilePath (sentinalCacheFile s))
gennewcache = genInodeCache (sentinalFile s) noTSDelta
calc (InodeCache (InodeCachePrim oldinode oldsize oldmtime)) (InodeCache (InodeCachePrim newinode newsize newmtime)) =
SentinalStatus (not unchanged) tsdelta
@ -281,7 +276,7 @@ checkSentinalFile s = do
dummy = SentinalStatus True noTSDelta
sentinalFileExists :: SentinalFile -> IO Bool
sentinalFileExists s = allM doesFileExist [sentinalCacheFile s, sentinalFile s]
sentinalFileExists s = allM R.doesPathExist [sentinalCacheFile s, sentinalFile s]
instance Arbitrary InodeCache where
arbitrary =

View file

@ -8,13 +8,14 @@ module Utility.MD5 where
import Data.Bits
import Data.Word
import Data.Char
display_32bits_as_dir :: Word32 -> String
display_32bits_as_dir :: Word32 -> [Word8]
display_32bits_as_dir w = trim $ swap_pairs cs
where
-- Need 32 characters to use. To avoid inaverdently making
-- a real word, use letters that appear less frequently.
chars = ['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF"
chars = map (fromIntegral . ord) (['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF")
cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7]
getc n = chars !! fromIntegral n
swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs

View file

@ -19,14 +19,20 @@ module Utility.RawFilePath (
readSymbolicLink,
getFileStatus,
getSymbolicLinkStatus,
doesPathExist,
) where
#ifndef mingw32_HOST_OS
import Utility.FileSystemEncoding (RawFilePath)
import System.Posix.Files.ByteString
doesPathExist :: RawFilePath -> IO Bool
doesPathExist = fileExist
#else
import qualified Data.ByteString as B
import qualified System.PosixCompat as P
import qualified System.Directory as D
import Utility.FileSystemEncoding
readSymbolicLink :: RawFilePath -> IO RawFilePath
@ -37,4 +43,7 @@ getFileStatus = P.getFileStatus . fromRawFilePath
getSymbolicLinkStatus :: RawFilePath -> IO FileStatus
getSymbolicLinkStatus = P.getSymbolicLinkStatus . fromRawFilePath
doesPathExist :: RawFilePath -> IO Bool
doesPathExist = D.doesPathExist . fromRawFilePath
#endif

View file

@ -11,26 +11,12 @@ than find so the improvement is not as large.
The `bs` branch is in a mergeable state now, but still needs work:
* Profile various commands and look for hot spots.
* Eliminate all the fromRawFilePath, toRawFilePath, encodeBS,
decodeBS conversions. Or at least most of them. There are likely
quite a few places where a value is converted back and forth several times.
As a first step, profile and look for the hot spots. Known hot spots:
some places where a value is converted back and forth several times.
* keyFile uses fromRawFilePath and that adds around 3% overhead in `git-annex find`.
Converting it to a RawFilePath needs a version of `</>` for RawFilePaths.
* getJournalFileStale uses fromRawFilePath, and adds 3-5% overhead in
`git-annex whereis`. Converting it to RawFilePath needs a version
of `</>` for RawFilePaths. It also needs a ByteString.readFile
for RawFilePath.
* System.FilePath is not available for RawFilePath, and many of the
conversions are to get a FilePath in order to use that library.
It should be entirely straightforward to make a version of System.FilePath
that can operate on RawFilePath, except possibly there could be some
complications due to Windows.
* Use versions of IO actions like getFileStatus that take a RawFilePath,
avoiding a conversion. Note that these are only available on unix, not
windows, so a compatability shim will be needed.

View file

@ -0,0 +1,40 @@
[[!comment format=mdwn
username="joey"
subject="""comment 3"""
date="2019-12-11T18:16:13Z"
content="""
Updated profiling. git-annex find is now ByteString end-to-end!
Note the massive reduction in alloc, and improved runtime.
Wed Dec 11 14:41 2019 Time and Allocation Profiling Report (Final)
git-annex +RTS -p -RTS find
total time = 1.51 secs (1515 ticks @ 1000 us, 1 processor)
total alloc = 608,475,328 bytes (excludes profiling overheads)
COST CENTRE MODULE SRC %time %alloc
keyFile' Annex.Locations Annex/Locations.hs:(590,1)-(600,30) 8.2 16.6
>>=.\.succ' Data.Attoparsec.Internal.Types Data/Attoparsec/Internal/Types.hs:146:13-76 4.7 0.7
getAnnexLinkTarget'.probesymlink Annex.Link Annex/Link.hs:79:9-46 4.2 7.6
>>=.\ Data.Attoparsec.Internal.Types Data/Attoparsec/Internal/Types.hs:(146,9)-(147,44) 3.9 2.3
parseLinkTarget Annex.Link Annex/Link.hs:(255,1)-(263,25) 3.9 11.8
doesPathExist Utility.RawFilePath Utility/RawFilePath.hs:30:1-25 3.4 0.6
keyFile'.esc Annex.Locations Annex/Locations.hs:(596,9)-(600,30) 3.2 14.7
fileKey' Annex.Locations Annex/Locations.hs:(609,1)-(619,41) 3.0 4.7
parseLinkTargetOrPointer Annex.Link Annex/Link.hs:(240,1)-(244,25) 2.8 0.2
hashUpdates.\.\.\ Crypto.Hash Crypto/Hash.hs:85:48-99 2.5 0.1
combineAlways System.FilePath.Posix.ByteString System/FilePath/Posix/../Internal.hs:(698,1)-(704,67) 2.0 3.3
getState Annex Annex.hs:(251,1)-(254,27) 2.0 1.1
withPtr.makeTrampoline Basement.Block.Base Basement/Block/Base.hs:(401,5)-(404,31) 1.9 1.7
withMutablePtrHint Basement.Block.Base Basement/Block/Base.hs:(468,1)-(482,50) 1.8 1.2
parseKeyVariety Types.Key Types/Key.hs:(323,1)-(371,42) 1.8 0.0
fileKey'.go Annex.Locations Annex/Locations.hs:611:9-55 1.7 2.2
isLinkToAnnex Annex.Link Annex/Link.hs:(299,1)-(305,47) 1.7 1.0
hashDirMixed Annex.DirHashes Annex/DirHashes.hs:(82,1)-(90,27) 1.7 1.3
primitive Basement.Monad Basement/Monad.hs:72:5-18 1.6 0.1
withPtr Basement.Block.Base Basement/Block/Base.hs:(395,1)-(404,31) 1.5 1.6
mkKeySerialization Types.Key Types/Key.hs:(115,1)-(117,22) 1.1 2.8
decimal.step Data.Attoparsec.ByteString.Char8 Data/Attoparsec/ByteString/Char8.hs:448:9-49 0.8 1.2
"""]]

View file

@ -24,6 +24,7 @@ extra-deps:
- sandi-0.5
- http-client-0.5.14
- silently-1.2.5.1
- filepath-bytestring-1.4.2.1.0
explicit-setup-deps:
git-annex: true
resolver: lts-13.29