diff --git a/Annex/AutoMerge.hs b/Annex/AutoMerge.hs index d558c94c60..c2990eabf2 100644 --- a/Annex/AutoMerge.hs +++ b/Annex/AutoMerge.hs @@ -334,10 +334,9 @@ inodeMap :: Annex ([RawFilePath], IO Bool) -> Annex InodeMap inodeMap getfiles = do (fs, cleanup) <- getfiles fsis <- forM fs $ \f -> do - let f' = fromRawFilePath f - mi <- withTSDelta (liftIO . genInodeCache f') + mi <- withTSDelta (liftIO . genInodeCache f) return $ case mi of Nothing -> Nothing - Just i -> Just (inodeCacheToKey Strongly i, f') + Just i -> Just (inodeCacheToKey Strongly i, fromRawFilePath f) void $ liftIO cleanup return $ M.fromList $ catMaybes fsis diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 10fa59abc4..6934e62bab 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -482,7 +482,7 @@ stageJournal jl commitindex = withIndex $ withOtherTmp $ \tmpdir -> do sha <- Git.HashObject.hashFile h path hPutStrLn jlogh file streamer $ Git.UpdateIndex.updateIndexLine - sha TreeFile (asTopFilePath $ toRawFilePath $ fileJournal file) + sha TreeFile (asTopFilePath $ fileJournal $ toRawFilePath file) genstream dir h jh jlogh streamer -- Clean up the staged files, as listed in the temp log file. -- The temp file is used to avoid needing to buffer all the diff --git a/Annex/Content.hs b/Annex/Content.hs index c109e3f1f8..74dd17886e 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -89,17 +89,18 @@ import Annex.Content.LowLevel import Annex.Content.PointerFile import Annex.Concurrent import Types.WorkerPool +import qualified Utility.RawFilePath as R {- Checks if a given key's content is currently present. -} inAnnex :: Key -> Annex Bool -inAnnex key = inAnnexCheck key $ liftIO . doesFileExist +inAnnex key = inAnnexCheck key $ liftIO . R.doesPathExist {- Runs an arbitrary check on a key's content. -} -inAnnexCheck :: Key -> (FilePath -> Annex Bool) -> Annex Bool +inAnnexCheck :: Key -> (RawFilePath -> Annex Bool) -> Annex Bool inAnnexCheck key check = inAnnex' id False check key {- inAnnex that performs an arbitrary check of the key's content. -} -inAnnex' :: (a -> Bool) -> a -> (FilePath -> Annex a) -> Key -> Annex a +inAnnex' :: (a -> Bool) -> a -> (RawFilePath -> Annex a) -> Key -> Annex a inAnnex' isgood bad check key = withObjectLoc key $ \loc -> do r <- check loc if isgood r @@ -120,12 +121,15 @@ inAnnex' isgood bad check key = withObjectLoc key $ \loc -> do {- Like inAnnex, checks if the object file for a key exists, - but there are no guarantees it has the right content. -} objectFileExists :: Key -> Annex Bool -objectFileExists key = calcRepo (gitAnnexLocation key) >>= liftIO . doesFileExist +objectFileExists key = + calcRepo (gitAnnexLocation key) + >>= liftIO . R.doesPathExist {- A safer check; the key's content must not only be present, but - is not in the process of being removed. -} inAnnexSafe :: Key -> Annex (Maybe Bool) -inAnnexSafe key = inAnnex' (fromMaybe True) (Just False) go key +inAnnexSafe key = + inAnnex' (fromMaybe True) (Just False) (go . fromRawFilePath) key where is_locked = Nothing is_unlocked = Just True @@ -246,7 +250,7 @@ winLocker _ _ Nothing = return Nothing lockContentUsing :: ContentLocker -> Key -> Annex a -> Annex a lockContentUsing locker key a = do - contentfile <- calcRepo $ gitAnnexLocation key + contentfile <- fromRawFilePath <$> calcRepo (gitAnnexLocation key) lockfile <- contentLockFile key bracket (lock contentfile lockfile) @@ -474,18 +478,20 @@ moveAnnex key src = ifM (checkSecureHashes key) , return False ) where - storeobject dest = ifM (liftIO $ doesFileExist dest) + storeobject dest = ifM (liftIO $ R.doesPathExist dest) ( alreadyhave - , modifyContent dest $ do + , modifyContent dest' $ do freezeContent src - liftIO $ moveFile src dest + liftIO $ moveFile src dest' g <- Annex.gitRepo fs <- map (`fromTopFilePath` g) <$> Database.Keys.getAssociatedFiles key unless (null fs) $ do - ics <- mapM (populatePointerFile (Restage True) key (toRawFilePath dest)) fs + ics <- mapM (populatePointerFile (Restage True) key dest) fs Database.Keys.storeInodeCaches' key [dest] (catMaybes ics) ) + where + dest' = fromRawFilePath dest alreadyhave = liftIO $ removeFile src checkSecureHashes :: Key -> Annex Bool @@ -505,7 +511,7 @@ data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop linkToAnnex :: Key -> FilePath -> Maybe InodeCache -> Annex LinkAnnexResult linkToAnnex key src srcic = ifM (checkSecureHashes key) ( do - dest <- calcRepo (gitAnnexLocation key) + dest <- fromRawFilePath <$> calcRepo (gitAnnexLocation key) modifyContent dest $ linkAnnex To key src srcic dest Nothing , return LinkAnnexFailed ) @@ -515,7 +521,7 @@ linkFromAnnex :: Key -> FilePath -> Maybe FileMode -> Annex LinkAnnexResult linkFromAnnex key dest destmode = do src <- calcRepo (gitAnnexLocation key) srcic <- withTSDelta (liftIO . genInodeCache src) - linkAnnex From key src srcic dest destmode + linkAnnex From key (fromRawFilePath src) srcic dest destmode data FromTo = From | To @@ -534,7 +540,7 @@ data FromTo = From | To linkAnnex :: FromTo -> Key -> FilePath -> Maybe InodeCache -> FilePath -> Maybe FileMode -> Annex LinkAnnexResult linkAnnex _ _ _ Nothing _ _ = return LinkAnnexFailed linkAnnex fromto key src (Just srcic) dest destmode = - withTSDelta (liftIO . genInodeCache dest) >>= \case + withTSDelta (liftIO . genInodeCache dest') >>= \case Just destic -> do cs <- Database.Keys.getInodeCaches key if null cs @@ -551,12 +557,13 @@ linkAnnex fromto key src (Just srcic) dest destmode = Linked -> noop checksrcunchanged where + dest' = toRawFilePath dest failed = do Database.Keys.addInodeCaches key [srcic] return LinkAnnexFailed - checksrcunchanged = withTSDelta (liftIO . genInodeCache src) >>= \case + checksrcunchanged = withTSDelta (liftIO . genInodeCache (toRawFilePath src)) >>= \case Just srcic' | compareStrong srcic srcic' -> do - destic <- withTSDelta (liftIO . genInodeCache dest) + destic <- withTSDelta (liftIO . genInodeCache dest') Database.Keys.addInodeCaches key $ catMaybes [destic, Just srcic] return LinkAnnexOk @@ -567,7 +574,7 @@ linkAnnex fromto key src (Just srcic) dest destmode = {- Removes the annex object file for a key. Lowlevel. -} unlinkAnnex :: Key -> Annex () unlinkAnnex key = do - obj <- calcRepo $ gitAnnexLocation key + obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key) modifyContent obj $ do secureErase obj liftIO $ nukeFile obj @@ -616,15 +623,15 @@ prepSendAnnex key = withObjectLoc key $ \f -> do else pure cache return $ if null cache' then Nothing - else Just (f, sameInodeCache f cache') + else Just (fromRawFilePath f, sameInodeCache f cache') {- Performs an action, passing it the location to use for a key's content. -} -withObjectLoc :: Key -> (FilePath -> Annex a) -> Annex a +withObjectLoc :: Key -> (RawFilePath -> Annex a) -> Annex a withObjectLoc key a = a =<< calcRepo (gitAnnexLocation key) cleanObjectLoc :: Key -> Annex () -> Annex () cleanObjectLoc key cleaner = do - file <- calcRepo $ gitAnnexLocation key + file <- fromRawFilePath <$> calcRepo (gitAnnexLocation key) void $ tryIO $ thawContentDir file cleaner liftIO $ removeparents file (3 :: Int) @@ -640,22 +647,23 @@ cleanObjectLoc key cleaner = do removeAnnex :: ContentRemovalLock -> Annex () removeAnnex (ContentRemovalLock key) = withObjectLoc key $ \file -> cleanObjectLoc key $ do - secureErase file - liftIO $ nukeFile file + let file' = fromRawFilePath file + secureErase file' + liftIO $ nukeFile file' g <- Annex.gitRepo - mapM_ (\f -> void $ tryIO $ resetpointer $ fromRawFilePath $ fromTopFilePath f g) + mapM_ (\f -> void $ tryIO $ resetpointer $ fromTopFilePath f g) =<< Database.Keys.getAssociatedFiles key Database.Keys.removeInodeCaches key where -- Check associated pointer file for modifications, and reset if -- it's unmodified. resetpointer file = ifM (isUnmodified key file) - ( depopulatePointerFile key (toRawFilePath file) + ( depopulatePointerFile key file -- Modified file, so leave it alone. -- If it was a hard link to the annex object, -- that object might have been frozen as part of the -- removal process, so thaw it. - , void $ tryIO $ thawContent file + , void $ tryIO $ thawContent $ fromRawFilePath file ) {- Check if a file contains the unmodified content of the key. @@ -663,12 +671,12 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key $ \file -> - The expensive way to tell is to do a verification of its content. - The cheaper way is to see if the InodeCache for the key matches the - file. -} -isUnmodified :: Key -> FilePath -> Annex Bool +isUnmodified :: Key -> RawFilePath -> Annex Bool isUnmodified key f = go =<< geti where go Nothing = return False go (Just fc) = isUnmodifiedCheap' key fc <||> expensivecheck fc - expensivecheck fc = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key f) + expensivecheck fc = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key (fromRawFilePath f)) ( do -- The file could have been modified while it was -- being verified. Detect that. @@ -691,7 +699,7 @@ isUnmodified key f = go =<< geti - this may report a false positive when repeated edits are made to a file - within a small time window (eg 1 second). -} -isUnmodifiedCheap :: Key -> FilePath -> Annex Bool +isUnmodifiedCheap :: Key -> RawFilePath -> Annex Bool isUnmodifiedCheap key f = maybe (return False) (isUnmodifiedCheap' key) =<< withTSDelta (liftIO . genInodeCache f) @@ -703,7 +711,7 @@ isUnmodifiedCheap' key fc = - returns the file it was moved to. -} moveBad :: Key -> Annex FilePath moveBad key = do - src <- calcRepo $ gitAnnexLocation key + src <- fromRawFilePath <$> calcRepo (gitAnnexLocation key) bad <- fromRepo gitAnnexBadDir let dest = bad takeFileName src createAnnexDirectory (parentDir dest) @@ -791,7 +799,7 @@ preseedTmp key file = go =<< inAnnex key copy = ifM (liftIO $ doesFileExist file) ( return True , do - s <- calcRepo $ gitAnnexLocation key + s <- fromRawFilePath <$> (calcRepo $ gitAnnexLocation key) liftIO $ ifM (doesFileExist s) ( copyFileExternal CopyTimeStamps s file , return False diff --git a/Annex/Content/PointerFile.hs b/Annex/Content/PointerFile.hs index 59825a9d70..997f731ca6 100644 --- a/Annex/Content/PointerFile.hs +++ b/Annex/Content/PointerFile.hs @@ -38,10 +38,11 @@ populatePointerFile restage k obj f = go =<< liftIO (isPointerFile f) destmode <- liftIO $ catchMaybeIO $ fileMode <$> getFileStatus f' liftIO $ nukeFile f' (ic, populated) <- replaceFile f' $ \tmp -> do + let tmp' = toRawFilePath tmp ok <- linkOrCopy k (fromRawFilePath obj) tmp destmode >>= \case Just _ -> thawContent tmp >> return True - Nothing -> liftIO (writePointerFile (toRawFilePath tmp) k destmode) >> return False - ic <- withTSDelta (liftIO . genInodeCache tmp) + Nothing -> liftIO (writePointerFile tmp' k destmode) >> return False + ic <- withTSDelta (liftIO . genInodeCache tmp') return (ic, ok) maybe noop (restagePointerFile restage f) ic if populated @@ -68,5 +69,5 @@ depopulatePointerFile key file = do (\t -> touch tmp t False) (fmap modificationTimeHiRes st) #endif - withTSDelta (liftIO . genInodeCache tmp) + withTSDelta (liftIO . genInodeCache (toRawFilePath tmp)) maybe noop (restagePointerFile (Restage True) file) ic diff --git a/Annex/DirHashes.hs b/Annex/DirHashes.hs index 1fb0073826..237345feb1 100644 --- a/Annex/DirHashes.hs +++ b/Annex/DirHashes.hs @@ -1,6 +1,6 @@ {- git-annex file locations - - - Copyright 2010-2017 Joey Hess + - Copyright 2010-2019 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -19,7 +19,10 @@ module Annex.DirHashes ( import Data.Default import Data.Bits -import qualified Data.ByteArray +import qualified Data.ByteArray as BA +import qualified Data.ByteArray.Encoding as BA +import qualified Data.ByteString as S +import qualified System.FilePath.ByteString as P import Common import Key @@ -28,7 +31,7 @@ import Types.Difference import Utility.Hash import Utility.MD5 -type Hasher = Key -> FilePath +type Hasher = Key -> RawFilePath -- Number of hash levels to use. 2 is the default. newtype HashLevels = HashLevels Int @@ -47,7 +50,7 @@ configHashLevels d config | hasDifference d (annexDifferences config) = HashLevels 1 | otherwise = def -branchHashDir :: GitConfig -> Key -> String +branchHashDir :: GitConfig -> Key -> S.ByteString branchHashDir = hashDirLower . branchHashLevels {- Two different directory hashes may be used. The mixed case hash @@ -60,19 +63,26 @@ branchHashDir = hashDirLower . branchHashLevels dirHashes :: [HashLevels -> Hasher] dirHashes = [hashDirLower, hashDirMixed] -hashDirs :: HashLevels -> Int -> String -> FilePath -hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s -hashDirs _ sz s = addTrailingPathSeparator $ take sz s drop sz s +hashDirs :: HashLevels -> Int -> S.ByteString -> RawFilePath +hashDirs (HashLevels 1) sz s = P.addTrailingPathSeparator $ S.take sz s +hashDirs _ sz s = P.addTrailingPathSeparator $ h P. t + where + (h, t) = S.splitAt sz s hashDirLower :: HashLevels -> Hasher -hashDirLower n k = hashDirs n 3 $ take 6 $ show $ md5s $ serializeKey' $ nonChunkKey k +hashDirLower n k = hashDirs n 3 $ S.pack $ take 6 $ conv $ + md5s $ serializeKey' $ nonChunkKey k + where + conv v = BA.unpack $ + (BA.convertToBase BA.Base16 v :: BA.Bytes) {- This was originally using Data.Hash.MD5 from MissingH. This new version - is faster, but ugly as it has to replicate the 4 Word32's that produced. -} hashDirMixed :: HashLevels -> Hasher -hashDirMixed n k = hashDirs n 2 $ take 4 $ concatMap display_32bits_as_dir $ - encodeWord32 $ map fromIntegral $ Data.ByteArray.unpack $ - Utility.Hash.md5s $ serializeKey' $ nonChunkKey k +hashDirMixed n k = hashDirs n 2 $ S.pack $ take 4 $ + concatMap display_32bits_as_dir $ + encodeWord32 $ map fromIntegral $ BA.unpack $ + Utility.Hash.md5s $ serializeKey' $ nonChunkKey k where encodeWord32 (b1:b2:b3:b4:rest) = (shiftL b4 24 .|. shiftL b3 16 .|. shiftL b2 8 .|. b1) diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs index 85a4d38122..e1b22c7b8a 100644 --- a/Annex/Ingest.hs +++ b/Annex/Ingest.hs @@ -92,7 +92,7 @@ lockDown' cfg file = tryIO $ ifM crippledFileSystem nohardlink = withTSDelta $ liftIO . nohardlink' nohardlink' delta = do - cache <- genInodeCache file delta + cache <- genInodeCache (toRawFilePath file) delta return $ LockedDown cfg $ KeySource { keyFilename = file , contentLocation = file @@ -112,7 +112,7 @@ lockDown' cfg file = tryIO $ ifM crippledFileSystem withhardlink' delta tmpfile = do createLink file tmpfile - cache <- genInodeCache tmpfile delta + cache <- genInodeCache (toRawFilePath tmpfile) delta return $ LockedDown cfg $ KeySource { keyFilename = file , contentLocation = tmpfile @@ -209,7 +209,7 @@ finishIngestUnlocked' key source restage = do {- Copy to any other locations using the same key. -} populateAssociatedFiles :: Key -> KeySource -> Restage -> Annex () populateAssociatedFiles key source restage = do - obj <- toRawFilePath <$> calcRepo (gitAnnexLocation key) + obj <- calcRepo (gitAnnexLocation key) g <- Annex.gitRepo ingestedf <- flip fromTopFilePath g <$> inRepo (toTopFilePath (toRawFilePath (keyFilename source))) @@ -235,8 +235,7 @@ cleanOldKeys file newkey = do unlessM (isUnmodified key =<< calcRepo (gitAnnexLocation key)) $ do caches <- Database.Keys.getInodeCaches key unlinkAnnex key - fs <- map fromRawFilePath - . filter (/= ingestedf) + fs <- filter (/= ingestedf) . map (`fromTopFilePath` g) <$> Database.Keys.getAssociatedFiles key filterM (`sameInodeCache` caches) fs >>= \case @@ -245,7 +244,7 @@ cleanOldKeys file newkey = do -- so no need for any recovery. (f:_) -> do ic <- withTSDelta (liftIO . genInodeCache f) - void $ linkToAnnex key f ic + void $ linkToAnnex key (fromRawFilePath f) ic _ -> logStatus key InfoMissing {- On error, put the file back so it doesn't seem to have vanished. @@ -256,7 +255,7 @@ restoreFile file key e = do liftIO $ nukeFile file -- The key could be used by other files too, so leave the -- content in the annex, and make a copy back to the file. - obj <- calcRepo $ gitAnnexLocation key + obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key) unlessM (liftIO $ copyFileExternal CopyTimeStamps obj file) $ warning $ "Unable to restore content of " ++ file ++ "; it should be located in " ++ obj thawContent file diff --git a/Annex/InodeSentinal.hs b/Annex/InodeSentinal.hs index 0f5c7ca606..0dae0d6cac 100644 --- a/Annex/InodeSentinal.hs +++ b/Annex/InodeSentinal.hs @@ -29,7 +29,7 @@ compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly ) {- Checks if one of the provided old InodeCache matches the current - version of a file. -} -sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool +sameInodeCache :: RawFilePath -> [InodeCache] -> Annex Bool sameInodeCache _ [] = return False sameInodeCache file old = go =<< withTSDelta (liftIO . genInodeCache file) where @@ -78,7 +78,7 @@ createInodeSentinalFile :: Bool -> Annex () createInodeSentinalFile evenwithobjects = unlessM (alreadyexists <||> hasobjects) $ do s <- annexSentinalFile - createAnnexDirectory (parentDir (sentinalFile s)) + createAnnexDirectory (parentDir (fromRawFilePath (sentinalFile s))) liftIO $ writeSentinalFile s where alreadyexists = liftIO. sentinalFileExists =<< annexSentinalFile diff --git a/Annex/Journal.hs b/Annex/Journal.hs index e7e624f354..937e183e22 100644 --- a/Annex/Journal.hs +++ b/Annex/Journal.hs @@ -20,7 +20,9 @@ import Utility.Directory.Stream import qualified Data.ByteString.Lazy as L import qualified Data.ByteString as S +import qualified System.FilePath.ByteString as P import Data.ByteString.Builder +import Data.Char class Journalable t where writeJournalHandle :: Handle -> t -> IO () @@ -48,7 +50,7 @@ setJournalFile :: Journalable content => JournalLocked -> RawFilePath -> content setJournalFile _jl file content = withOtherTmp $ \tmp -> do createAnnexDirectory =<< fromRepo gitAnnexJournalDir -- journal file is written atomically - jfile <- fromRepo $ journalFile $ fromRawFilePath file + jfile <- fromRawFilePath <$> fromRepo (journalFile file) let tmpfile = tmp takeFileName jfile liftIO $ do withFile tmpfile WriteMode $ \h -> writeJournalHandle h content @@ -71,7 +73,7 @@ getJournalFile _jl = getJournalFileStale -} getJournalFileStale :: RawFilePath -> Annex (Maybe L.ByteString) getJournalFileStale file = inRepo $ \g -> catchMaybeIO $ - L.fromStrict <$> S.readFile (journalFile (fromRawFilePath file) g) + L.fromStrict <$> S.readFile (fromRawFilePath $ journalFile file g) {- List of existing journal files, but without locking, may miss new ones - just being added, or may have false positives if the journal is staged @@ -81,7 +83,8 @@ getJournalledFilesStale = do g <- gitRepo fs <- liftIO $ catchDefaultIO [] $ getDirectoryContents $ gitAnnexJournalDir g - return $ filter (`notElem` [".", ".."]) $ map fileJournal fs + return $ filter (`notElem` [".", ".."]) $ + map (fromRawFilePath . fileJournal . toRawFilePath) fs withJournalHandle :: (DirectoryHandle -> IO a) -> Annex a withJournalHandle a = do @@ -97,24 +100,29 @@ journalDirty = do `catchIO` (const $ doesDirectoryExist d) {- Produces a filename to use in the journal for a file on the branch. + - + - The input filename is assumed to not contain any '_' character, + - since path separators are replaced with that. - - The journal typically won't have a lot of files in it, so the hashing - used in the branch is not necessary, and all the files are put directly - in the journal directory. -} -journalFile :: FilePath -> Git.Repo -> FilePath -journalFile file repo = gitAnnexJournalDir repo concatMap mangle file +journalFile :: RawFilePath -> Git.Repo -> RawFilePath +journalFile file repo = gitAnnexJournalDir' repo P. S.map mangle file where mangle c - | c == pathSeparator = "_" - | c == '_' = "__" - | otherwise = [c] + | c == P.pathSeparator = fromIntegral (ord '_') + | otherwise = c {- Converts a journal file (relative to the journal dir) back to the - filename on the branch. -} -fileJournal :: FilePath -> FilePath -fileJournal = replace [pathSeparator, pathSeparator] "_" . - replace "_" [pathSeparator] +fileJournal :: RawFilePath -> RawFilePath +fileJournal = S.map unmangle + where + unmangle c + | c == fromIntegral (ord '_') = P.pathSeparator + | otherwise = c {- Sentinal value, only produced by lockJournal; required - as a parameter by things that need to ensure the journal is diff --git a/Annex/Link.hs b/Annex/Link.hs index fe9e1d52d7..ede132a5b9 100644 --- a/Annex/Link.hs +++ b/Annex/Link.hs @@ -39,6 +39,7 @@ import qualified Utility.RawFilePath as R import qualified Data.ByteString as S import qualified Data.ByteString.Char8 as S8 import qualified Data.ByteString.Lazy as L +import qualified System.FilePath.ByteString as P type LinkTarget = String @@ -182,7 +183,7 @@ restagePointerFile (Restage True) f orig = withTSDelta $ \tsd -> do absf <- liftIO $ absPath $ fromRawFilePath f Annex.Queue.addInternalAction runner [(absf, isunmodified tsd)] where - isunmodified tsd = genInodeCache' f tsd >>= return . \case + isunmodified tsd = genInodeCache f tsd >>= return . \case Nothing -> False Just new -> compareStrong orig new @@ -301,8 +302,7 @@ isLinkToAnnex s = p `S.isInfixOf` s || p' `S.isInfixOf` s #endif where - sp = (pathSeparator:objectDir) - p = toRawFilePath sp + p = P.pathSeparator `S.cons` objectDir' #ifdef mingw32_HOST_OS - p' = toRawFilePath (toInternalGitPath sp) + p' = toInternalGitPath p #endif diff --git a/Annex/Locations.hs b/Annex/Locations.hs index 3c49099094..36858a72bb 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -16,6 +16,7 @@ module Annex.Locations ( keyPath, annexDir, objectDir, + objectDir', gitAnnexLocation, gitAnnexLocationDepth, gitAnnexLink, @@ -62,6 +63,7 @@ module Annex.Locations ( gitAnnexFeedState, gitAnnexMergeDir, gitAnnexJournalDir, + gitAnnexJournalDir', gitAnnexJournalLock, gitAnnexGitQueueLock, gitAnnexPreCommitLock, @@ -105,6 +107,7 @@ import qualified Git.Types as Git import Git.FilePath import Annex.DirHashes import Annex.Fixup +import qualified Utility.RawFilePath as R {- Conventions: - @@ -124,21 +127,27 @@ import Annex.Fixup annexDir :: FilePath annexDir = addTrailingPathSeparator "annex" +annexDir' :: RawFilePath +annexDir' = P.addTrailingPathSeparator "annex" + {- The directory git annex uses for locally available object content, - relative to the .git directory -} objectDir :: FilePath objectDir = addTrailingPathSeparator $ annexDir "objects" +objectDir' :: RawFilePath +objectDir' = P.addTrailingPathSeparator $ annexDir' P. "objects" + {- Annexed file's possible locations relative to the .git directory. - There are two different possibilities, using different hashes. - - Also, some repositories have a Difference in hash directory depth. -} -annexLocations :: GitConfig -> Key -> [FilePath] +annexLocations :: GitConfig -> Key -> [RawFilePath] annexLocations config key = map (annexLocation config key) dirHashes -annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> FilePath -annexLocation config key hasher = objectDir keyPath key (hasher $ objectHashLevels config) +annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> RawFilePath +annexLocation config key hasher = objectDir' P. keyPath key (hasher $ objectHashLevels config) {- Number of subdirectories from the gitAnnexObjectDir - to the gitAnnexLocation. -} @@ -158,14 +167,14 @@ gitAnnexLocationDepth config = hashlevels + 1 - This does not take direct mode into account, so in direct mode it is not - the actual location of the file's content. -} -gitAnnexLocation :: Key -> Git.Repo -> GitConfig -> IO FilePath +gitAnnexLocation :: Key -> Git.Repo -> GitConfig -> IO RawFilePath gitAnnexLocation key r config = gitAnnexLocation' key r config (annexCrippledFileSystem config) (coreSymlinks config) - doesFileExist - (fromRawFilePath (Git.localGitDir r)) + R.doesPathExist + (Git.localGitDir r) -gitAnnexLocation' :: Key -> Git.Repo -> GitConfig -> Bool -> Bool -> (FilePath -> IO Bool) -> FilePath -> IO FilePath +gitAnnexLocation' :: Key -> Git.Repo -> GitConfig -> Bool -> Bool -> (RawFilePath -> IO Bool) -> RawFilePath -> IO RawFilePath gitAnnexLocation' key r config crippled symlinkssupported checker gitdir {- Bare repositories default to hashDirLower for new - content, as it's more portable. But check all locations. -} @@ -187,7 +196,7 @@ gitAnnexLocation' key r config crippled symlinkssupported checker gitdir only = return . inrepo . annexLocation config key checkall = check $ map inrepo $ annexLocations config key - inrepo d = gitdir d + inrepo d = gitdir P. d check locs@(l:_) = fromMaybe l <$> firstM checker locs check [] = error "internal" @@ -199,16 +208,17 @@ gitAnnexLink file key r config = do let gitdir = getgitdir currdir loc <- gitAnnexLocation' key r config False False (\_ -> return True) gitdir fromRawFilePath . toInternalGitPath . toRawFilePath - <$> relPathDirToFile (parentDir absfile) loc + <$> relPathDirToFile (parentDir absfile) (fromRawFilePath loc) where getgitdir currdir {- This special case is for git submodules on filesystems not - supporting symlinks; generate link target that will - work portably. -} | not (coreSymlinks config) && needsSubmoduleFixup r = - absNormPathUnix currdir $ fromRawFilePath $ - Git.repoPath r P. ".git" - | otherwise = fromRawFilePath $ Git.localGitDir r + toRawFilePath $ + absNormPathUnix currdir $ fromRawFilePath $ + Git.repoPath r P. ".git" + | otherwise = Git.localGitDir r absNormPathUnix d p = fromRawFilePath $ toInternalGitPath $ toRawFilePath $ absPathFrom (fromRawFilePath $ toInternalGitPath $ toRawFilePath d) @@ -232,33 +242,36 @@ gitAnnexLinkCanonical file key r config = gitAnnexLink file key r' config' gitAnnexContentLock :: Key -> Git.Repo -> GitConfig -> IO FilePath gitAnnexContentLock key r config = do loc <- gitAnnexLocation key r config - return $ loc ++ ".lck" + return $ fromRawFilePath loc ++ ".lck" {- File that maps from a key to the file(s) in the git repository. - Used in direct mode. -} gitAnnexMapping :: Key -> Git.Repo -> GitConfig -> IO FilePath gitAnnexMapping key r config = do loc <- gitAnnexLocation key r config - return $ loc ++ ".map" + return $ fromRawFilePath loc ++ ".map" {- File that caches information about a key's content, used to determine - if a file has changed. - Used in direct mode. -} gitAnnexInodeCache :: Key -> Git.Repo -> GitConfig -> IO FilePath -gitAnnexInodeCache key r config = do +gitAnnexInodeCache key r config = do loc <- gitAnnexLocation key r config - return $ loc ++ ".cache" + return $ fromRawFilePath loc ++ ".cache" -gitAnnexInodeSentinal :: Git.Repo -> FilePath -gitAnnexInodeSentinal r = gitAnnexDir r "sentinal" +gitAnnexInodeSentinal :: Git.Repo -> RawFilePath +gitAnnexInodeSentinal r = gitAnnexDir' r P. "sentinal" -gitAnnexInodeSentinalCache :: Git.Repo -> FilePath -gitAnnexInodeSentinalCache r = gitAnnexInodeSentinal r ++ ".cache" +gitAnnexInodeSentinalCache :: Git.Repo -> RawFilePath +gitAnnexInodeSentinalCache r = gitAnnexInodeSentinal r <> ".cache" {- The annex directory of a repository. -} gitAnnexDir :: Git.Repo -> FilePath gitAnnexDir r = addTrailingPathSeparator $ fromRawFilePath (Git.localGitDir r) annexDir +gitAnnexDir' :: Git.Repo -> RawFilePath +gitAnnexDir' r = P.addTrailingPathSeparator $ Git.localGitDir r P. annexDir' + {- The part of the annex directory where file contents are stored. -} gitAnnexObjectDir :: Git.Repo -> FilePath gitAnnexObjectDir r = addTrailingPathSeparator $ fromRawFilePath (Git.localGitDir r) objectDir @@ -428,6 +441,9 @@ gitAnnexTransferDir r = addTrailingPathSeparator $ gitAnnexDir r "transfer" gitAnnexJournalDir :: Git.Repo -> FilePath gitAnnexJournalDir r = addTrailingPathSeparator $ gitAnnexDir r "journal" +gitAnnexJournalDir' :: Git.Repo -> RawFilePath +gitAnnexJournalDir' r = P.addTrailingPathSeparator $ gitAnnexDir' r P. "journal" + {- Lock file for the journal. -} gitAnnexJournalLock :: Git.Repo -> FilePath gitAnnexJournalLock r = gitAnnexDir r "journal.lck" @@ -609,10 +625,10 @@ fileKey' = deserializeKey' . S8.intercalate "/" . map go . S8.split '%' - The file is put in a directory with the same name, this allows - write-protecting the directory to avoid accidental deletion of the file. -} -keyPath :: Key -> Hasher -> FilePath -keyPath key hasher = hasher key f f +keyPath :: Key -> Hasher -> RawFilePath +keyPath key hasher = hasher key P. f P. f where - f = keyFile key + f = keyFile' key {- All possibile locations to store a key in a special remote - using different directory hashes. @@ -620,5 +636,5 @@ keyPath key hasher = hasher key f f - This is compatible with the annexLocations, for interoperability between - special remotes and git-annex repos. -} -keyPaths :: Key -> [FilePath] +keyPaths :: Key -> [RawFilePath] keyPaths key = map (\h -> keyPath key (h def)) dirHashes diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs index 1b2c11061e..bca75be864 100644 --- a/Annex/WorkTree.hs +++ b/Annex/WorkTree.hs @@ -101,13 +101,14 @@ scanUnlockedFiles = whenM (inRepo Git.Ref.headExists <&&> not <$> isBareRepo) $ Just k' | k' == k -> do destmode <- liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus f - ic <- replaceFile (fromRawFilePath f) $ \tmp -> + ic <- replaceFile (fromRawFilePath f) $ \tmp -> do + let tmp' = toRawFilePath tmp linkFromAnnex k tmp destmode >>= \case LinkAnnexOk -> - withTSDelta (liftIO . genInodeCache tmp) + withTSDelta (liftIO . genInodeCache tmp') LinkAnnexNoop -> return Nothing LinkAnnexFailed -> liftIO $ do - writePointerFile (toRawFilePath tmp) k destmode + writePointerFile tmp' k destmode return Nothing maybe noop (restagePointerFile (Restage True) f) ic _ -> noop diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index 5ed49166bb..53d72b6454 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -308,7 +308,7 @@ handleAdds lockdowndir havelsof delayadd cs = returnWhen (null incomplete) $ do if M.null m then forM toadd (add cfg) else forM toadd $ \c -> do - mcache <- liftIO $ genInodeCache (changeFile c) delta + mcache <- liftIO $ genInodeCache (toRawFilePath (changeFile c)) delta case mcache of Nothing -> add cfg c Just cache -> diff --git a/Assistant/Upgrade.hs b/Assistant/Upgrade.hs index 0ea52f3158..a8a6778abe 100644 --- a/Assistant/Upgrade.hs +++ b/Assistant/Upgrade.hs @@ -113,7 +113,7 @@ distributionDownloadComplete d dest cleanup t | transferDirection t == Download = do debug ["finished downloading git-annex distribution"] maybe (failedupgrade "bad download") go - =<< liftAnnex (withObjectLoc k fsckit) + =<< liftAnnex (withObjectLoc k (fsckit . fromRawFilePath)) | otherwise = cleanup where k = mkKey $ const $ distributionKey d diff --git a/CHANGELOG b/CHANGELOG index 66ae7e8bdc..a3c748ce93 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,13 +1,9 @@ git-annex (7.20191115) UNRELEASED; urgency=medium - * Sped up many git-annex commands that operate on many files, by - using ByteStrings. Some commands like find got up to 60% faster. - * Sped up many git-annex commands that operate on many files, by - avoiding reserialization of keys. - find got 7% faster; whereis 3% faster; and git-annex get when - all files are already present got 5% faster - * Sped up many git-annex commands that query the git-annex branch. - In particular whereis got 1.5% faster. + * Optimised processing of many files, especially by commands like find + and whereis that only report on the state of the repository. Commands + like get also sped up in cases where they have to check a lot of + files but only transfer a few files. Speedups range from 30-100%. * Stop displaying rsync progress, and use git-annex's own progress display for local-to-local repo transfers. * git-lfs: The url provided to initremote/enableremote will now be diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 1811698f00..0ffa1cbfb6 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -131,7 +131,7 @@ withUnmodifiedUnlockedPointers a l = seekActions $ isUnmodifiedUnlocked :: RawFilePath -> Annex Bool isUnmodifiedUnlocked f = catKeyFile f >>= \case Nothing -> return False - Just k -> sameInodeCache (fromRawFilePath f) =<< Database.Keys.getInodeCaches k + Just k -> sameInodeCache f =<< Database.Keys.getInodeCaches k {- Finds files that may be modified. -} withFilesMaybeModified :: (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek diff --git a/Command/ContentLocation.hs b/Command/ContentLocation.hs index 9576f86044..ef2e467bb5 100644 --- a/Command/ContentLocation.hs +++ b/Command/ContentLocation.hs @@ -9,6 +9,9 @@ module Command.ContentLocation where import Command import Annex.Content +import qualified Utility.RawFilePath as R + +import qualified Data.ByteString.Char8 as B8 cmd :: Command cmd = noCommit $ noMessages $ @@ -20,10 +23,10 @@ cmd = noCommit $ noMessages $ run :: () -> String -> Annex Bool run _ p = do let k = fromMaybe (giveup "bad key") $ deserializeKey p - maybe (return False) (\f -> liftIO (putStrLn f) >> return True) + maybe (return False) (\f -> liftIO (B8.putStrLn f) >> return True) =<< inAnnex' (pure True) Nothing check k where - check f = ifM (liftIO (doesFileExist f)) + check f = ifM (liftIO (R.doesPathExist f)) ( return (Just f) , return Nothing ) diff --git a/Command/DiffDriver.hs b/Command/DiffDriver.hs index ecc05ca093..e0cef22234 100644 --- a/Command/DiffDriver.hs +++ b/Command/DiffDriver.hs @@ -90,7 +90,8 @@ fixupReq req@(Req {}) = v <- getAnnexLinkTarget' (toRawFilePath (getfile r)) False case parseLinkTargetOrPointer =<< v of Nothing -> return r - Just k -> withObjectLoc k (pure . setfile r) + Just k -> withObjectLoc k $ + pure . setfile r . fromRawFilePath _ -> return r externalDiffer :: String -> [String] -> Differ diff --git a/Command/Find.hs b/Command/Find.hs index 4e71ac845a..eba431c92c 100644 --- a/Command/Find.hs +++ b/Command/Find.hs @@ -93,8 +93,8 @@ keyVars key = , ("bytesize", size show) , ("humansize", size $ roughSize storageUnits True) , ("keyname", decodeBS $ fromKey keyName key) - , ("hashdirlower", hashDirLower def key) - , ("hashdirmixed", hashDirMixed def key) + , ("hashdirlower", fromRawFilePath $ hashDirLower def key) + , ("hashdirmixed", fromRawFilePath $ hashDirMixed def key) , ("mtime", whenavail show $ fromKey keyMtime key) ] where diff --git a/Command/Fix.hs b/Command/Fix.hs index 52e076f30b..e26d184092 100644 --- a/Command/Fix.hs +++ b/Command/Fix.hs @@ -53,11 +53,11 @@ start fixwhat file key = do where fixby = starting "fix" (mkActionItem (key, file)) fixthin = do - obj <- calcRepo $ gitAnnexLocation key - stopUnless (isUnmodified key (fromRawFilePath file) <&&> isUnmodified key obj) $ do + obj <- calcRepo (gitAnnexLocation key) + stopUnless (isUnmodified key file <&&> isUnmodified key obj) $ do thin <- annexThin <$> Annex.getGitConfig fs <- liftIO $ catchMaybeIO $ R.getFileStatus file - os <- liftIO $ catchMaybeIO $ getFileStatus obj + os <- liftIO $ catchMaybeIO $ R.getFileStatus obj case (linkCount <$> fs, linkCount <$> os, thin) of (Just 1, Just 1, True) -> fixby $ makeHardLink file key @@ -65,15 +65,16 @@ start fixwhat file key = do fixby $ breakHardLink file key obj _ -> stop -breakHardLink :: RawFilePath -> Key -> FilePath -> CommandPerform +breakHardLink :: RawFilePath -> Key -> RawFilePath -> CommandPerform breakHardLink file key obj = do replaceFile (fromRawFilePath file) $ \tmp -> do mode <- liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus file - unlessM (checkedCopyFile key obj tmp mode) $ + let obj' = fromRawFilePath obj + unlessM (checkedCopyFile key obj' tmp mode) $ error "unable to break hard link" thawContent tmp - modifyContent obj $ freezeContent obj - Database.Keys.storeInodeCaches key [fromRawFilePath file] + modifyContent obj' $ freezeContent obj' + Database.Keys.storeInodeCaches key [file] next $ return True makeHardLink :: RawFilePath -> Key -> CommandPerform diff --git a/Command/Fsck.hs b/Command/Fsck.hs index a55b882c09..3010a6ce37 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -223,7 +223,7 @@ fixLink key file = do - in this repository only. -} verifyLocationLog :: Key -> KeyStatus -> ActionItem -> Annex Bool verifyLocationLog key keystatus ai = do - obj <- calcRepo $ gitAnnexLocation key + obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key) present <- if isKeyUnlockedThin keystatus then liftIO (doesFileExist obj) else inAnnex key @@ -332,11 +332,11 @@ verifyWorkTree key file = do ifM (annexThin <$> Annex.getGitConfig) ( void $ linkFromAnnex key tmp mode , do - obj <- calcRepo $ gitAnnexLocation key + obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key) void $ checkedCopyFile key obj tmp mode thawContent tmp ) - Database.Keys.storeInodeCaches key [fromRawFilePath file] + Database.Keys.storeInodeCaches key [file] _ -> return () return True @@ -349,8 +349,8 @@ checkKeySize :: Key -> KeyStatus -> ActionItem -> Annex Bool checkKeySize _ KeyUnlockedThin _ = return True checkKeySize key _ ai = do file <- calcRepo $ gitAnnexLocation key - ifM (liftIO $ doesFileExist file) - ( checkKeySizeOr badContent key file ai + ifM (liftIO $ R.doesPathExist file) + ( checkKeySizeOr badContent key (fromRawFilePath file) ai , return True ) @@ -417,10 +417,10 @@ checkKeyUpgrade _ _ _ (AssociatedFile Nothing) = -} checkBackend :: Backend -> Key -> KeyStatus -> AssociatedFile -> Annex Bool checkBackend backend key keystatus afile = do - content <- calcRepo $ gitAnnexLocation key + content <- calcRepo (gitAnnexLocation key) ifM (pure (isKeyUnlockedThin keystatus) <&&> (not <$> isUnmodified key content)) ( nocheck - , checkBackendOr badContent backend key content ai + , checkBackendOr badContent backend key (fromRawFilePath content) ai ) where nocheck = return True @@ -670,8 +670,8 @@ isKeyUnlockedThin KeyMissing = False getKeyStatus :: Key -> Annex KeyStatus getKeyStatus key = catchDefaultIO KeyMissing $ do afs <- not . null <$> Database.Keys.getAssociatedFiles key - obj <- calcRepo $ gitAnnexLocation key - multilink <- ((> 1) . linkCount <$> liftIO (getFileStatus obj)) + obj <- calcRepo (gitAnnexLocation key) + multilink <- ((> 1) . linkCount <$> liftIO (R.getFileStatus obj)) return $ if multilink && afs then KeyUnlockedThin else KeyPresent diff --git a/Command/Import.hs b/Command/Import.hs index 615fe5db1c..7e8ea18642 100644 --- a/Command/Import.hs +++ b/Command/Import.hs @@ -181,7 +181,7 @@ startLocal largematcher mode (srcfile, destfile) = -- weakly the same as the origianlly locked down file's -- inode cache. (Since the file may have been copied, -- its inodes may not be the same.) - newcache <- withTSDelta $ liftIO . genInodeCache destfile + newcache <- withTSDelta $ liftIO . genInodeCache (toRawFilePath destfile) let unchanged = case (newcache, inodeCache (keySource ld)) of (_, Nothing) -> True (Just newc, Just c) | compareWeak c newc -> True diff --git a/Command/Lock.hs b/Command/Lock.hs index e0ca6e4594..6e8a7f4ffb 100644 --- a/Command/Lock.hs +++ b/Command/Lock.hs @@ -20,6 +20,7 @@ import qualified Database.Keys import Annex.Ingest import Logs.Location import Git.FilePath +import qualified Utility.RawFilePath as R cmd :: Command cmd = withGlobalOptions [jsonOptions, annexedMatchingOptions] $ @@ -43,7 +44,7 @@ startNew file key = ifM (isJust <$> isAnnexLink file) | key' == key = cont | otherwise = errorModified go Nothing = - ifM (isUnmodified key (fromRawFilePath file)) + ifM (isUnmodified key file) ( cont , ifM (Annex.getState Annex.force) ( cont @@ -56,37 +57,38 @@ performNew :: RawFilePath -> Key -> CommandPerform performNew file key = do lockdown =<< calcRepo (gitAnnexLocation key) addLink (fromRawFilePath file) key - =<< withTSDelta (liftIO . genInodeCache' file) + =<< withTSDelta (liftIO . genInodeCache file) next $ cleanupNew file key where lockdown obj = do ifM (isUnmodified key obj) ( breakhardlink obj - , repopulate obj + , repopulate (fromRawFilePath obj) ) - whenM (liftIO $ doesFileExist obj) $ - freezeContent obj + whenM (liftIO $ R.doesPathExist obj) $ + freezeContent $ fromRawFilePath obj -- It's ok if the file is hard linked to obj, but if some other -- associated file is, we need to break that link to lock down obj. - breakhardlink obj = whenM (catchBoolIO $ (> 1) . linkCount <$> liftIO (getFileStatus obj)) $ do - mfc <- withTSDelta (liftIO . genInodeCache' file) + breakhardlink obj = whenM (catchBoolIO $ (> 1) . linkCount <$> liftIO (R.getFileStatus obj)) $ do + mfc <- withTSDelta (liftIO . genInodeCache file) unlessM (sameInodeCache obj (maybeToList mfc)) $ do - modifyContent obj $ replaceFile obj $ \tmp -> do - unlessM (checkedCopyFile key obj tmp Nothing) $ + let obj' = fromRawFilePath obj + modifyContent obj' $ replaceFile obj' $ \tmp -> do + unlessM (checkedCopyFile key obj' tmp Nothing) $ giveup "unable to lock file" Database.Keys.storeInodeCaches key [obj] -- Try to repopulate obj from an unmodified associated file. repopulate obj = modifyContent obj $ do g <- Annex.gitRepo - fs <- map fromRawFilePath . map (`fromTopFilePath` g) + fs <- map (`fromTopFilePath` g) <$> Database.Keys.getAssociatedFiles key mfile <- firstM (isUnmodified key) fs liftIO $ nukeFile obj case mfile of Just unmodified -> - unlessM (checkedCopyFile key unmodified obj Nothing) + unlessM (checkedCopyFile key (fromRawFilePath unmodified) obj Nothing) lostcontent Nothing -> lostcontent diff --git a/Command/Migrate.hs b/Command/Migrate.hs index 0f964bb749..2feb879aa5 100644 --- a/Command/Migrate.hs +++ b/Command/Migrate.hs @@ -86,7 +86,7 @@ perform file oldkey oldbackend newbackend = go =<< genkey (fastMigrate oldbacken content <- calcRepo $ gitAnnexLocation oldkey let source = KeySource { keyFilename = fromRawFilePath file - , contentLocation = content + , contentLocation = fromRawFilePath content , inodeCache = Nothing } v <- genKey source nullMeterUpdate (Just newbackend) diff --git a/Command/Multicast.hs b/Command/Multicast.hs index 6c6d2c418b..fcb36800d4 100644 --- a/Command/Multicast.hs +++ b/Command/Multicast.hs @@ -137,7 +137,8 @@ send ups fs = do mk <- lookupFile f case mk of Nothing -> noop - Just k -> withObjectLoc k (addlist f) + Just k -> withObjectLoc k $ + addlist f . fromRawFilePath liftIO $ hClose h serverkey <- uftpKey diff --git a/Command/ReKey.hs b/Command/ReKey.hs index a67d876df7..52984928bd 100644 --- a/Command/ReKey.hs +++ b/Command/ReKey.hs @@ -83,12 +83,12 @@ linkKey file oldkey newkey = ifM (isJust <$> isAnnexLink file) - unlocked file, which would leave the new key unlocked - and vulnerable to corruption. -} ( getViaTmpFromDisk RetrievalAllKeysSecure DefaultVerify newkey $ \tmp -> unVerified $ do - oldobj <- calcRepo (gitAnnexLocation oldkey) + oldobj <- fromRawFilePath <$> calcRepo (gitAnnexLocation oldkey) isJust <$> linkOrCopy' (return True) newkey oldobj tmp Nothing , do {- The file being rekeyed is itself an unlocked file; if - it's hard linked to the old key, that link must be broken. -} - oldobj <- calcRepo (gitAnnexLocation oldkey) + oldobj <- fromRawFilePath <$> calcRepo (gitAnnexLocation oldkey) v <- tryNonAsync $ do st <- liftIO $ R.getFileStatus file when (linkCount st > 1) $ do @@ -97,7 +97,7 @@ linkKey file oldkey newkey = ifM (isJust <$> isAnnexLink file) unlessM (checkedCopyFile oldkey oldobj tmp Nothing) $ error "can't lock old key" thawContent tmp - ic <- withTSDelta (liftIO . genInodeCache' file) + ic <- withTSDelta (liftIO . genInodeCache file) case v of Left e -> do warning (show e) diff --git a/Command/Smudge.hs b/Command/Smudge.hs index 9b5e57ede1..d8f6c08454 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -108,7 +108,7 @@ clean file = do -- annexed and is unmodified. case oldkey of Nothing -> doingest oldkey - Just ko -> ifM (isUnmodifiedCheap ko file) + Just ko -> ifM (isUnmodifiedCheap ko (toRawFilePath file)) ( liftIO $ emitPointer ko , doingest oldkey ) @@ -174,7 +174,7 @@ shouldAnnex file moldkey = ifM (annexGitAddToAnnex <$> Annex.getGitConfig) Just _ -> return True Nothing -> checkknowninode - checkknowninode = withTSDelta (liftIO . genInodeCache file) >>= \case + checkknowninode = withTSDelta (liftIO . genInodeCache (toRawFilePath file)) >>= \case Nothing -> pure False Just ic -> Database.Keys.isInodeKnown ic =<< sentinalStatus @@ -191,7 +191,7 @@ emitPointer = S.putStr . formatPointer getMoveRaceRecovery :: Key -> RawFilePath -> Annex () getMoveRaceRecovery k file = void $ tryNonAsync $ whenM (inAnnex k) $ do - obj <- toRawFilePath <$> calcRepo (gitAnnexLocation k) + obj <- calcRepo (gitAnnexLocation k) -- Cannot restage because git add is running and has -- the index locked. populatePointerFile (Restage False) k obj file >>= \case @@ -207,7 +207,7 @@ updateSmudged :: Restage -> Annex () updateSmudged restage = streamSmudged $ \k topf -> do f <- fromRepo (fromTopFilePath topf) whenM (inAnnex k) $ do - obj <- toRawFilePath <$> calcRepo (gitAnnexLocation k) + obj <- calcRepo (gitAnnexLocation k) unlessM (isJust <$> populatePointerFile restage k obj f) $ liftIO (isPointerFile f) >>= \case Just k' | k' == k -> toplevelWarning False $ diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs index 292697a781..bf8c24cd5d 100644 --- a/Command/TestRemote.hs +++ b/Command/TestRemote.hs @@ -168,7 +168,7 @@ test st r k = catMaybes get , Just $ check "fsck downloaded object" fsck , Just $ check "retrieveKeyFile resume from 33%" $ do - loc <- Annex.calcRepo (gitAnnexLocation k) + loc <- fromRawFilePath <$> Annex.calcRepo (gitAnnexLocation k) tmp <- prepTmp k partial <- liftIO $ bracket (openBinaryFile loc ReadMode) hClose $ \h -> do sz <- hFileSize h @@ -184,7 +184,7 @@ test st r k = catMaybes get , Just $ check "fsck downloaded object" fsck , Just $ check "retrieveKeyFile resume from end" $ do - loc <- Annex.calcRepo (gitAnnexLocation k) + loc <- fromRawFilePath <$> Annex.calcRepo (gitAnnexLocation k) tmp <- prepTmp k void $ liftIO $ copyFileExternal CopyAllMetaData loc tmp lockContentForRemoval k removeAnnex @@ -240,7 +240,7 @@ testExportTree st (Just _) ea k1 k2 = check desc a = testCase desc $ Annex.eval st (Annex.setOutput QuietOutput >> a) @? "failed" storeexport k = do - loc <- Annex.calcRepo (gitAnnexLocation k) + loc <- fromRawFilePath <$> Annex.calcRepo (gitAnnexLocation k) Remote.storeExport ea loc k testexportlocation nullMeterUpdate retrieveexport k = withTmpFile "exported" $ \tmp h -> do liftIO $ hClose h diff --git a/Command/Unannex.hs b/Command/Unannex.hs index 356ff1d946..d63f9a6b4f 100644 --- a/Command/Unannex.hs +++ b/Command/Unannex.hs @@ -46,7 +46,7 @@ perform file key = do cleanup :: RawFilePath -> Key -> CommandCleanup cleanup file key = do Database.Keys.removeAssociatedFile key =<< inRepo (toTopFilePath file) - src <- calcRepo $ gitAnnexLocation key + src <- fromRawFilePath <$> calcRepo (gitAnnexLocation key) ifM (Annex.getState Annex.fast) ( do -- Only make a hard link if the annexed file does not diff --git a/Command/Uninit.hs b/Command/Uninit.hs index 6c62694543..29278a6c4e 100644 --- a/Command/Uninit.hs +++ b/Command/Uninit.hs @@ -17,6 +17,7 @@ import qualified Database.Keys import Annex.Content import Annex.Init import Utility.FileMode +import qualified Utility.RawFilePath as R cmd :: Command cmd = addCheck check $ @@ -117,5 +118,5 @@ removeUnannexed = go [] , go (k:c) ks ) enoughlinks f = catchBoolIO $ do - s <- getFileStatus f + s <- R.getFileStatus f return $ linkCount s > 1 diff --git a/Command/Unused.hs b/Command/Unused.hs index 7f49440e6b..78400db7e1 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -283,7 +283,7 @@ associatedFilesFilter = filterM go checkunmodified _ [] = return True checkunmodified cs (f:fs) = do relf <- fromRepo $ fromTopFilePath f - ifM (sameInodeCache (fromRawFilePath relf) cs) + ifM (sameInodeCache relf cs) ( return False , checkunmodified cs fs ) diff --git a/Database/Keys.hs b/Database/Keys.hs index b04dff02be..48d51caf4e 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -169,13 +169,13 @@ removeAssociatedFile :: Key -> TopFilePath -> Annex () removeAssociatedFile k = runWriterIO . SQL.removeAssociatedFile (toIKey k) {- Stats the files, and stores their InodeCaches. -} -storeInodeCaches :: Key -> [FilePath] -> Annex () +storeInodeCaches :: Key -> [RawFilePath] -> Annex () storeInodeCaches k fs = storeInodeCaches' k fs [] -storeInodeCaches' :: Key -> [FilePath] -> [InodeCache] -> Annex () +storeInodeCaches' :: Key -> [RawFilePath] -> [InodeCache] -> Annex () storeInodeCaches' k fs ics = withTSDelta $ \d -> addInodeCaches k . (++ ics) . catMaybes - =<< liftIO (mapM (`genInodeCache` d) fs) + =<< liftIO (mapM (\f -> genInodeCache f d) fs) addInodeCaches :: Key -> [InodeCache] -> Annex () addInodeCaches k is = runWriterIO $ SQL.addInodeCaches (toIKey k) is @@ -223,7 +223,7 @@ reconcileStaged :: H.DbQueue -> Annex () reconcileStaged qh = do gitindex <- inRepo currentIndexFile indexcache <- fromRepo gitAnnexKeysDbIndexCache - withTSDelta (liftIO . genInodeCache gitindex) >>= \case + withTSDelta (liftIO . genInodeCache (toRawFilePath gitindex)) >>= \case Just cur -> liftIO (maybe Nothing readInodeCache <$> catchMaybeIO (readFile indexcache)) >>= \case Nothing -> go cur indexcache @@ -295,10 +295,10 @@ reconcileStaged qh = do keyloc <- calcRepo (gitAnnexLocation key) keypopulated <- sameInodeCache keyloc caches p <- fromRepo $ fromTopFilePath file - filepopulated <- sameInodeCache (fromRawFilePath p) caches + filepopulated <- sameInodeCache p caches case (keypopulated, filepopulated) of (True, False) -> - populatePointerFile (Restage True) key (toRawFilePath keyloc) p >>= \case + populatePointerFile (Restage True) key keyloc p >>= \case Nothing -> return () Just ic -> liftIO $ SQL.addInodeCaches ikey [ic] (SQL.WriteHandle qh) diff --git a/Limit.hs b/Limit.hs index 9e8ece2d11..2069822711 100644 --- a/Limit.hs +++ b/Limit.hs @@ -33,6 +33,7 @@ import Git.Types (RefDate(..)) import Utility.Glob import Utility.HumanTime import Utility.DataUnits +import qualified Utility.RawFilePath as R import Data.Time.Clock.POSIX import qualified Data.Set as S @@ -117,7 +118,8 @@ addMagicLimit limitname querymagic selectprovidedinfo glob = do -- When the file is an annex symlink, get magic of the -- object file. Nothing -> isAnnexLink (toRawFilePath f) >>= \case - Just k -> withObjectLoc k $ querymagic magic + Just k -> withObjectLoc k $ + querymagic magic . fromRawFilePath Nothing -> querymagic magic f matchMagic :: String -> (Magic -> FilePath -> Annex (Maybe String)) -> (ProvidedInfo -> OptInfo String) -> Maybe Magic -> MkLimit Annex @@ -363,7 +365,7 @@ addAccessedWithin duration = do where check now k = inAnnexCheck k $ \f -> liftIO $ catchDefaultIO False $ do - s <- getFileStatus f + s <- R.getFileStatus f let accessed = realToFrac (accessTime s) let delta = now - accessed return $ delta <= secs diff --git a/Logs.hs b/Logs.hs index d612aa8d56..18a045b452 100644 --- a/Logs.hs +++ b/Logs.hs @@ -13,6 +13,7 @@ import Annex.Common import Annex.DirHashes import qualified Data.ByteString as S +import qualified System.FilePath.ByteString as P {- There are several varieties of log file formats. -} data LogVariety @@ -117,19 +118,19 @@ exportLog = "export.log" {- The pathname of the location log file for a given key. -} locationLogFile :: GitConfig -> Key -> RawFilePath -locationLogFile config key = toRawFilePath $ - branchHashDir config key keyFile key ++ ".log" +locationLogFile config key = + branchHashDir config key P. keyFile' key <> ".log" {- The filename of the url log for a given key. -} urlLogFile :: GitConfig -> Key -> RawFilePath -urlLogFile config key = toRawFilePath $ - branchHashDir config key keyFile key ++ decodeBS' urlLogExt +urlLogFile config key = + branchHashDir config key P. keyFile' key <> urlLogExt {- Old versions stored the urls elsewhere. -} oldurlLogs :: GitConfig -> Key -> [RawFilePath] -oldurlLogs config key = map toRawFilePath - [ "remote/web" hdir serializeKey key ++ ".log" - , "remote/web" hdir keyFile key ++ ".log" +oldurlLogs config key = + [ "remote/web" P. hdir P. serializeKey' key <> ".log" + , "remote/web" P. hdir P. keyFile' key <> ".log" ] where hdir = branchHashDir config key @@ -144,7 +145,7 @@ isUrlLog file = urlLogExt `S.isSuffixOf` file {- The filename of the remote state log for a given key. -} remoteStateLogFile :: GitConfig -> Key -> RawFilePath remoteStateLogFile config key = - toRawFilePath (branchHashDir config key keyFile key) + (branchHashDir config key P. keyFile' key) <> remoteStateLogExt remoteStateLogExt :: S.ByteString @@ -156,7 +157,7 @@ isRemoteStateLog path = remoteStateLogExt `S.isSuffixOf` path {- The filename of the chunk log for a given key. -} chunkLogFile :: GitConfig -> Key -> RawFilePath chunkLogFile config key = - toRawFilePath (branchHashDir config key keyFile key) + (branchHashDir config key P. keyFile' key) <> chunkLogExt chunkLogExt :: S.ByteString @@ -168,7 +169,7 @@ isChunkLog path = chunkLogExt `S.isSuffixOf` path {- The filename of the metadata log for a given key. -} metaDataLogFile :: GitConfig -> Key -> RawFilePath metaDataLogFile config key = - toRawFilePath (branchHashDir config key keyFile key) + (branchHashDir config key P. keyFile' key) <> metaDataLogExt metaDataLogExt :: S.ByteString @@ -180,7 +181,7 @@ isMetaDataLog path = metaDataLogExt `S.isSuffixOf` path {- The filename of the remote metadata log for a given key. -} remoteMetaDataLogFile :: GitConfig -> Key -> RawFilePath remoteMetaDataLogFile config key = - toRawFilePath (branchHashDir config key keyFile key) + (branchHashDir config key P. keyFile' key) <> remoteMetaDataLogExt remoteMetaDataLogExt :: S.ByteString @@ -192,7 +193,7 @@ isRemoteMetaDataLog path = remoteMetaDataLogExt `S.isSuffixOf` path {- The filename of the remote content identifier log for a given key. -} remoteContentIdentifierLogFile :: GitConfig -> Key -> RawFilePath remoteContentIdentifierLogFile config key = - toRawFilePath (branchHashDir config key keyFile key) + (branchHashDir config key P. keyFile' key) <> remoteContentIdentifierExt remoteContentIdentifierExt :: S.ByteString diff --git a/P2P/Annex.hs b/P2P/Annex.hs index dd84668bf8..bcdde75cd1 100644 --- a/P2P/Annex.hs +++ b/P2P/Annex.hs @@ -47,7 +47,7 @@ runLocal runst runner a = case a of size <- liftIO $ catchDefaultIO 0 $ getFileSize f runner (next (Len size)) ContentSize k next -> do - let getsize = liftIO . catchMaybeIO . getFileSize + let getsize = liftIO . catchMaybeIO . getFileSize . fromRawFilePath size <- inAnnex' isJust Nothing getsize k runner (next (Len <$> size)) ReadContent k af o sender next -> do diff --git a/Remote/Adb.hs b/Remote/Adb.hs index 03e3819cff..e7e8fae3b9 100644 --- a/Remote/Adb.hs +++ b/Remote/Adb.hs @@ -212,7 +212,7 @@ androidHashDir :: AndroidPath -> Key -> AndroidPath androidHashDir adir k = AndroidPath $ fromAndroidPath adir ++ "/" ++ hdir where - hdir = replace [pathSeparator] "/" (hashDirLower def k) + hdir = replace [pathSeparator] "/" (fromRawFilePath (hashDirLower def k)) storeExportM :: AndroidSerial -> AndroidPath -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool storeExportM serial adir src _k loc _p = store' serial dest src diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 0387474f9a..933ccd23ce 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -127,7 +127,7 @@ directorySetup _ mu _ c gc = do - We try more than one since we used to write to different hash - directories. -} locations :: FilePath -> Key -> [FilePath] -locations d k = map (d ) (keyPaths k) +locations d k = map (\f -> d fromRawFilePath f) (keyPaths k) {- Returns the location off a Key in the directory. If the key is - present, returns the location that is actually used, otherwise @@ -139,7 +139,8 @@ getLocation d k = do {- Directory where the file(s) for a key are stored. -} storeDir :: FilePath -> Key -> FilePath -storeDir d k = addTrailingPathSeparator $ d hashDirLower def k keyFile k +storeDir d k = addTrailingPathSeparator $ + d fromRawFilePath (hashDirLower def k) keyFile k {- Check if there is enough free disk space in the remote's directory to - store the key. Note that the unencrypted key size is checked. -} diff --git a/Remote/External.hs b/Remote/External.hs index 2b5c99457a..4c4c156848 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -383,9 +383,9 @@ handleRequest' st external req mp responsehandler handleRemoteRequest (PROGRESS bytesprocessed) = maybe noop (\a -> liftIO $ a bytesprocessed) mp handleRemoteRequest (DIRHASH k) = - send $ VALUE $ hashDirMixed def k + send $ VALUE $ fromRawFilePath $ hashDirMixed def k handleRemoteRequest (DIRHASH_LOWER k) = - send $ VALUE $ hashDirLower def k + send $ VALUE $ fromRawFilePath $ hashDirLower def k handleRemoteRequest (SETCONFIG setting value) = liftIO $ atomically $ modifyTVar' (externalConfig st) $ M.insert setting value diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 4682637eaf..c3a3f31348 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -422,7 +422,8 @@ checkKey' repo r rsyncopts k {- Annexed objects are hashed using lower-case directories for max - portability. -} gCryptLocation :: Git.Repo -> Key -> FilePath -gCryptLocation repo key = Git.repoLocation repo objectDir keyPath key (hashDirLower def) +gCryptLocation repo key = Git.repoLocation repo objectDir + fromRawFilePath (keyPath key (hashDirLower def)) data AccessMethod = AccessDirect | AccessShell diff --git a/Remote/Git.hs b/Remote/Git.hs index 459cd80d65..b6dd02ae5f 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -61,6 +61,7 @@ import Creds import Types.NumCopies import Annex.Action import Messages.Progress +import qualified Utility.RawFilePath as R #ifndef mingw32_HOST_OS import Utility.FileMode @@ -393,9 +394,9 @@ keyUrls gc repo r key = map tourl locs' | remoteAnnexBare remoteconfig == Just False = reverse (annexLocations gc key) | otherwise = annexLocations gc key #ifndef mingw32_HOST_OS - locs' = locs + locs' = map fromRawFilePath locs #else - locs' = map (replace "\\" "/") locs + locs' = map (replace "\\" "/" . fromRawFilePath) locs #endif remoteconfig = gitconfig r @@ -599,9 +600,9 @@ copyFromRemoteCheap' repo r st key af file | not $ Git.repoIsUrl repo = guardUsable repo (return False) $ do gc <- getGitConfigFromState st loc <- liftIO $ gitAnnexLocation key repo gc - liftIO $ ifM (doesFileExist loc) + liftIO $ ifM (R.doesPathExist loc) ( do - absloc <- absPath loc + absloc <- absPath (fromRawFilePath loc) catchBoolIO $ do createSymbolicLink absloc file return True diff --git a/Remote/Hook.hs b/Remote/Hook.hs index f0a67d808e..897e73cc1f 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -104,7 +104,8 @@ hookEnv action k f = Just <$> mergeenv (fileenv f ++ keyenv) ] fileenv Nothing = [] fileenv (Just file) = [envvar "FILE" file] - hashbits = map takeDirectory $ splitPath $ hashDirMixed def k + hashbits = map takeDirectory $ splitPath $ + fromRawFilePath $ hashDirMixed def k lookupHook :: HookName -> Action -> Annex (Maybe String) lookupHook hookname action = do diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 566f95bab6..f171b69e60 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -183,7 +183,7 @@ rsyncSetup _ mu _ c gc = do store :: RsyncOpts -> Key -> FilePath -> MeterUpdate -> Annex Bool store o k src meterupdate = storeGeneric o meterupdate basedest populatedest where - basedest = Prelude.head (keyPaths k) + basedest = fromRawFilePath $ Prelude.head (keyPaths k) populatedest dest = liftIO $ if canrename then do rename src dest @@ -222,7 +222,7 @@ remove :: RsyncOpts -> Remover remove o k = removeGeneric o includes where includes = concatMap use dirHashes - use h = let dir = h def k in + use h = let dir = fromRawFilePath (h def k) in [ parentDir dir , dir -- match content directory and anything in it diff --git a/Remote/Rsync/RsyncUrl.hs b/Remote/Rsync/RsyncUrl.hs index 4c2f10843c..2b0dbc1966 100644 --- a/Remote/Rsync/RsyncUrl.hs +++ b/Remote/Rsync/RsyncUrl.hs @@ -13,13 +13,14 @@ import Types import Annex.Locations import Utility.Rsync import Utility.SafeCommand - -import Data.Default -import System.FilePath.Posix +import Utility.FileSystemEncoding +import Annex.DirHashes #ifdef mingw32_HOST_OS import Utility.Split #endif -import Annex.DirHashes + +import Data.Default +import System.FilePath.Posix type RsyncUrl = String @@ -42,7 +43,7 @@ mkRsyncUrl o f = rsyncUrl o rsyncEscape o f rsyncUrls :: RsyncOpts -> Key -> [RsyncUrl] rsyncUrls o k = map use dirHashes where - use h = rsyncUrl o hash h rsyncEscape o (f f) + use h = rsyncUrl o fromRawFilePath (hash h) rsyncEscape o (f f) f = keyFile k #ifndef mingw32_HOST_OS hash h = h def k diff --git a/Remote/WebDAV/DavLocation.hs b/Remote/WebDAV/DavLocation.hs index 4464ed2d36..3893533a22 100644 --- a/Remote/WebDAV/DavLocation.hs +++ b/Remote/WebDAV/DavLocation.hs @@ -39,9 +39,9 @@ keyDir :: Key -> DavLocation keyDir k = addTrailingPathSeparator $ hashdir keyFile k where #ifndef mingw32_HOST_OS - hashdir = hashDirLower def k + hashdir = fromRawFilePath $ hashDirLower def k #else - hashdir = replace "\\" "/" (hashDirLower def k) + hashdir = replace "\\" "/" (fromRawFilePath $ hashDirLower def k) #endif keyLocation :: Key -> DavLocation diff --git a/Test.hs b/Test.hs index 4752ff07e2..7bcfdd3560 100644 --- a/Test.hs +++ b/Test.hs @@ -1638,7 +1638,8 @@ test_crypto = do checkFile mvariant filename = Utility.Gpg.checkEncryptionFile gpgcmd filename $ if mvariant == Just Types.Crypto.PubKey then ks else Nothing - serializeKeys cipher = Annex.Locations.keyPaths . + serializeKeys cipher = map fromRawFilePath . + Annex.Locations.keyPaths . Crypto.encryptKey Types.Crypto.HmacSha1 cipher #else test_crypto = putStrLn "gpg testing not implemented on Windows" diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs index bad1183dfd..e311044664 100644 --- a/Upgrade/V1.hs +++ b/Upgrade/V1.hs @@ -236,9 +236,9 @@ logFile1 repo key = Upgrade.V2.gitStateDir repo ++ keyFile1 key ++ ".log" logFile2 :: Key -> Git.Repo -> String logFile2 = logFile' (hashDirLower def) -logFile' :: (Key -> FilePath) -> Key -> Git.Repo -> String +logFile' :: (Key -> RawFilePath) -> Key -> Git.Repo -> String logFile' hasher key repo = - gitStateDir repo ++ hasher key ++ keyFile key ++ ".log" + gitStateDir repo ++ fromRawFilePath (hasher key) ++ keyFile key ++ ".log" stateDir :: FilePath stateDir = addTrailingPathSeparator ".git-annex" diff --git a/Upgrade/V5.hs b/Upgrade/V5.hs index 7cbdd04e65..a8a84283b3 100644 --- a/Upgrade/V5.hs +++ b/Upgrade/V5.hs @@ -135,7 +135,7 @@ upgradeDirectWorkTree = do -- is just not populated with it. Since the work tree -- file is recorded as an associated file, things will -- still work that way, it's just not ideal. - ic <- withTSDelta (liftIO . genInodeCache f) + ic <- withTSDelta (liftIO . genInodeCache (toRawFilePath f)) void $ Content.linkToAnnex k f ic , unlessM (Content.inAnnex k) $ do -- Worktree file was deleted or modified; diff --git a/Upgrade/V5/Direct.hs b/Upgrade/V5/Direct.hs index baf7dae9a0..600efc616d 100644 --- a/Upgrade/V5/Direct.hs +++ b/Upgrade/V5/Direct.hs @@ -107,7 +107,9 @@ removeAssociatedFiles key = do - expected mtime and inode. -} goodContent :: Key -> FilePath -> Annex Bool -goodContent key file = sameInodeCache file =<< recordedInodeCache key +goodContent key file = + sameInodeCache (toRawFilePath file) + =<< recordedInodeCache key {- Gets the recorded inode cache for a key. - diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs index a918e7bd08..d14d1f9d15 100644 --- a/Utility/InodeCache.hs +++ b/Utility/InodeCache.hs @@ -22,7 +22,6 @@ module Utility.InodeCache ( readInodeCache, showInodeCache, genInodeCache, - genInodeCache', toInodeCache, likeInodeCacheWeak, @@ -182,12 +181,8 @@ readInodeCache s = case words s of return $ InodeCache $ InodeCachePrim i sz (MTimeHighRes t) _ -> Nothing -genInodeCache :: FilePath -> TSDelta -> IO (Maybe InodeCache) +genInodeCache :: RawFilePath -> TSDelta -> IO (Maybe InodeCache) genInodeCache f delta = catchDefaultIO Nothing $ - toInodeCache delta f =<< getFileStatus f - -genInodeCache' :: RawFilePath -> TSDelta -> IO (Maybe InodeCache) -genInodeCache' f delta = catchDefaultIO Nothing $ toInodeCache delta (fromRawFilePath f) =<< R.getFileStatus f toInodeCache :: TSDelta -> FilePath -> FileStatus -> IO (Maybe InodeCache) @@ -208,8 +203,8 @@ toInodeCache (TSDelta getdelta) f s - Its InodeCache at the time of its creation is written to the cache file, - so changes can later be detected. -} data SentinalFile = SentinalFile - { sentinalFile :: FilePath - , sentinalCacheFile :: FilePath + { sentinalFile :: RawFilePath + , sentinalCacheFile :: RawFilePath } deriving (Show) @@ -226,8 +221,8 @@ noTSDelta = TSDelta (pure 0) writeSentinalFile :: SentinalFile -> IO () writeSentinalFile s = do - writeFile (sentinalFile s) "" - maybe noop (writeFile (sentinalCacheFile s) . showInodeCache) + writeFile (fromRawFilePath (sentinalFile s)) "" + maybe noop (writeFile (fromRawFilePath (sentinalCacheFile s)) . showInodeCache) =<< genInodeCache (sentinalFile s) noTSDelta data SentinalStatus = SentinalStatus @@ -256,7 +251,7 @@ checkSentinalFile s = do Just new -> return $ calc old new where loadoldcache = catchDefaultIO Nothing $ - readInodeCache <$> readFile (sentinalCacheFile s) + readInodeCache <$> readFile (fromRawFilePath (sentinalCacheFile s)) gennewcache = genInodeCache (sentinalFile s) noTSDelta calc (InodeCache (InodeCachePrim oldinode oldsize oldmtime)) (InodeCache (InodeCachePrim newinode newsize newmtime)) = SentinalStatus (not unchanged) tsdelta @@ -281,7 +276,7 @@ checkSentinalFile s = do dummy = SentinalStatus True noTSDelta sentinalFileExists :: SentinalFile -> IO Bool -sentinalFileExists s = allM doesFileExist [sentinalCacheFile s, sentinalFile s] +sentinalFileExists s = allM R.doesPathExist [sentinalCacheFile s, sentinalFile s] instance Arbitrary InodeCache where arbitrary = diff --git a/Utility/MD5.hs b/Utility/MD5.hs index d0475bf480..aabb5d724b 100644 --- a/Utility/MD5.hs +++ b/Utility/MD5.hs @@ -8,13 +8,14 @@ module Utility.MD5 where import Data.Bits import Data.Word +import Data.Char -display_32bits_as_dir :: Word32 -> String +display_32bits_as_dir :: Word32 -> [Word8] display_32bits_as_dir w = trim $ swap_pairs cs where -- Need 32 characters to use. To avoid inaverdently making -- a real word, use letters that appear less frequently. - chars = ['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF" + chars = map (fromIntegral . ord) (['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF") cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7] getc n = chars !! fromIntegral n swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs diff --git a/Utility/RawFilePath.hs b/Utility/RawFilePath.hs index a62ba65e51..426f5633a3 100644 --- a/Utility/RawFilePath.hs +++ b/Utility/RawFilePath.hs @@ -19,14 +19,20 @@ module Utility.RawFilePath ( readSymbolicLink, getFileStatus, getSymbolicLinkStatus, + doesPathExist, ) where #ifndef mingw32_HOST_OS import Utility.FileSystemEncoding (RawFilePath) import System.Posix.Files.ByteString + +doesPathExist :: RawFilePath -> IO Bool +doesPathExist = fileExist + #else import qualified Data.ByteString as B import qualified System.PosixCompat as P +import qualified System.Directory as D import Utility.FileSystemEncoding readSymbolicLink :: RawFilePath -> IO RawFilePath @@ -37,4 +43,7 @@ getFileStatus = P.getFileStatus . fromRawFilePath getSymbolicLinkStatus :: RawFilePath -> IO FileStatus getSymbolicLinkStatus = P.getSymbolicLinkStatus . fromRawFilePath + +doesPathExist :: RawFilePath -> IO Bool +doesPathExist = D.doesPathExist . fromRawFilePath #endif diff --git a/doc/todo/optimize_by_converting_String_to_ByteString.mdwn b/doc/todo/optimize_by_converting_String_to_ByteString.mdwn index 7ac7efe382..4a6d2b6dcd 100644 --- a/doc/todo/optimize_by_converting_String_to_ByteString.mdwn +++ b/doc/todo/optimize_by_converting_String_to_ByteString.mdwn @@ -11,26 +11,12 @@ than find so the improvement is not as large. The `bs` branch is in a mergeable state now, but still needs work: +* Profile various commands and look for hot spots. + * Eliminate all the fromRawFilePath, toRawFilePath, encodeBS, decodeBS conversions. Or at least most of them. There are likely - quite a few places where a value is converted back and forth several times. - - As a first step, profile and look for the hot spots. Known hot spots: + some places where a value is converted back and forth several times. - * keyFile uses fromRawFilePath and that adds around 3% overhead in `git-annex find`. - Converting it to a RawFilePath needs a version of `` for RawFilePaths. - * getJournalFileStale uses fromRawFilePath, and adds 3-5% overhead in - `git-annex whereis`. Converting it to RawFilePath needs a version - of `` for RawFilePaths. It also needs a ByteString.readFile - for RawFilePath. - -* System.FilePath is not available for RawFilePath, and many of the - conversions are to get a FilePath in order to use that library. - - It should be entirely straightforward to make a version of System.FilePath - that can operate on RawFilePath, except possibly there could be some - complications due to Windows. - * Use versions of IO actions like getFileStatus that take a RawFilePath, avoiding a conversion. Note that these are only available on unix, not windows, so a compatability shim will be needed. diff --git a/doc/todo/optimize_by_converting_String_to_ByteString/comment_3_5cad0557a1409703f8c71078f0785309._comment b/doc/todo/optimize_by_converting_String_to_ByteString/comment_3_5cad0557a1409703f8c71078f0785309._comment new file mode 100644 index 0000000000..c888f617c0 --- /dev/null +++ b/doc/todo/optimize_by_converting_String_to_ByteString/comment_3_5cad0557a1409703f8c71078f0785309._comment @@ -0,0 +1,40 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2019-12-11T18:16:13Z" + content=""" +Updated profiling. git-annex find is now ByteString end-to-end! +Note the massive reduction in alloc, and improved runtime. + + Wed Dec 11 14:41 2019 Time and Allocation Profiling Report (Final) + + git-annex +RTS -p -RTS find + + total time = 1.51 secs (1515 ticks @ 1000 us, 1 processor) + total alloc = 608,475,328 bytes (excludes profiling overheads) + + COST CENTRE MODULE SRC %time %alloc + + keyFile' Annex.Locations Annex/Locations.hs:(590,1)-(600,30) 8.2 16.6 + >>=.\.succ' Data.Attoparsec.Internal.Types Data/Attoparsec/Internal/Types.hs:146:13-76 4.7 0.7 + getAnnexLinkTarget'.probesymlink Annex.Link Annex/Link.hs:79:9-46 4.2 7.6 + >>=.\ Data.Attoparsec.Internal.Types Data/Attoparsec/Internal/Types.hs:(146,9)-(147,44) 3.9 2.3 + parseLinkTarget Annex.Link Annex/Link.hs:(255,1)-(263,25) 3.9 11.8 + doesPathExist Utility.RawFilePath Utility/RawFilePath.hs:30:1-25 3.4 0.6 + keyFile'.esc Annex.Locations Annex/Locations.hs:(596,9)-(600,30) 3.2 14.7 + fileKey' Annex.Locations Annex/Locations.hs:(609,1)-(619,41) 3.0 4.7 + parseLinkTargetOrPointer Annex.Link Annex/Link.hs:(240,1)-(244,25) 2.8 0.2 + hashUpdates.\.\.\ Crypto.Hash Crypto/Hash.hs:85:48-99 2.5 0.1 + combineAlways System.FilePath.Posix.ByteString System/FilePath/Posix/../Internal.hs:(698,1)-(704,67) 2.0 3.3 + getState Annex Annex.hs:(251,1)-(254,27) 2.0 1.1 + withPtr.makeTrampoline Basement.Block.Base Basement/Block/Base.hs:(401,5)-(404,31) 1.9 1.7 + withMutablePtrHint Basement.Block.Base Basement/Block/Base.hs:(468,1)-(482,50) 1.8 1.2 + parseKeyVariety Types.Key Types/Key.hs:(323,1)-(371,42) 1.8 0.0 + fileKey'.go Annex.Locations Annex/Locations.hs:611:9-55 1.7 2.2 + isLinkToAnnex Annex.Link Annex/Link.hs:(299,1)-(305,47) 1.7 1.0 + hashDirMixed Annex.DirHashes Annex/DirHashes.hs:(82,1)-(90,27) 1.7 1.3 + primitive Basement.Monad Basement/Monad.hs:72:5-18 1.6 0.1 + withPtr Basement.Block.Base Basement/Block/Base.hs:(395,1)-(404,31) 1.5 1.6 + mkKeySerialization Types.Key Types/Key.hs:(115,1)-(117,22) 1.1 2.8 + decimal.step Data.Attoparsec.ByteString.Char8 Data/Attoparsec/ByteString/Char8.hs:448:9-49 0.8 1.2 +"""]] diff --git a/stack.yaml b/stack.yaml index d97bf2f263..dde1d76583 100644 --- a/stack.yaml +++ b/stack.yaml @@ -24,6 +24,7 @@ extra-deps: - sandi-0.5 - http-client-0.5.14 - silently-1.2.5.1 +- filepath-bytestring-1.4.2.1.0 explicit-setup-deps: git-annex: true resolver: lts-13.29