diff --git a/Annex/Content.hs b/Annex/Content.hs index 74dd17886e..7c57cf5040 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -91,6 +91,8 @@ import Annex.Concurrent import Types.WorkerPool import qualified Utility.RawFilePath as R +import qualified System.FilePath.ByteString as P + {- Checks if a given key's content is currently present. -} inAnnex :: Key -> Annex Bool inAnnex key = inAnnexCheck key $ liftIO . R.doesPathExist @@ -742,7 +744,7 @@ listKeys keyloc = do if depth < 2 then do contents' <- filterM (present s) contents - let keys = mapMaybe (fileKey . takeFileName) contents' + let keys = mapMaybe (fileKey . P.takeFileName . toRawFilePath) contents' continue keys [] else do let deeper = walk s (depth - 1) @@ -816,7 +818,7 @@ dirKeys dirspec = do contents <- liftIO $ getDirectoryContents dir files <- liftIO $ filterM doesFileExist $ map (dir ) contents - return $ mapMaybe (fileKey . takeFileName) files + return $ mapMaybe (fileKey . P.takeFileName . toRawFilePath) files , return [] ) @@ -835,7 +837,8 @@ staleKeysPrune dirspec nottransferred = do dir <- fromRepo dirspec forM_ dups $ \k -> - pruneTmpWorkDirBefore (dir keyFile k) (liftIO . removeFile) + pruneTmpWorkDirBefore (dir fromRawFilePath (keyFile k)) + (liftIO . removeFile) if nottransferred then do diff --git a/Annex/Content/LowLevel.hs b/Annex/Content/LowLevel.hs index 546e647def..39e187de76 100644 --- a/Annex/Content/LowLevel.hs +++ b/Annex/Content/LowLevel.hs @@ -128,7 +128,7 @@ checkDiskSpace' need destdir key alreadythere samefilesystem = ifM (Annex.getSta _ -> return True ) where - dir = maybe (fromRepo gitAnnexDir) return destdir + dir = maybe (fromRawFilePath <$> fromRepo gitAnnexDir) return destdir needMoreDiskSpace :: Integer -> String needMoreDiskSpace n = "not enough free space, need " ++ diff --git a/Annex/Link.hs b/Annex/Link.hs index ede132a5b9..faed59f192 100644 --- a/Annex/Link.hs +++ b/Annex/Link.hs @@ -253,7 +253,7 @@ parseLinkTargetOrPointerLazy b = {- Parses a symlink target to a Key. -} parseLinkTarget :: S.ByteString -> Maybe Key parseLinkTarget l - | isLinkToAnnex l = fileKey' $ snd $ S8.breakEnd pathsep l + | isLinkToAnnex l = fileKey $ snd $ S8.breakEnd pathsep l | otherwise = Nothing where pathsep '/' = True @@ -263,9 +263,9 @@ parseLinkTarget l pathsep _ = False formatPointer :: Key -> S.ByteString -formatPointer k = prefix <> keyFile' k <> nl +formatPointer k = prefix <> keyFile k <> nl where - prefix = toInternalGitPath $ toRawFilePath (pathSeparator:objectDir) + prefix = toInternalGitPath $ P.pathSeparator `S.cons` objectDir' nl = S8.singleton '\n' {- Maximum size of a file that could be a pointer to a key. diff --git a/Annex/Locations.hs b/Annex/Locations.hs index 42ac78a240..2aa7605310 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -9,9 +9,7 @@ module Annex.Locations ( keyFile, - keyFile', fileKey, - fileKey', keyPaths, keyPath, annexDir, @@ -126,19 +124,16 @@ import qualified Utility.RawFilePath as R {- The directory git annex uses for local state, relative to the .git - directory -} -annexDir :: FilePath -annexDir = addTrailingPathSeparator "annex" - -annexDir' :: RawFilePath -annexDir' = P.addTrailingPathSeparator "annex" +annexDir :: RawFilePath +annexDir = P.addTrailingPathSeparator "annex" {- The directory git annex uses for locally available object content, - relative to the .git directory -} objectDir :: FilePath -objectDir = addTrailingPathSeparator $ annexDir "objects" +objectDir = fromRawFilePath objectDir' objectDir' :: RawFilePath -objectDir' = P.addTrailingPathSeparator $ annexDir' P. "objects" +objectDir' = P.addTrailingPathSeparator $ annexDir P. "objects" {- Annexed file's possible locations relative to the .git directory. - There are two different possibilities, using different hashes. @@ -262,46 +257,51 @@ gitAnnexInodeCache key r config = do return $ fromRawFilePath loc ++ ".cache" gitAnnexInodeSentinal :: Git.Repo -> RawFilePath -gitAnnexInodeSentinal r = gitAnnexDir' r P. "sentinal" +gitAnnexInodeSentinal r = gitAnnexDir r P. "sentinal" gitAnnexInodeSentinalCache :: Git.Repo -> RawFilePath gitAnnexInodeSentinalCache r = gitAnnexInodeSentinal r <> ".cache" {- The annex directory of a repository. -} -gitAnnexDir :: Git.Repo -> FilePath -gitAnnexDir r = addTrailingPathSeparator $ fromRawFilePath (Git.localGitDir r) annexDir - -gitAnnexDir' :: Git.Repo -> RawFilePath -gitAnnexDir' r = P.addTrailingPathSeparator $ Git.localGitDir r P. annexDir' +gitAnnexDir :: Git.Repo -> RawFilePath +gitAnnexDir r = P.addTrailingPathSeparator $ Git.localGitDir r P. annexDir {- The part of the annex directory where file contents are stored. -} gitAnnexObjectDir :: Git.Repo -> FilePath -gitAnnexObjectDir r = addTrailingPathSeparator $ fromRawFilePath (Git.localGitDir r) objectDir +gitAnnexObjectDir r = fromRawFilePath $ + P.addTrailingPathSeparator $ Git.localGitDir r P. objectDir' {- .git/annex/tmp/ is used for temp files for key's contents -} gitAnnexTmpObjectDir :: Git.Repo -> FilePath -gitAnnexTmpObjectDir r = addTrailingPathSeparator $ gitAnnexDir r "tmp" +gitAnnexTmpObjectDir = fromRawFilePath . gitAnnexTmpObjectDir' + +gitAnnexTmpObjectDir' :: Git.Repo -> RawFilePath +gitAnnexTmpObjectDir' r = P.addTrailingPathSeparator $ gitAnnexDir r P. "tmp" {- .git/annex/othertmp/ is used for other temp files -} gitAnnexTmpOtherDir :: Git.Repo -> FilePath -gitAnnexTmpOtherDir r = addTrailingPathSeparator $ gitAnnexDir r "othertmp" +gitAnnexTmpOtherDir r = fromRawFilePath $ + P.addTrailingPathSeparator $ gitAnnexDir r P. "othertmp" {- Lock file for gitAnnexTmpOtherDir. -} gitAnnexTmpOtherLock :: Git.Repo -> FilePath -gitAnnexTmpOtherLock r = gitAnnexDir r "othertmp.lck" +gitAnnexTmpOtherLock r = fromRawFilePath $ gitAnnexDir r P. "othertmp.lck" {- .git/annex/misctmp/ was used by old versions of git-annex and is still - used during initialization -} gitAnnexTmpOtherDirOld :: Git.Repo -> FilePath -gitAnnexTmpOtherDirOld r = addTrailingPathSeparator $ gitAnnexDir r "misctmp" +gitAnnexTmpOtherDirOld r = fromRawFilePath $ + P.addTrailingPathSeparator $ gitAnnexDir r P. "misctmp" {- .git/annex/watchtmp/ is used by the watcher and assistant -} gitAnnexTmpWatcherDir :: Git.Repo -> FilePath -gitAnnexTmpWatcherDir r = addTrailingPathSeparator $ gitAnnexDir r "watchtmp" +gitAnnexTmpWatcherDir r = fromRawFilePath $ + P.addTrailingPathSeparator $ gitAnnexDir r P. "watchtmp" {- The temp file to use for a given key's content. -} gitAnnexTmpObjectLocation :: Key -> Git.Repo -> FilePath -gitAnnexTmpObjectLocation key r = gitAnnexTmpObjectDir r keyFile key +gitAnnexTmpObjectLocation key r = fromRawFilePath $ + gitAnnexTmpObjectDir' r P. keyFile key {- Given a temp file such as gitAnnexTmpObjectLocation, makes a name for a - subdirectory in the same location, that can be used as a work area @@ -318,19 +318,21 @@ gitAnnexTmpWorkDir p = {- .git/annex/bad/ is used for bad files found during fsck -} gitAnnexBadDir :: Git.Repo -> FilePath -gitAnnexBadDir r = addTrailingPathSeparator $ gitAnnexDir r "bad" +gitAnnexBadDir r = fromRawFilePath $ + P.addTrailingPathSeparator $ gitAnnexDir r P. "bad" {- The bad file to use for a given key. -} gitAnnexBadLocation :: Key -> Git.Repo -> FilePath -gitAnnexBadLocation key r = gitAnnexBadDir r keyFile key +gitAnnexBadLocation key r = gitAnnexBadDir r fromRawFilePath (keyFile key) {- .git/annex/foounused is used to number possibly unused keys -} gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath -gitAnnexUnusedLog prefix r = gitAnnexDir r (prefix ++ "unused") +gitAnnexUnusedLog prefix r = + fromRawFilePath (gitAnnexDir r) (prefix ++ "unused") {- .git/annex/keysdb/ contains a database of information about keys. -} gitAnnexKeysDb :: Git.Repo -> FilePath -gitAnnexKeysDb r = gitAnnexDir r "keysdb" +gitAnnexKeysDb r = fromRawFilePath $ gitAnnexDir r P. "keysdb" {- Lock file for the keys database. -} gitAnnexKeysDbLock :: Git.Repo -> FilePath @@ -344,7 +346,8 @@ gitAnnexKeysDbIndexCache r = gitAnnexKeysDb r ++ ".cache" {- .git/annex/fsck/uuid/ is used to store information about incremental - fscks. -} gitAnnexFsckDir :: UUID -> Git.Repo -> FilePath -gitAnnexFsckDir u r = gitAnnexDir r "fsck" fromUUID u +gitAnnexFsckDir u r = fromRawFilePath $ + gitAnnexDir r P. "fsck" P. fromUUID u {- used to store information about incremental fscks. -} gitAnnexFsckState :: UUID -> Git.Repo -> FilePath @@ -364,20 +367,21 @@ gitAnnexFsckDbLock u r = gitAnnexFsckDir u r "fsck.lck" {- .git/annex/fsckresults/uuid is used to store results of git fscks -} gitAnnexFsckResultsLog :: UUID -> Git.Repo -> FilePath -gitAnnexFsckResultsLog u r = gitAnnexDir r "fsckresults" fromUUID u +gitAnnexFsckResultsLog u r = fromRawFilePath $ + gitAnnexDir r P. "fsckresults" P. fromUUID u {- .git/annex/smudge.log is used to log smudges worktree files that need to - be updated. -} gitAnnexSmudgeLog :: Git.Repo -> FilePath -gitAnnexSmudgeLog r = gitAnnexDir r "smudge.log" +gitAnnexSmudgeLog r = fromRawFilePath $ gitAnnexDir r P. "smudge.log" gitAnnexSmudgeLock :: Git.Repo -> FilePath -gitAnnexSmudgeLock r = gitAnnexDir r "smudge.lck" +gitAnnexSmudgeLock r = fromRawFilePath $ gitAnnexDir r P. "smudge.lck" {- .git/annex/export/ is used to store information about - exports to special remotes. -} gitAnnexExportDir :: Git.Repo -> FilePath -gitAnnexExportDir r = gitAnnexDir r "export" +gitAnnexExportDir r = fromRawFilePath $ gitAnnexDir r P. "export" {- Directory containing database used to record export info. -} gitAnnexExportDbDir :: UUID -> Git.Repo -> FilePath @@ -394,7 +398,8 @@ gitAnnexExportUpdateLock u r = gitAnnexExportDbDir u r ++ ".upl" {- Log file used to keep track of files that were in the tree exported to a - remote, but were excluded by its preferred content settings. -} gitAnnexExportExcludeLog :: UUID -> Git.Repo -> FilePath -gitAnnexExportExcludeLog u r = gitAnnexDir r "export.ex" fromUUID u +gitAnnexExportExcludeLog u r = fromRawFilePath $ + gitAnnexDir r P. "export.ex" P. fromUUID u {- Directory containing database used to record remote content ids. - @@ -402,7 +407,7 @@ gitAnnexExportExcludeLog u r = gitAnnexDir r "export.ex" fromUUID u - need to be rebuilt with a new name.) -} gitAnnexContentIdentifierDbDir :: Git.Repo -> FilePath -gitAnnexContentIdentifierDbDir r = gitAnnexDir r "cidsdb" +gitAnnexContentIdentifierDbDir r = fromRawFilePath $ gitAnnexDir r P. "cidsdb" {- Lock file for writing to the content id database. -} gitAnnexContentIdentifierLock :: Git.Repo -> FilePath @@ -411,128 +416,137 @@ gitAnnexContentIdentifierLock r = gitAnnexContentIdentifierDbDir r ++ ".lck" {- .git/annex/schedulestate is used to store information about when - scheduled jobs were last run. -} gitAnnexScheduleState :: Git.Repo -> FilePath -gitAnnexScheduleState r = gitAnnexDir r "schedulestate" +gitAnnexScheduleState r = fromRawFilePath $ gitAnnexDir r P. "schedulestate" {- .git/annex/creds/ is used to store credentials to access some special - remotes. -} gitAnnexCredsDir :: Git.Repo -> FilePath -gitAnnexCredsDir r = addTrailingPathSeparator $ gitAnnexDir r "creds" +gitAnnexCredsDir r = fromRawFilePath $ + P.addTrailingPathSeparator $ gitAnnexDir r P. "creds" {- .git/annex/certificate.pem and .git/annex/key.pem are used by the webapp - when HTTPS is enabled -} gitAnnexWebCertificate :: Git.Repo -> FilePath -gitAnnexWebCertificate r = gitAnnexDir r "certificate.pem" +gitAnnexWebCertificate r = fromRawFilePath $ gitAnnexDir r P. "certificate.pem" gitAnnexWebPrivKey :: Git.Repo -> FilePath -gitAnnexWebPrivKey r = gitAnnexDir r "privkey.pem" +gitAnnexWebPrivKey r = fromRawFilePath $ gitAnnexDir r P. "privkey.pem" {- .git/annex/feeds/ is used to record per-key (url) state by importfeeds -} gitAnnexFeedStateDir :: Git.Repo -> FilePath -gitAnnexFeedStateDir r = addTrailingPathSeparator $ gitAnnexDir r "feedstate" +gitAnnexFeedStateDir r = fromRawFilePath $ + P.addTrailingPathSeparator $ gitAnnexDir r P. "feedstate" gitAnnexFeedState :: Key -> Git.Repo -> FilePath -gitAnnexFeedState k r = gitAnnexFeedStateDir r keyFile k +gitAnnexFeedState k r = gitAnnexFeedStateDir r fromRawFilePath (keyFile k) {- .git/annex/merge/ is used as a empty work tree for direct mode merges and - merges in adjusted branches. -} gitAnnexMergeDir :: Git.Repo -> FilePath -gitAnnexMergeDir r = addTrailingPathSeparator $ gitAnnexDir r "merge" +gitAnnexMergeDir r = fromRawFilePath $ + P.addTrailingPathSeparator $ gitAnnexDir r P. "merge" {- .git/annex/transfer/ is used to record keys currently - being transferred, and other transfer bookkeeping info. -} gitAnnexTransferDir :: Git.Repo -> FilePath -gitAnnexTransferDir r = addTrailingPathSeparator $ gitAnnexDir r "transfer" +gitAnnexTransferDir r = fromRawFilePath $ + P.addTrailingPathSeparator $ gitAnnexDir r P. "transfer" {- .git/annex/journal/ is used to journal changes made to the git-annex - branch -} gitAnnexJournalDir :: Git.Repo -> FilePath -gitAnnexJournalDir r = addTrailingPathSeparator $ gitAnnexDir r "journal" +gitAnnexJournalDir r = fromRawFilePath $ + P.addTrailingPathSeparator $ gitAnnexDir r P. "journal" gitAnnexJournalDir' :: Git.Repo -> RawFilePath -gitAnnexJournalDir' r = P.addTrailingPathSeparator $ gitAnnexDir' r P. "journal" +gitAnnexJournalDir' r = P.addTrailingPathSeparator $ gitAnnexDir r P. "journal" {- Lock file for the journal. -} gitAnnexJournalLock :: Git.Repo -> FilePath -gitAnnexJournalLock r = gitAnnexDir r "journal.lck" +gitAnnexJournalLock r = fromRawFilePath $ gitAnnexDir r P. "journal.lck" {- Lock file for flushing a git queue that writes to the git index or - other git state that should only have one writer at a time. -} gitAnnexGitQueueLock :: Git.Repo -> FilePath -gitAnnexGitQueueLock r = gitAnnexDir r "gitqueue.lck" +gitAnnexGitQueueLock r = fromRawFilePath $ gitAnnexDir r P. "gitqueue.lck" {- Lock file for the pre-commit hook. -} gitAnnexPreCommitLock :: Git.Repo -> FilePath -gitAnnexPreCommitLock r = gitAnnexDir r "precommit.lck" +gitAnnexPreCommitLock r = fromRawFilePath $ gitAnnexDir r P. "precommit.lck" {- Lock file for direct mode merge. -} gitAnnexMergeLock :: Git.Repo -> FilePath -gitAnnexMergeLock r = gitAnnexDir r "merge.lck" +gitAnnexMergeLock r = fromRawFilePath $ gitAnnexDir r P. "merge.lck" {- .git/annex/index is used to stage changes to the git-annex branch -} gitAnnexIndex :: Git.Repo -> FilePath -gitAnnexIndex r = gitAnnexDir r "index" +gitAnnexIndex r = fromRawFilePath $ gitAnnexDir r P. "index" {- Holds the ref of the git-annex branch that the index was last updated to. - - The .lck in the name is a historical accident; this is not used as a - lock. -} gitAnnexIndexStatus :: Git.Repo -> FilePath -gitAnnexIndexStatus r = gitAnnexDir r "index.lck" +gitAnnexIndexStatus r = fromRawFilePath $ gitAnnexDir r P. "index.lck" {- The index file used to generate a filtered branch view._-} gitAnnexViewIndex :: Git.Repo -> FilePath -gitAnnexViewIndex r = gitAnnexDir r "viewindex" +gitAnnexViewIndex r = fromRawFilePath $ gitAnnexDir r P. "viewindex" {- File containing a log of recently accessed views. -} gitAnnexViewLog :: Git.Repo -> FilePath -gitAnnexViewLog r = gitAnnexDir r "viewlog" +gitAnnexViewLog r = fromRawFilePath $ gitAnnexDir r P. "viewlog" {- List of refs that have already been merged into the git-annex branch. -} gitAnnexMergedRefs :: Git.Repo -> FilePath -gitAnnexMergedRefs r = gitAnnexDir r "mergedrefs" +gitAnnexMergedRefs r = fromRawFilePath $ gitAnnexDir r P. "mergedrefs" {- List of refs that should not be merged into the git-annex branch. -} gitAnnexIgnoredRefs :: Git.Repo -> FilePath -gitAnnexIgnoredRefs r = gitAnnexDir r "ignoredrefs" +gitAnnexIgnoredRefs r = fromRawFilePath $ gitAnnexDir r P. "ignoredrefs" {- Pid file for daemon mode. -} gitAnnexPidFile :: Git.Repo -> FilePath -gitAnnexPidFile r = gitAnnexDir r "daemon.pid" +gitAnnexPidFile r = fromRawFilePath $ gitAnnexDir r P. "daemon.pid" {- Pid lock file for pidlock mode -} gitAnnexPidLockFile :: Git.Repo -> FilePath -gitAnnexPidLockFile r = gitAnnexDir r "pidlock" +gitAnnexPidLockFile r = fromRawFilePath $ gitAnnexDir r P. "pidlock" {- Status file for daemon mode. -} gitAnnexDaemonStatusFile :: Git.Repo -> FilePath -gitAnnexDaemonStatusFile r = gitAnnexDir r "daemon.status" +gitAnnexDaemonStatusFile r = fromRawFilePath $ + gitAnnexDir r P. "daemon.status" {- Log file for daemon mode. -} gitAnnexLogFile :: Git.Repo -> FilePath -gitAnnexLogFile r = gitAnnexDir r "daemon.log" +gitAnnexLogFile r = fromRawFilePath $ gitAnnexDir r P. "daemon.log" {- Log file for fuzz test. -} gitAnnexFuzzTestLogFile :: Git.Repo -> FilePath -gitAnnexFuzzTestLogFile r = gitAnnexDir r "fuzztest.log" +gitAnnexFuzzTestLogFile r = fromRawFilePath $ + gitAnnexDir r P. "fuzztest.log" {- Html shim file used to launch the webapp. -} gitAnnexHtmlShim :: Git.Repo -> FilePath -gitAnnexHtmlShim r = gitAnnexDir r "webapp.html" +gitAnnexHtmlShim r = fromRawFilePath $ gitAnnexDir r P. "webapp.html" {- File containing the url to the webapp. -} gitAnnexUrlFile :: Git.Repo -> FilePath -gitAnnexUrlFile r = gitAnnexDir r "url" +gitAnnexUrlFile r = fromRawFilePath $ gitAnnexDir r P. "url" {- Temporary file used to edit configuriation from the git-annex branch. -} gitAnnexTmpCfgFile :: Git.Repo -> FilePath -gitAnnexTmpCfgFile r = gitAnnexDir r "config.tmp" +gitAnnexTmpCfgFile r = fromRawFilePath $ gitAnnexDir r P. "config.tmp" {- .git/annex/ssh/ is used for ssh connection caching -} gitAnnexSshDir :: Git.Repo -> FilePath -gitAnnexSshDir r = addTrailingPathSeparator $ gitAnnexDir r "ssh" +gitAnnexSshDir r = fromRawFilePath $ + P.addTrailingPathSeparator $ gitAnnexDir r P. "ssh" {- .git/annex/remotes/ is used for remote-specific state. -} gitAnnexRemotesDir :: Git.Repo -> FilePath -gitAnnexRemotesDir r = addTrailingPathSeparator $ gitAnnexDir r "remotes" +gitAnnexRemotesDir r = fromRawFilePath $ + P.addTrailingPathSeparator $ gitAnnexDir r P. "remotes" {- This is the base directory name used by the assistant when making - repositories, by default. -} @@ -589,11 +603,8 @@ reSanitizeKeyName = preSanitizeKeyName' True - Changing what this function escapes and how is not a good idea, as it - can cause existing objects to get lost. -} -keyFile :: Key -> FilePath -keyFile = fromRawFilePath . keyFile' - -keyFile' :: Key -> RawFilePath -keyFile' k = +keyFile :: Key -> RawFilePath +keyFile k = let b = serializeKey' k in if S8.any (`elem` ['&', '%', ':', '/']) b then S8.concatMap esc b @@ -608,11 +619,8 @@ keyFile' k = {- Reverses keyFile, converting a filename fragment (ie, the basename of - the symlink target) into a key. -} -fileKey :: FilePath -> Maybe Key -fileKey = fileKey' . toRawFilePath - -fileKey' :: RawFilePath -> Maybe Key -fileKey' = deserializeKey' . S8.intercalate "/" . map go . S8.split '%' +fileKey :: RawFilePath -> Maybe Key +fileKey = deserializeKey' . S8.intercalate "/" . map go . S8.split '%' where go = S8.concat . unescafterfirst . S8.split '&' unescafterfirst [] = [] @@ -634,7 +642,7 @@ fileKey' = deserializeKey' . S8.intercalate "/" . map go . S8.split '%' keyPath :: Key -> Hasher -> RawFilePath keyPath key hasher = hasher key P. f P. f where - f = keyFile' key + f = keyFile key {- All possibile locations to store a key in a special remote - using different directory hashes. diff --git a/Annex/Perms.hs b/Annex/Perms.hs index d2b270dd40..a24e0362f0 100644 --- a/Annex/Perms.hs +++ b/Annex/Perms.hs @@ -70,7 +70,7 @@ annexFileMode = withShared $ return . go createAnnexDirectory :: FilePath -> Annex () createAnnexDirectory dir = walk dir [] =<< top where - top = parentDir <$> fromRepo gitAnnexDir + top = parentDir . fromRawFilePath <$> fromRepo gitAnnexDir walk d below stop | d `equalFilePath` stop = done | otherwise = ifM (liftIO $ doesDirectoryExist d) diff --git a/Annex/VariantFile.hs b/Annex/VariantFile.hs index 65f989ebae..781732368d 100644 --- a/Annex/VariantFile.hs +++ b/Annex/VariantFile.hs @@ -36,7 +36,7 @@ mkVariant file variant = takeDirectory file -} variantFile :: FilePath -> Key -> FilePath variantFile file key - | doubleconflict = mkVariant file (keyFile key) + | doubleconflict = mkVariant file (fromRawFilePath (keyFile key)) | otherwise = mkVariant file (shortHash $ serializeKey' key) where doubleconflict = variantMarker `isInfixOf` file diff --git a/Assistant/Repair.hs b/Assistant/Repair.hs index f8e7bedcec..a96921796c 100644 --- a/Assistant/Repair.hs +++ b/Assistant/Repair.hs @@ -130,7 +130,7 @@ repairStaleGitLocks r = do repairStaleLocks lockfiles return $ not $ null lockfiles where - findgitfiles = dirContentsRecursiveSkipping (== dropTrailingPathSeparator annexDir) True . fromRawFilePath . Git.localGitDir + findgitfiles = dirContentsRecursiveSkipping (== dropTrailingPathSeparator (fromRawFilePath annexDir)) True . fromRawFilePath . Git.localGitDir islock f | "gc.pid" `isInfixOf` f = False | ".lock" `isSuffixOf` f = True diff --git a/Command/AddUnused.hs b/Command/AddUnused.hs index 025b25e4d0..b14e85bde5 100644 --- a/Command/AddUnused.hs +++ b/Command/AddUnused.hs @@ -31,7 +31,7 @@ perform key = next $ do addLink file key Nothing return True where - file = "unused." ++ keyFile key + file = "unused." ++ fromRawFilePath (keyFile key) {- The content is not in the annex, but in another directory, and - it seems better to error out, rather than moving bad/tmp content into diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 3010a6ce37..65c0112ea7 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -164,7 +164,7 @@ performRemote key afile backend numcopies remote = pid <- liftIO getPID t <- fromRepo gitAnnexTmpObjectDir createAnnexDirectory t - let tmp = t "fsck" ++ show pid ++ "." ++ keyFile key + let tmp = t "fsck" ++ show pid ++ "." ++ fromRawFilePath (keyFile key) let cleanup = liftIO $ catchIO (removeFile tmp) (const noop) cleanup cleanup `after` a tmp @@ -516,7 +516,7 @@ badContent key = do badContentRemote :: Remote -> FilePath -> Key -> Annex String badContentRemote remote localcopy key = do bad <- fromRepo gitAnnexBadDir - let destbad = bad keyFile key + let destbad = bad fromRawFilePath (keyFile key) movedbad <- ifM (inAnnex key <||> liftIO (doesFileExist destbad)) ( return False , do diff --git a/Command/Info.hs b/Command/Info.hs index 94292077f8..3448ee6ef2 100644 --- a/Command/Info.hs +++ b/Command/Info.hs @@ -454,7 +454,7 @@ disk_size :: Stat disk_size = simpleStat "available local disk space" $ calcfree <$> (lift $ annexDiskReserve <$> Annex.getGitConfig) - <*> (lift $ inRepo $ getDiskFree . gitAnnexDir) + <*> (lift $ inRepo $ getDiskFree . fromRawFilePath . gitAnnexDir) <*> mkSizer where calcfree reserve (Just have) sizer = unwords @@ -674,7 +674,7 @@ staleSize label dirspec = go =<< lift (dirKeys dirspec) keysizes keys = do dir <- lift $ fromRepo dirspec liftIO $ forM keys $ \k -> catchDefaultIO 0 $ - getFileSize (dir keyFile k) + getFileSize (dir fromRawFilePath (keyFile k)) aside :: String -> String aside s = " (" ++ s ++ ")" diff --git a/Command/Map.hs b/Command/Map.hs index de2a0c6dd6..c35ad6870d 100644 --- a/Command/Map.hs +++ b/Command/Map.hs @@ -46,7 +46,9 @@ start = startingNoMessage (ActionItemOther Nothing) $ do umap <- uuidDescMap trustmap <- trustMapLoad - file <- () <$> fromRepo gitAnnexDir <*> pure "map.dot" + file <- () + <$> fromRepo (fromRawFilePath . gitAnnexDir) + <*> pure "map.dot" liftIO $ writeFile file (drawMap rs trustmap umap) next $ diff --git a/Command/Uninit.hs b/Command/Uninit.hs index 29278a6c4e..ff9c4d3880 100644 --- a/Command/Uninit.hs +++ b/Command/Uninit.hs @@ -58,7 +58,7 @@ startCheckIncomplete file _ = giveup $ unlines finish :: Annex () finish = do - annexdir <- fromRepo gitAnnexDir + annexdir <- fromRawFilePath <$> fromRepo gitAnnexDir annexobjectdir <- fromRepo gitAnnexObjectDir leftovers <- removeUnannexed =<< listKeys InAnnex prepareRemoveAnnexDir annexdir diff --git a/Database/Keys.hs b/Database/Keys.hs index 7b34bae39a..77506d1b26 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -43,6 +43,9 @@ import Git.Command import Git.Types import Git.Index +import qualified Data.ByteString as S +import qualified System.FilePath.ByteString as P + {- Runs an action that reads from the database. - - If the database doesn't already exist, it's not created; mempty is @@ -263,7 +266,7 @@ reconcileStaged qh = do -- pointer file. And a pointer file that is replaced with -- a non-pointer file will match this. , Param $ "-G^" ++ fromRawFilePath (toInternalGitPath $ - toRawFilePath (pathSeparator:objectDir)) + P.pathSeparator `S.cons` objectDir') -- Don't include files that were deleted, because this only -- wants to update information for files that are present -- in the index. diff --git a/Database/Keys/SQL.hs b/Database/Keys/SQL.hs index 3846aaf625..cc307f996a 100644 --- a/Database/Keys/SQL.hs +++ b/Database/Keys/SQL.hs @@ -17,7 +17,6 @@ import Database.Types import Database.Handle import qualified Database.Queue as H import Utility.InodeCache -import Utility.FileSystemEncoding import Git.FilePath import Database.Persist.Sql hiding (Key) diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs index 85d9687e4c..e046895a1c 100644 --- a/Git/UnionMerge.hs +++ b/Git/UnionMerge.hs @@ -70,18 +70,18 @@ diffOpts = ["--raw", "-z", "-r", "--no-renames", "-l0"] doMerge :: HashObjectHandle -> CatFileHandle -> [String] -> Repo -> Streamer doMerge hashhandle ch differ repo streamer = do (diff, cleanup) <- pipeNullSplit (map Param differ) repo - go (map decodeBL' diff) + go diff void $ cleanup where go [] = noop - go (info:file:rest) = mergeFile info file hashhandle ch >>= + go (info:file:rest) = mergeFile (decodeBL' info) (L.toStrict file) hashhandle ch >>= maybe (go rest) (\l -> streamer l >> go rest) go (_:[]) = error $ "parse error " ++ show differ {- Given an info line from a git raw diff, and the filename, generates - a line suitable for update-index that union merges the two sides of the - diff. -} -mergeFile :: String -> FilePath -> HashObjectHandle -> CatFileHandle -> IO (Maybe L.ByteString) +mergeFile :: String -> RawFilePath -> HashObjectHandle -> CatFileHandle -> IO (Maybe L.ByteString) mergeFile info file hashhandle h = case filter (/= nullSha) [Ref asha, Ref bsha] of [] -> return Nothing (sha:[]) -> use sha @@ -91,7 +91,7 @@ mergeFile info file hashhandle h = case filter (/= nullSha) [Ref asha, Ref bsha] where [_colonmode, _bmode, asha, bsha, _status] = words info use sha = return $ Just $ - updateIndexLine sha TreeFile $ asTopFilePath $ toRawFilePath file + updateIndexLine sha TreeFile $ asTopFilePath file -- Get file and split into lines to union merge. -- The encoding of the file is assumed to be either ASCII or utf-8; -- in either case it's safe to split on \n diff --git a/Logs.hs b/Logs.hs index 18a045b452..5faec561ef 100644 --- a/Logs.hs +++ b/Logs.hs @@ -119,18 +119,18 @@ exportLog = "export.log" {- The pathname of the location log file for a given key. -} locationLogFile :: GitConfig -> Key -> RawFilePath locationLogFile config key = - branchHashDir config key P. keyFile' key <> ".log" + branchHashDir config key P. keyFile key <> ".log" {- The filename of the url log for a given key. -} urlLogFile :: GitConfig -> Key -> RawFilePath urlLogFile config key = - branchHashDir config key P. keyFile' key <> urlLogExt + branchHashDir config key P. keyFile key <> urlLogExt {- Old versions stored the urls elsewhere. -} oldurlLogs :: GitConfig -> Key -> [RawFilePath] oldurlLogs config key = [ "remote/web" P. hdir P. serializeKey' key <> ".log" - , "remote/web" P. hdir P. keyFile' key <> ".log" + , "remote/web" P. hdir P. keyFile key <> ".log" ] where hdir = branchHashDir config key @@ -145,7 +145,7 @@ isUrlLog file = urlLogExt `S.isSuffixOf` file {- The filename of the remote state log for a given key. -} remoteStateLogFile :: GitConfig -> Key -> RawFilePath remoteStateLogFile config key = - (branchHashDir config key P. keyFile' key) + (branchHashDir config key P. keyFile key) <> remoteStateLogExt remoteStateLogExt :: S.ByteString @@ -157,7 +157,7 @@ isRemoteStateLog path = remoteStateLogExt `S.isSuffixOf` path {- The filename of the chunk log for a given key. -} chunkLogFile :: GitConfig -> Key -> RawFilePath chunkLogFile config key = - (branchHashDir config key P. keyFile' key) + (branchHashDir config key P. keyFile key) <> chunkLogExt chunkLogExt :: S.ByteString @@ -169,7 +169,7 @@ isChunkLog path = chunkLogExt `S.isSuffixOf` path {- The filename of the metadata log for a given key. -} metaDataLogFile :: GitConfig -> Key -> RawFilePath metaDataLogFile config key = - (branchHashDir config key P. keyFile' key) + (branchHashDir config key P. keyFile key) <> metaDataLogExt metaDataLogExt :: S.ByteString @@ -181,7 +181,7 @@ isMetaDataLog path = metaDataLogExt `S.isSuffixOf` path {- The filename of the remote metadata log for a given key. -} remoteMetaDataLogFile :: GitConfig -> Key -> RawFilePath remoteMetaDataLogFile config key = - (branchHashDir config key P. keyFile' key) + (branchHashDir config key P. keyFile key) <> remoteMetaDataLogExt remoteMetaDataLogExt :: S.ByteString @@ -193,7 +193,7 @@ isRemoteMetaDataLog path = remoteMetaDataLogExt `S.isSuffixOf` path {- The filename of the remote content identifier log for a given key. -} remoteContentIdentifierLogFile :: GitConfig -> Key -> RawFilePath remoteContentIdentifierLogFile config key = - (branchHashDir config key P. keyFile' key) + (branchHashDir config key P. keyFile key) <> remoteContentIdentifierExt remoteContentIdentifierExt :: S.ByteString @@ -205,7 +205,7 @@ isRemoteContentIdentifierLog path = remoteContentIdentifierExt `S.isSuffixOf` pa {- From an extension and a log filename, get the key that it's a log for. -} extLogFileKey :: S.ByteString -> RawFilePath -> Maybe Key extLogFileKey expectedext path - | encodeBS' ext == expectedext = fileKey base + | encodeBS' ext == expectedext = fileKey (toRawFilePath base) | otherwise = Nothing where file = takeFileName (fromRawFilePath path) diff --git a/Logs/Transfer.hs b/Logs/Transfer.hs index 2dabe5cf34..ab9a8ca61b 100644 --- a/Logs/Transfer.hs +++ b/Logs/Transfer.hs @@ -195,12 +195,12 @@ recordFailedTransfer t info = do transferFile :: Transfer -> Git.Repo -> FilePath transferFile (Transfer direction u kd) r = transferDir direction r filter (/= '/') (fromUUID u) - keyFile (mkKey (const kd)) + fromRawFilePath (keyFile (mkKey (const kd))) {- The transfer information file to use to record a failed Transfer -} failedTransferFile :: Transfer -> Git.Repo -> FilePath failedTransferFile (Transfer direction u kd) r = failedTransferDir u direction r - keyFile (mkKey (const kd)) + fromRawFilePath (keyFile (mkKey (const kd))) {- The transfer lock file corresponding to a given transfer info file. -} transferLockFile :: FilePath -> FilePath @@ -215,7 +215,7 @@ parseTransferFile file [direction, u, key] -> Transfer <$> parseDirection direction <*> pure (toUUID u) - <*> fmap (fromKey id) (fileKey key) + <*> fmap (fromKey id) (fileKey (toRawFilePath key)) _ -> Nothing where bits = splitDirectories file diff --git a/Remote/BitTorrent.hs b/Remote/BitTorrent.hs index 09fa5ed744..0bbf4b24a7 100644 --- a/Remote/BitTorrent.hs +++ b/Remote/BitTorrent.hs @@ -195,7 +195,7 @@ downloadTorrentFile u = do createAnnexDirectory (parentDir torrent) if isTorrentMagnetUrl u then withOtherTmp $ \othertmp -> do - kf <- keyFile <$> torrentUrlKey u + kf <- fromRawFilePath . keyFile <$> torrentUrlKey u let metadir = othertmp "torrentmeta" kf createAnnexDirectory metadir showOutput @@ -239,7 +239,7 @@ downloadTorrentContent :: Key -> URLString -> FilePath -> Int -> MeterUpdate -> downloadTorrentContent k u dest filenum p = do torrent <- tmpTorrentFile u withOtherTmp $ \othertmp -> do - kf <- keyFile <$> torrentUrlKey u + kf <- fromRawFilePath . keyFile <$> torrentUrlKey u let downloaddir = othertmp "torrent" kf createAnnexDirectory downloaddir f <- wantedfile torrent diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 933ccd23ce..3aa6185155 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -140,7 +140,7 @@ getLocation d k = do {- Directory where the file(s) for a key are stored. -} storeDir :: FilePath -> Key -> FilePath storeDir d k = addTrailingPathSeparator $ - d fromRawFilePath (hashDirLower def k) keyFile k + d fromRawFilePath (hashDirLower def k) fromRawFilePath (keyFile k) {- Check if there is enough free disk space in the remote's directory to - store the key. Note that the unencrypted key size is checked. -} @@ -164,12 +164,13 @@ store d chunkconfig k b p = liftIO $ do case chunkconfig of LegacyChunks chunksize -> Legacy.store chunksize finalizeStoreGeneric k b p tmpdir destdir _ -> do - let tmpf = tmpdir keyFile k + let tmpf = tmpdir kf meteredWriteFile p tmpf b finalizeStoreGeneric tmpdir destdir return True where - tmpdir = addTrailingPathSeparator $ d "tmp" keyFile k + tmpdir = addTrailingPathSeparator $ d "tmp" kf + kf = fromRawFilePath (keyFile k) destdir = storeDir d k {- Passed a temp directory that contains the files that should be placed diff --git a/Remote/Directory/LegacyChunked.hs b/Remote/Directory/LegacyChunked.hs index fb3626f489..d9d5a860ce 100644 --- a/Remote/Directory/LegacyChunked.hs +++ b/Remote/Directory/LegacyChunked.hs @@ -91,7 +91,7 @@ store chunksize finalizer k b p = storeHelper finalizer k $ \dests -> retrieve :: (FilePath -> Key -> [FilePath]) -> FilePath -> Preparer Retriever retrieve locations d basek a = withOtherTmp $ \tmpdir -> do showLongNote "This remote uses the deprecated chunksize setting. So this will be quite slow." - let tmp = tmpdir keyFile basek ++ ".directorylegacy.tmp" + let tmp = tmpdir fromRawFilePath (keyFile basek) ++ ".directorylegacy.tmp" a $ Just $ byteRetriever $ \k sink -> do liftIO $ void $ withStoredFiles d locations k $ \fs -> do forM_ fs $ diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index c3a3f31348..9fa5916978 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -351,9 +351,9 @@ store' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> Storer store' repo r rsyncopts | not $ Git.repoIsUrl repo = byteStorer $ \k b p -> guardUsable repo (return False) $ liftIO $ do - let tmpdir = Git.repoLocation repo "tmp" keyFile k + let tmpdir = Git.repoLocation repo "tmp" fromRawFilePath (keyFile k) void $ tryIO $ createDirectoryIfMissing True tmpdir - let tmpf = tmpdir keyFile k + let tmpf = tmpdir fromRawFilePath (keyFile k) meteredWriteFile p tmpf b let destdir = parentDir $ gCryptLocation repo k Remote.Directory.finalizeStoreGeneric tmpdir destdir diff --git a/Remote/Helper/Chunked/Legacy.hs b/Remote/Helper/Chunked/Legacy.hs index c804b23754..e7a7c5fc67 100644 --- a/Remote/Helper/Chunked/Legacy.hs +++ b/Remote/Helper/Chunked/Legacy.hs @@ -77,7 +77,7 @@ storeChunks key tmp dest storer recorder finalizer = either onerr return warningIO (show e) return False - basef = tmp ++ keyFile key + basef = tmp ++ fromRawFilePath (keyFile key) tmpdests = map (basef ++ ) chunkStream {- Given a list of destinations to use, chunks the data according to the diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index f171b69e60..1847514002 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -226,7 +226,7 @@ remove o k = removeGeneric o includes [ parentDir dir , dir -- match content directory and anything in it - , dir keyFile k "***" + , dir fromRawFilePath (keyFile k) "***" ] {- An empty directory is rsynced to make it delete. Everything is excluded, diff --git a/Remote/Rsync/RsyncUrl.hs b/Remote/Rsync/RsyncUrl.hs index 2b0dbc1966..dc810dea4d 100644 --- a/Remote/Rsync/RsyncUrl.hs +++ b/Remote/Rsync/RsyncUrl.hs @@ -44,7 +44,7 @@ rsyncUrls :: RsyncOpts -> Key -> [RsyncUrl] rsyncUrls o k = map use dirHashes where use h = rsyncUrl o fromRawFilePath (hash h) rsyncEscape o (f f) - f = keyFile k + f = fromRawFilePath (keyFile k) #ifndef mingw32_HOST_OS hash h = h def k #else diff --git a/Remote/WebDAV/DavLocation.hs b/Remote/WebDAV/DavLocation.hs index 3893533a22..bd188a6de4 100644 --- a/Remote/WebDAV/DavLocation.hs +++ b/Remote/WebDAV/DavLocation.hs @@ -36,7 +36,7 @@ inLocation d = inDAVLocation ( d') {- The directory where files(s) for a key are stored. -} keyDir :: Key -> DavLocation -keyDir k = addTrailingPathSeparator $ hashdir keyFile k +keyDir k = addTrailingPathSeparator $ hashdir fromRawFilePath (keyFile k) where #ifndef mingw32_HOST_OS hashdir = fromRawFilePath $ hashDirLower def k @@ -45,7 +45,7 @@ keyDir k = addTrailingPathSeparator $ hashdir keyFile k #endif keyLocation :: Key -> DavLocation -keyLocation k = keyDir k ++ keyFile k +keyLocation k = keyDir k ++ fromRawFilePath (keyFile k) {- Paths containing # or ? cannot be represented in an url, so fails on - those. -} @@ -60,7 +60,7 @@ exportLocation l = {- Where we store temporary data for a key as it's being uploaded. -} keyTmpLocation :: Key -> DavLocation -keyTmpLocation = tmpLocation . keyFile +keyTmpLocation = tmpLocation . fromRawFilePath . keyFile tmpLocation :: FilePath -> DavLocation tmpLocation f = "git-annex-webdav-tmp-" ++ f diff --git a/Upgrade/V0.hs b/Upgrade/V0.hs index 00dce6d125..2b5b2d4eba 100644 --- a/Upgrade/V0.hs +++ b/Upgrade/V0.hs @@ -16,7 +16,7 @@ upgrade = do showAction "v0 to v1" -- do the reorganisation of the key files - olddir <- fromRepo gitAnnexDir + olddir <- fromRawFilePath <$> fromRepo gitAnnexDir keys <- getKeysPresent0 olddir forM_ keys $ \k -> moveAnnex k $ olddir keyFile0 k diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs index e311044664..88a3494484 100644 --- a/Upgrade/V1.hs +++ b/Upgrade/V1.hs @@ -238,7 +238,7 @@ logFile2 = logFile' (hashDirLower def) logFile' :: (Key -> RawFilePath) -> Key -> Git.Repo -> String logFile' hasher key repo = - gitStateDir repo ++ fromRawFilePath (hasher key) ++ keyFile key ++ ".log" + gitStateDir repo ++ fromRawFilePath (hasher key) ++ fromRawFilePath (keyFile key) ++ ".log" stateDir :: FilePath stateDir = addTrailingPathSeparator ".git-annex" diff --git a/Upgrade/V7.hs b/Upgrade/V7.hs index 600f91e206..d3ba494619 100644 --- a/Upgrade/V7.hs +++ b/Upgrade/V7.hs @@ -46,7 +46,7 @@ upgrade automatic = do return True gitAnnexKeysDbOld :: Git.Repo -> FilePath -gitAnnexKeysDbOld r = gitAnnexDir r "keys" +gitAnnexKeysDbOld r = fromRawFilePath (gitAnnexDir r) "keys" gitAnnexKeysDbLockOld :: Git.Repo -> FilePath gitAnnexKeysDbLockOld r = gitAnnexKeysDbOld r ++ ".lck" @@ -55,7 +55,7 @@ gitAnnexKeysDbIndexCacheOld :: Git.Repo -> FilePath gitAnnexKeysDbIndexCacheOld r = gitAnnexKeysDbOld r ++ ".cache" gitAnnexContentIdentifierDbDirOld :: Git.Repo -> FilePath -gitAnnexContentIdentifierDbDirOld r = gitAnnexDir r "cids" +gitAnnexContentIdentifierDbDirOld r = fromRawFilePath (gitAnnexDir r) "cids" gitAnnexContentIdentifierLockOld :: Git.Repo -> FilePath gitAnnexContentIdentifierLockOld r = gitAnnexContentIdentifierDbDirOld r ++ ".lck" diff --git a/doc/devblog/day_614__bytestring_wrapping_up.mdwn b/doc/devblog/day_614__bytestring_wrapping_up.mdwn new file mode 100644 index 0000000000..db686e7d6f --- /dev/null +++ b/doc/devblog/day_614__bytestring_wrapping_up.mdwn @@ -0,0 +1,15 @@ +Cut the last release before the switch over to end-to-end ByteString. +(Including fixing the rpm repo's index which had not been getting updated.) + +I had left the `bs` branch last week with a known bug, so got that fixed. +Also there were some encoding problems on windows with the ByteString +filepaths, which needed a new release of filepath-bytestring to clean up. +Now I think the `bs` branch is really in a mergeable state. +(It's still not tested on Windows at all though.) + +Took the last little while to do some more profiling. Mostly the remaining +ByteString conversions barely seem worth doing (1% improvement at most), +but [[todo/optimise_journal_access]] seems like it could pay off well. + +Also found time in there somewhere to implement `git annex inprogress +--key` diff --git a/doc/profiling/comment_7_95f5afb616c7eba60473cdeb85a070b0._comment b/doc/profiling/comment_7_95f5afb616c7eba60473cdeb85a070b0._comment new file mode 100644 index 0000000000..05db73f4fb --- /dev/null +++ b/doc/profiling/comment_7_95f5afb616c7eba60473cdeb85a070b0._comment @@ -0,0 +1,40 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 7""" + date="2019-12-18T19:18:04Z" + content=""" +Updated profiling. git-annex find is now ByteString end-to-end! +Note the massive reduction in alloc, and improved runtime. + + Wed Dec 11 14:41 2019 Time and Allocation Profiling Report (Final) + + git-annex +RTS -p -RTS find + + total time = 1.51 secs (1515 ticks @ 1000 us, 1 processor) + total alloc = 608,475,328 bytes (excludes profiling overheads) + + COST CENTRE MODULE SRC %time %alloc + + keyFile' Annex.Locations Annex/Locations.hs:(590,1)-(600,30) 8.2 16.6 + >>=.\.succ' Data.Attoparsec.Internal.Types Data/Attoparsec/Internal/Types.hs:146:13-76 4.7 0.7 + getAnnexLinkTarget'.probesymlink Annex.Link Annex/Link.hs:79:9-46 4.2 7.6 + >>=.\ Data.Attoparsec.Internal.Types Data/Attoparsec/Internal/Types.hs:(146,9)-(147,44) 3.9 2.3 + parseLinkTarget Annex.Link Annex/Link.hs:(255,1)-(263,25) 3.9 11.8 + doesPathExist Utility.RawFilePath Utility/RawFilePath.hs:30:1-25 3.4 0.6 + keyFile'.esc Annex.Locations Annex/Locations.hs:(596,9)-(600,30) 3.2 14.7 + fileKey' Annex.Locations Annex/Locations.hs:(609,1)-(619,41) 3.0 4.7 + parseLinkTargetOrPointer Annex.Link Annex/Link.hs:(240,1)-(244,25) 2.8 0.2 + hashUpdates.\.\.\ Crypto.Hash Crypto/Hash.hs:85:48-99 2.5 0.1 + combineAlways System.FilePath.Posix.ByteString System/FilePath/Posix/../Internal.hs:(698,1)-(704,67) 2.0 3.3 + getState Annex Annex.hs:(251,1)-(254,27) 2.0 1.1 + withPtr.makeTrampoline Basement.Block.Base Basement/Block/Base.hs:(401,5)-(404,31) 1.9 1.7 + withMutablePtrHint Basement.Block.Base Basement/Block/Base.hs:(468,1)-(482,50) 1.8 1.2 + parseKeyVariety Types.Key Types/Key.hs:(323,1)-(371,42) 1.8 0.0 + fileKey'.go Annex.Locations Annex/Locations.hs:611:9-55 1.7 2.2 + isLinkToAnnex Annex.Link Annex/Link.hs:(299,1)-(305,47) 1.7 1.0 + hashDirMixed Annex.DirHashes Annex/DirHashes.hs:(82,1)-(90,27) 1.7 1.3 + primitive Basement.Monad Basement/Monad.hs:72:5-18 1.6 0.1 + withPtr Basement.Block.Base Basement/Block/Base.hs:(395,1)-(404,31) 1.5 1.6 + mkKeySerialization Types.Key Types/Key.hs:(115,1)-(117,22) 1.1 2.8 + decimal.step Data.Attoparsec.ByteString.Char8 Data/Attoparsec/ByteString/Char8.hs:448:9-49 0.8 1.2 +"""]] diff --git a/doc/todo/addunlocked_config_setting/comment_3_6909726735abb7945930ba45632e4769._comment b/doc/todo/addunlocked_config_setting/comment_3_6909726735abb7945930ba45632e4769._comment new file mode 100644 index 0000000000..e16430a758 --- /dev/null +++ b/doc/todo/addunlocked_config_setting/comment_3_6909726735abb7945930ba45632e4769._comment @@ -0,0 +1,32 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2019-12-19T15:29:40Z" + content=""" +Retargeting this todo at something useful post-git-add-kerfluffle, +annex.addunlocked could usefully be a pagespec to allow adding some files +unlocked and others locked (by git-annex add only, not git add). +"true" would be the same as "anything" and false as "nothing". + +--- + +It may also then make sense to let it be configured in .gitattributes. +Although, the ugliness of setting a pagespec in .gitattributes, +as was done for annex.largefiles, coupled with the overhead of needing to +query that from git-check-attr for every file, makes me wary. + +(Surprising amount of `git-annex add` time is in querying the +annex.largefiles and annex.backend attributes. Setting the former in +gitconfig avoids the attribute query and speeds up add of smaller files by +2%. Granted I've sped up add (except hashing) by probably 20% this month, +and with large files the hashing dominates.) + +The query overhead could maybe be finessed: Since adding a file +already queries gitattributes for two other things, a single query could be +done for a file and the result cached. + +Letting it be globally configured via `git-annex config` is an alternative +that I'm leaning toward. +(That would also need some caching, easier to implement and faster +since it is not a per-file value as the gitattribute would be.) +"""]] diff --git a/doc/todo/git-annex-cat/comment_3_347a33a4a77fd385ab8f3551138b75e1._comment b/doc/todo/git-annex-cat/comment_3_347a33a4a77fd385ab8f3551138b75e1._comment new file mode 100644 index 0000000000..8c5929974c --- /dev/null +++ b/doc/todo/git-annex-cat/comment_3_347a33a4a77fd385ab8f3551138b75e1._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="named pipes as destination files" + date="2019-12-18T18:41:57Z" + content=""" +\"getting object content from remotes involve a destination file that is written to\" -- what happens if git-annex makes a named pipe, and passes that as the destination file name to the remote? +"""]] diff --git a/doc/todo/making_it_easier_to_smudge_dotfiles/comment_1_73ad5cd7f65c94a1db859d22eb6eece4._comment b/doc/todo/making_it_easier_to_smudge_dotfiles/comment_1_73ad5cd7f65c94a1db859d22eb6eece4._comment new file mode 100644 index 0000000000..96b75c3d10 --- /dev/null +++ b/doc/todo/making_it_easier_to_smudge_dotfiles/comment_1_73ad5cd7f65c94a1db859d22eb6eece4._comment @@ -0,0 +1,32 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2019-12-19T16:08:09Z" + content=""" +Hmm, it used to be that `git add .` would smudge all dotfiles without that +line, but now annex.largefiles has to be configured for it to smudge +anything. + +So, this could be dealt with in annex.largefiles. Both `anything` and +`include=*` currently match dotfiles. It's kind of weird really that `*` +matches dotfiles; it does not in the shell. If `*` did not match dotfiles +(and `anything` is just an alias for `include=*`), it would be fairly safe +to remove the `.* !filter` line by default. (If annex.largefiles has a +content-based setting, and a dotfile is large enough or the right mime type +or whatever, it's reasonable to default to smudging it.) + +Then, you could set annex.largfiles to match the dotfiles you want, +eg `include=* or include=.mydotfile`. You could put the config in +.gitattributes if you want to configure it globally. + +This change to annex.largefiles would also let `git-annex add` +stop skipping dotfiles by default; instead annex.largefiles would not match +dotfiles unless the user explicitly configured it to, and so the dotfiles +would be added as small files, directly to git. + +I like this because it unifies the behaviors of the two ways of adding, +and it reduces the complexity, rather than adding more. + +Removing the `.* !filter` line by default +would need to be done as part of the v8 upgrade, or a later upgrade. +"""]] diff --git a/doc/todo/making_it_easier_to_smudge_dotfiles/comment_2_3912c1194c3f4f0e4c372e7603e0a3e5._comment b/doc/todo/making_it_easier_to_smudge_dotfiles/comment_2_3912c1194c3f4f0e4c372e7603e0a3e5._comment new file mode 100644 index 0000000000..11cd3e4441 --- /dev/null +++ b/doc/todo/making_it_easier_to_smudge_dotfiles/comment_2_3912c1194c3f4f0e4c372e7603e0a3e5._comment @@ -0,0 +1,19 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2019-12-19T17:17:31Z" + content=""" +`*` is not only used in annex.largefiles, but other pagespecs too. +Like preferred content: + + exclude=archive/* + +So changing `*` to not match dotfiles would have wide reaching effects, +and it's really not good for different versions of git-annex to parse +preferred content expressions differently. And it seems too confusing to +have `*` match differently in annex.largefiles than in other pagespecs. + +Having a single config that controls both kinds of adds still seems like a +good idea, but I don't know what that config should be. +annex.largedotfiles? +"""]] diff --git a/doc/todo/optimise_by_converting_Ref_to_ByteString.mdwn b/doc/todo/optimise_by_converting_Ref_to_ByteString.mdwn new file mode 100644 index 0000000000..11328a9f37 --- /dev/null +++ b/doc/todo/optimise_by_converting_Ref_to_ByteString.mdwn @@ -0,0 +1,3 @@ +Profiling of `git annex find --not --in web` suggests that converting Ref +to contain a ByteString, rather than a String, would eliminate a +fromRawFilePath that uses about 1% of runtime. diff --git a/doc/todo/optimise_journal_access.mdwn b/doc/todo/optimise_journal_access.mdwn new file mode 100644 index 0000000000..a49441cf5e --- /dev/null +++ b/doc/todo/optimise_journal_access.mdwn @@ -0,0 +1,21 @@ +Often a command will need to read a number of files from the git-annex +branch, and it uses getJournalFile for each to check for any journalled +change that has not reached the branch. But typically, the journal is empty +and in such a case, that's a lot of time spent trying to open journal files +that DNE. + +Profiling eg, `git annex find --in web` shows things called by getJournalFile +use around 5% of runtime. + +What if, once at startup, it checked if the journal was entirely empty. +If so, it can remember that, and avoid reading journal files. +Perhaps paired with staging the journal if it's not empty. + +This could lead to behavior changes in some cases where one command is +writing changes and another command used to read them from the journal and +may no longer do so. But any such behavior change is of a behavior that +used to involve a race; the reader could just as well be ahead of the +writer and it would have already behaved as it would after the change. + +But: When a process writes to the journal, it will need to update its state +to remember it's no longer empty. --[[Joey]] diff --git a/doc/todo/optimize_by_converting_String_to_ByteString.mdwn b/doc/todo/optimize_by_converting_String_to_ByteString.mdwn index 9fcc9e5319..830f18d549 100644 --- a/doc/todo/optimize_by_converting_String_to_ByteString.mdwn +++ b/doc/todo/optimize_by_converting_String_to_ByteString.mdwn @@ -9,7 +9,7 @@ Benchmarking `git-annex find`, speedups range from 28-66%. The files fly by much more snappily. Other commands likely also speed up, but do more work than find so the improvement is not as large. -The `bs` branch is in a mergeable state now. +The `bs` branch is in a mergeable state now. [[done]] Stuff not entirely finished: