git-annex/Annex/Locations.hs
Joey Hess 6a3bd283b8
add restage log
When pointer files need to be restaged, they're first written to the
log, and then when the restage operation runs, it reads the log. This
way, if the git-annex process is interrupted before it can do the
restaging, a later git-annex process can do it.

Currently, this lets a git-annex get/drop command be interrupted and
then re-ran, and as long as it gets/drops additional files, it will
clean up after the interrupted command. But more changes are
needed to make it easier to restage after an interrupted process.

Kept using the git queue to run the restage action, even though the
list of files that it builds up for that action is not actually used by
the action. This could perhaps be simplified to make restaging a cleanup
action that gets registered, rather than using the git queue for it. But
I wasn't sure if that would cause visible behavior changes, when eg
dropping a large number of files, currently the git queue flushes
periodically, and so it restages incrementally, rather than all at the
end.

In restagePointerFiles, it reads the restage log twice, once to get
the number of files and size, and a second time to process it.
This seemed better than reading the whole file into memory, since
potentially a huge number of files could be in there. Probably the OS
will cache the file in memory and there will not be much performance
impact. It might be better to keep running tallies in another file
though. But updating that atomically with the log seems hard.

Also note that it's possible for calcRestageLog to see a different file
than streamRestageLog does. More files may be added to the log in
between. That is ok, it will only cause the filterprocessfaster heuristic to
operate with slightly out of date information, so it may make the wrong
choice for the files that got added and be a little slower than ideal.

Sponsored-by: Dartmouth College's DANDI project
2022-09-23 15:47:24 -04:00

671 lines
24 KiB
Haskell

{- git-annex file locations
-
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Locations (
keyFile,
fileKey,
keyPaths,
keyPath,
annexDir,
objectDir,
gitAnnexLocation,
gitAnnexLocation',
gitAnnexLocationDepth,
gitAnnexLink,
gitAnnexLinkCanonical,
gitAnnexContentLock,
gitAnnexContentLockLock,
gitAnnexInodeSentinal,
gitAnnexInodeSentinalCache,
annexLocationsBare,
annexLocationsNonBare,
gitAnnexDir,
gitAnnexObjectDir,
gitAnnexTmpOtherDir,
gitAnnexTmpOtherLock,
gitAnnexTmpOtherDirOld,
gitAnnexTmpWatcherDir,
gitAnnexTmpObjectDir,
gitAnnexTmpObjectLocation,
gitAnnexTmpWorkDir,
gitAnnexBadDir,
gitAnnexBadLocation,
gitAnnexUnusedLog,
gitAnnexKeysDbDir,
gitAnnexKeysDbLock,
gitAnnexKeysDbIndexCache,
gitAnnexFsckState,
gitAnnexFsckDbDir,
gitAnnexFsckDbDirOld,
gitAnnexFsckDbLock,
gitAnnexFsckResultsLog,
gitAnnexUpgradeLog,
gitAnnexUpgradeLock,
gitAnnexSmudgeLog,
gitAnnexSmudgeLock,
gitAnnexRestageLog,
gitAnnexRestageLock,
gitAnnexMoveLog,
gitAnnexMoveLock,
gitAnnexExportDir,
gitAnnexExportDbDir,
gitAnnexExportLock,
gitAnnexExportUpdateLock,
gitAnnexExportExcludeLog,
gitAnnexContentIdentifierDbDir,
gitAnnexContentIdentifierLock,
gitAnnexScheduleState,
gitAnnexTransferDir,
gitAnnexCredsDir,
gitAnnexWebCertificate,
gitAnnexWebPrivKey,
gitAnnexFeedStateDir,
gitAnnexFeedState,
gitAnnexMergeDir,
gitAnnexJournalDir,
gitAnnexPrivateJournalDir,
gitAnnexJournalLock,
gitAnnexGitQueueLock,
gitAnnexIndex,
gitAnnexPrivateIndex,
gitAnnexIndexStatus,
gitAnnexViewIndex,
gitAnnexViewLog,
gitAnnexMergedRefs,
gitAnnexIgnoredRefs,
gitAnnexPidFile,
gitAnnexPidLockFile,
gitAnnexDaemonStatusFile,
gitAnnexDaemonLogFile,
gitAnnexFuzzTestLogFile,
gitAnnexHtmlShim,
gitAnnexUrlFile,
gitAnnexTmpCfgFile,
gitAnnexSshDir,
gitAnnexRemotesDir,
gitAnnexAssistantDefaultDir,
HashLevels(..),
hashDirMixed,
hashDirLower,
preSanitizeKeyName,
reSanitizeKeyName,
) where
import Data.Char
import Data.Default
import qualified Data.ByteString.Char8 as S8
import qualified System.FilePath.ByteString as P
import Common
import Key
import Types.UUID
import Types.GitConfig
import Types.Difference
import qualified Git
import qualified Git.Types as Git
import Git.FilePath
import Annex.DirHashes
import Annex.Fixup
import qualified Utility.RawFilePath as R
{- Conventions:
-
- Functions ending in "Dir" should always return values ending with a
- trailing path separator. Most code does not rely on that, but a few
- things do.
-
- Everything else should not end in a trailing path sepatator.
-
- Only functions (with names starting with "git") that build a path
- based on a git repository should return full path relative to the git
- repository. Everything else returns path segments.
-}
{- The directory git annex uses for local state, relative to the .git
- directory -}
annexDir :: RawFilePath
annexDir = P.addTrailingPathSeparator "annex"
{- The directory git annex uses for locally available object content,
- relative to the .git directory -}
objectDir :: RawFilePath
objectDir = P.addTrailingPathSeparator $ annexDir P.</> "objects"
{- Annexed file's possible locations relative to the .git directory
- in a non-bare repository.
-
- Normally it is hashDirMixed. However, it's always possible that a
- bare repository was converted to non-bare, or that the cripped
- filesystem setting changed, so still need to check both. -}
annexLocationsNonBare :: GitConfig -> Key -> [RawFilePath]
annexLocationsNonBare config key =
map (annexLocation config key) [hashDirMixed, hashDirLower]
{- Annexed file's possible locations relative to a bare repository. -}
annexLocationsBare :: GitConfig -> Key -> [RawFilePath]
annexLocationsBare config key =
map (annexLocation config key) [hashDirLower, hashDirMixed]
annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> RawFilePath
annexLocation config key hasher = objectDir P.</> keyPath key (hasher $ objectHashLevels config)
{- Number of subdirectories from the gitAnnexObjectDir
- to the gitAnnexLocation. -}
gitAnnexLocationDepth :: GitConfig -> Int
gitAnnexLocationDepth config = hashlevels + 1
where
HashLevels hashlevels = objectHashLevels config
{- Annexed object's location in a repository.
-
- When there are multiple possible locations, returns the one where the
- file is actually present.
-
- When the file is not present, returns the location where the file should
- be stored.
-}
gitAnnexLocation :: Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexLocation = gitAnnexLocation' R.doesPathExist
gitAnnexLocation' :: (RawFilePath -> IO Bool) -> Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexLocation' checker key r config = gitAnnexLocation'' key r config
(annexCrippledFileSystem config)
(coreSymlinks config)
checker
(Git.localGitDir r)
gitAnnexLocation'' :: Key -> Git.Repo -> GitConfig -> Bool -> Bool -> (RawFilePath -> IO Bool) -> RawFilePath -> IO RawFilePath
gitAnnexLocation'' key r config crippled symlinkssupported checker gitdir
{- Bare repositories default to hashDirLower for new
- content, as it's more portable. But check all locations. -}
| Git.repoIsLocalBare r = checkall annexLocationsBare
{- If the repository is configured to only use lower, no need
- to check both. -}
| hasDifference ObjectHashLower (annexDifferences config) =
only hashDirLower
{- Repositories on crippled filesystems use same layout as bare
- repos for new content, unless symlinks are supported too. -}
| crippled = if symlinkssupported
then checkall annexLocationsNonBare
else checkall annexLocationsBare
| otherwise = checkall annexLocationsNonBare
where
only = return . inrepo . annexLocation config key
checkall f = check $ map inrepo $ f config key
inrepo d = gitdir P.</> d
check locs@(l:_) = fromMaybe l <$> firstM checker locs
check [] = error "internal"
{- Calculates a symlink target to link a file to an annexed object. -}
gitAnnexLink :: RawFilePath -> Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexLink file key r config = do
currdir <- R.getCurrentDirectory
let absfile = absNormPathUnix currdir file
let gitdir = getgitdir currdir
loc <- gitAnnexLocation'' key r config False False (\_ -> return True) gitdir
toInternalGitPath <$> relPathDirToFile (parentDir absfile) loc
where
getgitdir currdir
{- This special case is for git submodules on filesystems not
- supporting symlinks; generate link target that will
- work portably. -}
| not (coreSymlinks config) && needsSubmoduleFixup r =
absNormPathUnix currdir (Git.repoPath r P.</> ".git")
| otherwise = Git.localGitDir r
absNormPathUnix d p = toInternalGitPath $
absPathFrom (toInternalGitPath d) (toInternalGitPath p)
{- Calculates a symlink target as would be used in a typical git
- repository, with .git in the top of the work tree. -}
gitAnnexLinkCanonical :: RawFilePath -> Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexLinkCanonical file key r config = gitAnnexLink file key r' config'
where
r' = case r of
Git.Repo { Git.location = l@Git.Local { Git.worktree = Just wt } } ->
r { Git.location = l { Git.gitdir = wt P.</> ".git" } }
_ -> r
config' = config
{ annexCrippledFileSystem = False
, coreSymlinks = True
}
{- File used to lock a key's content. -}
gitAnnexContentLock :: Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexContentLock key r config = do
loc <- gitAnnexLocation key r config
return $ loc <> ".lck"
{- Lock that is held when taking the gitAnnexContentLock to support the v10
- upgrade.
-
- This uses the gitAnnexInodeSentinal file, because it needs to be a file
- that exists in the repository, even when it's an old v8 repository that
- is mounted read-only. The gitAnnexInodeSentinal is created by git-annex
- init, so should already exist.
-}
gitAnnexContentLockLock :: Git.Repo -> RawFilePath
gitAnnexContentLockLock = gitAnnexInodeSentinal
gitAnnexInodeSentinal :: Git.Repo -> RawFilePath
gitAnnexInodeSentinal r = gitAnnexDir r P.</> "sentinal"
gitAnnexInodeSentinalCache :: Git.Repo -> RawFilePath
gitAnnexInodeSentinalCache r = gitAnnexInodeSentinal r <> ".cache"
{- The annex directory of a repository. -}
gitAnnexDir :: Git.Repo -> RawFilePath
gitAnnexDir r = P.addTrailingPathSeparator $ Git.localGitDir r P.</> annexDir
{- The part of the annex directory where file contents are stored. -}
gitAnnexObjectDir :: Git.Repo -> RawFilePath
gitAnnexObjectDir r = P.addTrailingPathSeparator $
Git.localGitDir r P.</> objectDir
{- .git/annex/tmp/ is used for temp files for key's contents -}
gitAnnexTmpObjectDir :: Git.Repo -> RawFilePath
gitAnnexTmpObjectDir r = P.addTrailingPathSeparator $
gitAnnexDir r P.</> "tmp"
{- .git/annex/othertmp/ is used for other temp files -}
gitAnnexTmpOtherDir :: Git.Repo -> RawFilePath
gitAnnexTmpOtherDir r = P.addTrailingPathSeparator $
gitAnnexDir r P.</> "othertmp"
{- Lock file for gitAnnexTmpOtherDir. -}
gitAnnexTmpOtherLock :: Git.Repo -> RawFilePath
gitAnnexTmpOtherLock r = gitAnnexDir r P.</> "othertmp.lck"
{- .git/annex/misctmp/ was used by old versions of git-annex and is still
- used during initialization -}
gitAnnexTmpOtherDirOld :: Git.Repo -> RawFilePath
gitAnnexTmpOtherDirOld r = P.addTrailingPathSeparator $
gitAnnexDir r P.</> "misctmp"
{- .git/annex/watchtmp/ is used by the watcher and assistant -}
gitAnnexTmpWatcherDir :: Git.Repo -> RawFilePath
gitAnnexTmpWatcherDir r = P.addTrailingPathSeparator $
gitAnnexDir r P.</> "watchtmp"
{- The temp file to use for a given key's content. -}
gitAnnexTmpObjectLocation :: Key -> Git.Repo -> RawFilePath
gitAnnexTmpObjectLocation key r = gitAnnexTmpObjectDir r P.</> keyFile key
{- Given a temp file such as gitAnnexTmpObjectLocation, makes a name for a
- subdirectory in the same location, that can be used as a work area
- when receiving the key's content.
-
- There are ordering requirements for creating these directories;
- use Annex.Content.withTmpWorkDir to set them up.
-}
gitAnnexTmpWorkDir :: RawFilePath -> RawFilePath
gitAnnexTmpWorkDir p =
let (dir, f) = P.splitFileName p
-- Using a prefix avoids name conflict with any other keys.
in dir P.</> "work." <> f
{- .git/annex/bad/ is used for bad files found during fsck -}
gitAnnexBadDir :: Git.Repo -> RawFilePath
gitAnnexBadDir r = P.addTrailingPathSeparator $ gitAnnexDir r P.</> "bad"
{- The bad file to use for a given key. -}
gitAnnexBadLocation :: Key -> Git.Repo -> RawFilePath
gitAnnexBadLocation key r = gitAnnexBadDir r P.</> keyFile key
{- .git/annex/foounused is used to number possibly unused keys -}
gitAnnexUnusedLog :: RawFilePath -> Git.Repo -> RawFilePath
gitAnnexUnusedLog prefix r = gitAnnexDir r P.</> (prefix <> "unused")
{- .git/annex/keysdb/ contains a database of information about keys. -}
gitAnnexKeysDbDir :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexKeysDbDir r c = fromMaybe (gitAnnexDir r) (annexDbDir c) P.</> "keysdb"
{- Lock file for the keys database. -}
gitAnnexKeysDbLock :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexKeysDbLock r c = gitAnnexKeysDbDir r c <> ".lck"
{- Contains the stat of the last index file that was
- reconciled with the keys database. -}
gitAnnexKeysDbIndexCache :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexKeysDbIndexCache r c = gitAnnexKeysDbDir r c <> ".cache"
{- .git/annex/fsck/uuid/ is used to store information about incremental
- fscks. -}
gitAnnexFsckDir :: UUID -> Git.Repo -> Maybe GitConfig -> RawFilePath
gitAnnexFsckDir u r mc = case annexDbDir =<< mc of
Nothing -> go (gitAnnexDir r)
Just d -> go d
where
go d = d P.</> "fsck" P.</> fromUUID u
{- used to store information about incremental fscks. -}
gitAnnexFsckState :: UUID -> Git.Repo -> RawFilePath
gitAnnexFsckState u r = gitAnnexFsckDir u r Nothing P.</> "state"
{- Directory containing database used to record fsck info. -}
gitAnnexFsckDbDir :: UUID -> Git.Repo -> GitConfig -> RawFilePath
gitAnnexFsckDbDir u r c = gitAnnexFsckDir u r (Just c) P.</> "fsckdb"
{- Directory containing old database used to record fsck info. -}
gitAnnexFsckDbDirOld :: UUID -> Git.Repo -> GitConfig -> RawFilePath
gitAnnexFsckDbDirOld u r c = gitAnnexFsckDir u r (Just c) P.</> "db"
{- Lock file for the fsck database. -}
gitAnnexFsckDbLock :: UUID -> Git.Repo -> GitConfig -> RawFilePath
gitAnnexFsckDbLock u r c = gitAnnexFsckDir u r (Just c) P.</> "fsck.lck"
{- .git/annex/fsckresults/uuid is used to store results of git fscks -}
gitAnnexFsckResultsLog :: UUID -> Git.Repo -> RawFilePath
gitAnnexFsckResultsLog u r =
gitAnnexDir r P.</> "fsckresults" P.</> fromUUID u
{- .git/annex/upgrade.log is used to record repository version upgrades. -}
gitAnnexUpgradeLog :: Git.Repo -> RawFilePath
gitAnnexUpgradeLog r = gitAnnexDir r P.</> "upgrade.log"
gitAnnexUpgradeLock :: Git.Repo -> RawFilePath
gitAnnexUpgradeLock r = gitAnnexDir r P.</> "upgrade.lck"
{- .git/annex/smudge.log is used to log smudged worktree files that need to
- be updated. -}
gitAnnexSmudgeLog :: Git.Repo -> RawFilePath
gitAnnexSmudgeLog r = gitAnnexDir r P.</> "smudge.log"
gitAnnexSmudgeLock :: Git.Repo -> RawFilePath
gitAnnexSmudgeLock r = gitAnnexDir r P.</> "smudge.lck"
{- .git/annex/restage.log is used to log worktree files that need to be
- restaged in git -}
gitAnnexRestageLog :: Git.Repo -> RawFilePath
gitAnnexRestageLog r = gitAnnexDir r P.</> "restage.log"
gitAnnexRestageLock :: Git.Repo -> RawFilePath
gitAnnexRestageLock r = gitAnnexDir r P.</> "restage.lck"
{- .git/annex/move.log is used to log moves that are in progress,
- to better support resuming an interrupted move. -}
gitAnnexMoveLog :: Git.Repo -> RawFilePath
gitAnnexMoveLog r = gitAnnexDir r P.</> "move.log"
gitAnnexMoveLock :: Git.Repo -> RawFilePath
gitAnnexMoveLock r = gitAnnexDir r P.</> "move.lck"
{- .git/annex/export/ is used to store information about
- exports to special remotes. -}
gitAnnexExportDir :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexExportDir r c = fromMaybe (gitAnnexDir r) (annexDbDir c) P.</> "export"
{- Directory containing database used to record export info. -}
gitAnnexExportDbDir :: UUID -> Git.Repo -> GitConfig -> RawFilePath
gitAnnexExportDbDir u r c =
gitAnnexExportDir r c P.</> fromUUID u P.</> "exportdb"
{- Lock file for export database. -}
gitAnnexExportLock :: UUID -> Git.Repo -> GitConfig -> RawFilePath
gitAnnexExportLock u r c = gitAnnexExportDbDir u r c <> ".lck"
{- Lock file for updating the export database with information from the
- repository. -}
gitAnnexExportUpdateLock :: UUID -> Git.Repo -> GitConfig -> RawFilePath
gitAnnexExportUpdateLock u r c = gitAnnexExportDbDir u r c <> ".upl"
{- Log file used to keep track of files that were in the tree exported to a
- remote, but were excluded by its preferred content settings. -}
gitAnnexExportExcludeLog :: UUID -> Git.Repo -> RawFilePath
gitAnnexExportExcludeLog u r = gitAnnexDir r P.</> "export.ex" P.</> fromUUID u
{- Directory containing database used to record remote content ids.
-
- (This used to be "cid", but a problem with the database caused it to
- need to be rebuilt with a new name.)
-}
gitAnnexContentIdentifierDbDir :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexContentIdentifierDbDir r c =
fromMaybe (gitAnnexDir r) (annexDbDir c) P.</> "cidsdb"
{- Lock file for writing to the content id database. -}
gitAnnexContentIdentifierLock :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexContentIdentifierLock r c = gitAnnexContentIdentifierDbDir r c <> ".lck"
{- .git/annex/schedulestate is used to store information about when
- scheduled jobs were last run. -}
gitAnnexScheduleState :: Git.Repo -> RawFilePath
gitAnnexScheduleState r = gitAnnexDir r P.</> "schedulestate"
{- .git/annex/creds/ is used to store credentials to access some special
- remotes. -}
gitAnnexCredsDir :: Git.Repo -> RawFilePath
gitAnnexCredsDir r = P.addTrailingPathSeparator $ gitAnnexDir r P.</> "creds"
{- .git/annex/certificate.pem and .git/annex/key.pem are used by the webapp
- when HTTPS is enabled -}
gitAnnexWebCertificate :: Git.Repo -> FilePath
gitAnnexWebCertificate r = fromRawFilePath $ gitAnnexDir r P.</> "certificate.pem"
gitAnnexWebPrivKey :: Git.Repo -> FilePath
gitAnnexWebPrivKey r = fromRawFilePath $ gitAnnexDir r P.</> "privkey.pem"
{- .git/annex/feeds/ is used to record per-key (url) state by importfeeds -}
gitAnnexFeedStateDir :: Git.Repo -> RawFilePath
gitAnnexFeedStateDir r = P.addTrailingPathSeparator $
gitAnnexDir r P.</> "feedstate"
gitAnnexFeedState :: Key -> Git.Repo -> RawFilePath
gitAnnexFeedState k r = gitAnnexFeedStateDir r P.</> keyFile k
{- .git/annex/merge/ is used as a empty work tree for merges in
- adjusted branches. -}
gitAnnexMergeDir :: Git.Repo -> FilePath
gitAnnexMergeDir r = fromRawFilePath $
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "merge"
{- .git/annex/transfer/ is used to record keys currently
- being transferred, and other transfer bookkeeping info. -}
gitAnnexTransferDir :: Git.Repo -> RawFilePath
gitAnnexTransferDir r =
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "transfer"
{- .git/annex/journal/ is used to journal changes made to the git-annex
- branch -}
gitAnnexJournalDir :: Git.Repo -> RawFilePath
gitAnnexJournalDir r =
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "journal"
{- .git/annex/journal.private/ is used to journal changes regarding private
- repositories. -}
gitAnnexPrivateJournalDir :: Git.Repo -> RawFilePath
gitAnnexPrivateJournalDir r =
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "journal-private"
{- Lock file for the journal. -}
gitAnnexJournalLock :: Git.Repo -> RawFilePath
gitAnnexJournalLock r = gitAnnexDir r P.</> "journal.lck"
{- Lock file for flushing a git queue that writes to the git index or
- other git state that should only have one writer at a time. -}
gitAnnexGitQueueLock :: Git.Repo -> RawFilePath
gitAnnexGitQueueLock r = gitAnnexDir r P.</> "gitqueue.lck"
{- .git/annex/index is used to stage changes to the git-annex branch -}
gitAnnexIndex :: Git.Repo -> RawFilePath
gitAnnexIndex r = gitAnnexDir r P.</> "index"
{- .git/annex/index-private is used to store information that is not to
- be exposed to the git-annex branch. -}
gitAnnexPrivateIndex :: Git.Repo -> RawFilePath
gitAnnexPrivateIndex r = gitAnnexDir r P.</> "index-private"
{- Holds the ref of the git-annex branch that the index was last updated to.
-
- The .lck in the name is a historical accident; this is not used as a
- lock. -}
gitAnnexIndexStatus :: Git.Repo -> RawFilePath
gitAnnexIndexStatus r = gitAnnexDir r P.</> "index.lck"
{- The index file used to generate a filtered branch view._-}
gitAnnexViewIndex :: Git.Repo -> RawFilePath
gitAnnexViewIndex r = gitAnnexDir r P.</> "viewindex"
{- File containing a log of recently accessed views. -}
gitAnnexViewLog :: Git.Repo -> RawFilePath
gitAnnexViewLog r = gitAnnexDir r P.</> "viewlog"
{- List of refs that have already been merged into the git-annex branch. -}
gitAnnexMergedRefs :: Git.Repo -> RawFilePath
gitAnnexMergedRefs r = gitAnnexDir r P.</> "mergedrefs"
{- List of refs that should not be merged into the git-annex branch. -}
gitAnnexIgnoredRefs :: Git.Repo -> RawFilePath
gitAnnexIgnoredRefs r = gitAnnexDir r P.</> "ignoredrefs"
{- Pid file for daemon mode. -}
gitAnnexPidFile :: Git.Repo -> RawFilePath
gitAnnexPidFile r = gitAnnexDir r P.</> "daemon.pid"
{- Pid lock file for pidlock mode -}
gitAnnexPidLockFile :: Git.Repo -> RawFilePath
gitAnnexPidLockFile r = gitAnnexDir r P.</> "pidlock"
{- Status file for daemon mode. -}
gitAnnexDaemonStatusFile :: Git.Repo -> FilePath
gitAnnexDaemonStatusFile r = fromRawFilePath $
gitAnnexDir r P.</> "daemon.status"
{- Log file for daemon mode. -}
gitAnnexDaemonLogFile :: Git.Repo -> RawFilePath
gitAnnexDaemonLogFile r = gitAnnexDir r P.</> "daemon.log"
{- Log file for fuzz test. -}
gitAnnexFuzzTestLogFile :: Git.Repo -> FilePath
gitAnnexFuzzTestLogFile r = fromRawFilePath $
gitAnnexDir r P.</> "fuzztest.log"
{- Html shim file used to launch the webapp. -}
gitAnnexHtmlShim :: Git.Repo -> RawFilePath
gitAnnexHtmlShim r = gitAnnexDir r P.</> "webapp.html"
{- File containing the url to the webapp. -}
gitAnnexUrlFile :: Git.Repo -> RawFilePath
gitAnnexUrlFile r = gitAnnexDir r P.</> "url"
{- Temporary file used to edit configuriation from the git-annex branch. -}
gitAnnexTmpCfgFile :: Git.Repo -> RawFilePath
gitAnnexTmpCfgFile r = gitAnnexDir r P.</> "config.tmp"
{- .git/annex/ssh/ is used for ssh connection caching -}
gitAnnexSshDir :: Git.Repo -> RawFilePath
gitAnnexSshDir r = P.addTrailingPathSeparator $ gitAnnexDir r P.</> "ssh"
{- .git/annex/remotes/ is used for remote-specific state. -}
gitAnnexRemotesDir :: Git.Repo -> RawFilePath
gitAnnexRemotesDir r =
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "remotes"
{- This is the base directory name used by the assistant when making
- repositories, by default. -}
gitAnnexAssistantDefaultDir :: FilePath
gitAnnexAssistantDefaultDir = "annex"
{- Sanitizes a String that will be used as part of a Key's keyName,
- dealing with characters that cause problems.
-
- This is used when a new Key is initially being generated, eg by genKey.
- Unlike keyFile and fileKey, it does not need to be a reversable
- escaping. Also, it's ok to change this to add more problematic
- characters later. Unlike changing keyFile, which could result in the
- filenames used for existing keys changing and contents getting lost.
-
- It is, however, important that the input and output of this function
- have a 1:1 mapping, to avoid two different inputs from mapping to the
- same key.
-}
preSanitizeKeyName :: String -> String
preSanitizeKeyName = preSanitizeKeyName' False
preSanitizeKeyName' :: Bool -> String -> String
preSanitizeKeyName' resanitize = concatMap escape
where
escape c
| isAsciiUpper c || isAsciiLower c || isDigit c = [c]
| c `elem` ['.', '-', '_'] = [c] -- common, assumed safe
| c `elem` ['/', '%', ':'] = [c] -- handled by keyFile
-- , is safe and uncommon, so will be used to escape
-- other characters. By itself, it is escaped to
-- doubled form.
| c == ',' = if not resanitize
then ",,"
else ","
| otherwise = ',' : show (ord c)
{- Converts a keyName that has been santizied with an old version of
- preSanitizeKeyName to be sanitized with the new version. -}
reSanitizeKeyName :: String -> String
reSanitizeKeyName = preSanitizeKeyName' True
{- Converts a key into a filename fragment without any directory.
-
- Escape "/" in the key name, to keep a flat tree of files and avoid
- issues with keys containing "/../" or ending with "/" etc.
-
- "/" is escaped to "%" because it's short and rarely used, and resembles
- a slash
- "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping
- is one to one.
- ":" is escaped to "&c", because it seemed like a good idea at the time.
-
- Changing what this function escapes and how is not a good idea, as it
- can cause existing objects to get lost.
-}
keyFile :: Key -> RawFilePath
keyFile k =
let b = serializeKey' k
in if S8.any (`elem` ['&', '%', ':', '/']) b
then S8.concatMap esc b
else b
where
esc '&' = "&a"
esc '%' = "&s"
esc ':' = "&c"
esc '/' = "%"
esc c = S8.singleton c
{- Reverses keyFile, converting a filename fragment (ie, the basename of
- the symlink target) into a key. -}
fileKey :: RawFilePath -> Maybe Key
fileKey = deserializeKey' . S8.intercalate "/" . map go . S8.split '%'
where
go = S8.concat . unescafterfirst . S8.split '&'
unescafterfirst [] = []
unescafterfirst (b:bs) = b : map (unesc . S8.uncons) bs
unesc :: Maybe (Char, S8.ByteString) -> S8.ByteString
unesc Nothing = mempty
unesc (Just ('c', b)) = S8.cons ':' b
unesc (Just ('s', b)) = S8.cons '%' b
unesc (Just ('a', b)) = S8.cons '&' b
unesc (Just (c, b)) = S8.cons c b
{- A location to store a key on a special remote that uses a filesystem.
- A directory hash is used, to protect against filesystems that dislike
- having many items in a single directory.
-
- The file is put in a directory with the same name, this allows
- write-protecting the directory to avoid accidental deletion of the file.
-}
keyPath :: Key -> Hasher -> RawFilePath
keyPath key hasher = hasher key P.</> f P.</> f
where
f = keyFile key
{- All possibile locations to store a key in a special remote
- using different directory hashes.
-
- This is compatible with the annexLocationsNonBare and annexLocationsBare,
- for interoperability between special remotes and git-annex repos.
-}
keyPaths :: Key -> [RawFilePath]
keyPaths key = map (\h -> keyPath key (h def)) dirHashes