git-annex/Annex/Locations.hs
Joey Hess dd31fe7b9e
fall back to checking lower case hash directories in normal repo
Fix a bug that prevented getting content from a repository that started out
as a bare repository, or had annex.crippledfilesystem set, and was
converted to a non-bare repository.

This unfortunately means that inAnnex check gets slowed down by a stat call
in normal repos when the content is not present. Oh well, such is the cost
of backwards compatability with old mistakes.

Sponsored-by: Mark Reidenbach on Patreon
2021-07-15 12:16:31 -04:00

631 lines
22 KiB
Haskell

{- git-annex file locations
-
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Locations (
keyFile,
fileKey,
keyPaths,
keyPath,
annexDir,
objectDir,
objectDir',
gitAnnexLocation,
gitAnnexLocationDepth,
gitAnnexLink,
gitAnnexLinkCanonical,
gitAnnexContentLock,
gitAnnexInodeSentinal,
gitAnnexInodeSentinalCache,
annexLocations,
gitAnnexDir,
gitAnnexObjectDir,
gitAnnexTmpOtherDir,
gitAnnexTmpOtherLock,
gitAnnexTmpOtherDirOld,
gitAnnexTmpWatcherDir,
gitAnnexTmpObjectDir,
gitAnnexTmpObjectLocation,
gitAnnexTmpWorkDir,
gitAnnexBadDir,
gitAnnexBadLocation,
gitAnnexUnusedLog,
gitAnnexKeysDb,
gitAnnexKeysDbLock,
gitAnnexKeysDbIndexCache,
gitAnnexFsckState,
gitAnnexFsckDbDir,
gitAnnexFsckDbDirOld,
gitAnnexFsckDbLock,
gitAnnexFsckResultsLog,
gitAnnexSmudgeLog,
gitAnnexSmudgeLock,
gitAnnexMoveLog,
gitAnnexMoveLock,
gitAnnexExportDir,
gitAnnexExportDbDir,
gitAnnexExportLock,
gitAnnexExportUpdateLock,
gitAnnexExportExcludeLog,
gitAnnexContentIdentifierDbDir,
gitAnnexContentIdentifierLock,
gitAnnexScheduleState,
gitAnnexTransferDir,
gitAnnexCredsDir,
gitAnnexWebCertificate,
gitAnnexWebPrivKey,
gitAnnexFeedStateDir,
gitAnnexFeedState,
gitAnnexMergeDir,
gitAnnexJournalDir,
gitAnnexPrivateJournalDir,
gitAnnexJournalLock,
gitAnnexGitQueueLock,
gitAnnexIndex,
gitAnnexPrivateIndex,
gitAnnexIndexStatus,
gitAnnexViewIndex,
gitAnnexViewLog,
gitAnnexMergedRefs,
gitAnnexIgnoredRefs,
gitAnnexPidFile,
gitAnnexPidLockFile,
gitAnnexDaemonStatusFile,
gitAnnexDaemonLogFile,
gitAnnexFuzzTestLogFile,
gitAnnexHtmlShim,
gitAnnexUrlFile,
gitAnnexTmpCfgFile,
gitAnnexSshDir,
gitAnnexRemotesDir,
gitAnnexAssistantDefaultDir,
HashLevels(..),
hashDirMixed,
hashDirLower,
preSanitizeKeyName,
reSanitizeKeyName,
) where
import Data.Char
import Data.Default
import qualified Data.ByteString.Char8 as S8
import qualified System.FilePath.ByteString as P
import Common
import Key
import Types.UUID
import Types.GitConfig
import Types.Difference
import qualified Git
import qualified Git.Types as Git
import Git.FilePath
import Annex.DirHashes
import Annex.Fixup
import qualified Utility.RawFilePath as R
{- Conventions:
-
- Functions ending in "Dir" should always return values ending with a
- trailing path separator. Most code does not rely on that, but a few
- things do.
-
- Everything else should not end in a trailing path sepatator.
-
- Only functions (with names starting with "git") that build a path
- based on a git repository should return full path relative to the git
- repository. Everything else returns path segments.
-}
{- The directory git annex uses for local state, relative to the .git
- directory -}
annexDir :: RawFilePath
annexDir = P.addTrailingPathSeparator "annex"
{- The directory git annex uses for locally available object content,
- relative to the .git directory -}
objectDir :: FilePath
objectDir = fromRawFilePath objectDir'
objectDir' :: RawFilePath
objectDir' = P.addTrailingPathSeparator $ annexDir P.</> "objects"
{- Annexed file's possible locations relative to the .git directory.
- There are two different possibilities, using different hashes.
-
- Also, some repositories have a Difference in hash directory depth.
-}
annexLocations :: GitConfig -> Key -> [RawFilePath]
annexLocations config key = map (annexLocation config key) dirHashes
annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> RawFilePath
annexLocation config key hasher = objectDir' P.</> keyPath key (hasher $ objectHashLevels config)
{- Number of subdirectories from the gitAnnexObjectDir
- to the gitAnnexLocation. -}
gitAnnexLocationDepth :: GitConfig -> Int
gitAnnexLocationDepth config = hashlevels + 1
where
HashLevels hashlevels = objectHashLevels config
{- Annexed object's location in a repository.
-
- When there are multiple possible locations, returns the one where the
- file is actually present.
-
- When the file is not present, returns the location where the file should
- be stored.
-}
gitAnnexLocation :: Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexLocation key r config = gitAnnexLocation' key r config
(annexCrippledFileSystem config)
(coreSymlinks config)
R.doesPathExist
(Git.localGitDir r)
gitAnnexLocation' :: Key -> Git.Repo -> GitConfig -> Bool -> Bool -> (RawFilePath -> IO Bool) -> RawFilePath -> IO RawFilePath
gitAnnexLocation' key r config crippled symlinkssupported checker gitdir
{- Bare repositories default to hashDirLower for new
- content, as it's more portable. But check all locations. -}
| Git.repoIsLocalBare r = checkall
{- If the repository is configured to only use lower, no need
- to check both. -}
| hasDifference ObjectHashLower (annexDifferences config) =
only hashDirLower
{- Repositories on crippled filesystems use hashDirLower
- for new content, unless symlinks are supported too.
- Then hashDirMixed is used. But, the content could be
- in either location so check both. -}
| crippled = if symlinkssupported
then check $ map inrepo $ reverse $ annexLocations config key
else checkall
{- Regular repositories usually only use hashDirMixed.
- However, it's always possible that a bare repository was
- converted to non-bare, or that the cripped filesystem
- setting changed, so still need to check both. -}
| otherwise = checkall
where
only = return . inrepo . annexLocation config key
checkall = check $ map inrepo $ annexLocations config key
inrepo d = gitdir P.</> d
check locs@(l:_) = fromMaybe l <$> firstM checker locs
check [] = error "internal"
{- Calculates a symlink target to link a file to an annexed object. -}
gitAnnexLink :: RawFilePath -> Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexLink file key r config = do
currdir <- R.getCurrentDirectory
let absfile = absNormPathUnix currdir file
let gitdir = getgitdir currdir
loc <- gitAnnexLocation' key r config False False (\_ -> return True) gitdir
toInternalGitPath <$> relPathDirToFile (parentDir absfile) loc
where
getgitdir currdir
{- This special case is for git submodules on filesystems not
- supporting symlinks; generate link target that will
- work portably. -}
| not (coreSymlinks config) && needsSubmoduleFixup r =
absNormPathUnix currdir (Git.repoPath r P.</> ".git")
| otherwise = Git.localGitDir r
absNormPathUnix d p = toInternalGitPath $
absPathFrom (toInternalGitPath d) (toInternalGitPath p)
{- Calculates a symlink target as would be used in a typical git
- repository, with .git in the top of the work tree. -}
gitAnnexLinkCanonical :: RawFilePath -> Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexLinkCanonical file key r config = gitAnnexLink file key r' config'
where
r' = case r of
Git.Repo { Git.location = l@Git.Local { Git.worktree = Just wt } } ->
r { Git.location = l { Git.gitdir = wt P.</> ".git" } }
_ -> r
config' = config
{ annexCrippledFileSystem = False
, coreSymlinks = True
}
{- File used to lock a key's content. -}
gitAnnexContentLock :: Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexContentLock key r config = do
loc <- gitAnnexLocation key r config
return $ loc <> ".lck"
gitAnnexInodeSentinal :: Git.Repo -> RawFilePath
gitAnnexInodeSentinal r = gitAnnexDir r P.</> "sentinal"
gitAnnexInodeSentinalCache :: Git.Repo -> RawFilePath
gitAnnexInodeSentinalCache r = gitAnnexInodeSentinal r <> ".cache"
{- The annex directory of a repository. -}
gitAnnexDir :: Git.Repo -> RawFilePath
gitAnnexDir r = P.addTrailingPathSeparator $ Git.localGitDir r P.</> annexDir
{- The part of the annex directory where file contents are stored. -}
gitAnnexObjectDir :: Git.Repo -> RawFilePath
gitAnnexObjectDir r = P.addTrailingPathSeparator $
Git.localGitDir r P.</> objectDir'
{- .git/annex/tmp/ is used for temp files for key's contents -}
gitAnnexTmpObjectDir :: Git.Repo -> RawFilePath
gitAnnexTmpObjectDir r = P.addTrailingPathSeparator $
gitAnnexDir r P.</> "tmp"
{- .git/annex/othertmp/ is used for other temp files -}
gitAnnexTmpOtherDir :: Git.Repo -> RawFilePath
gitAnnexTmpOtherDir r = P.addTrailingPathSeparator $
gitAnnexDir r P.</> "othertmp"
{- Lock file for gitAnnexTmpOtherDir. -}
gitAnnexTmpOtherLock :: Git.Repo -> RawFilePath
gitAnnexTmpOtherLock r = gitAnnexDir r P.</> "othertmp.lck"
{- .git/annex/misctmp/ was used by old versions of git-annex and is still
- used during initialization -}
gitAnnexTmpOtherDirOld :: Git.Repo -> RawFilePath
gitAnnexTmpOtherDirOld r = P.addTrailingPathSeparator $
gitAnnexDir r P.</> "misctmp"
{- .git/annex/watchtmp/ is used by the watcher and assistant -}
gitAnnexTmpWatcherDir :: Git.Repo -> RawFilePath
gitAnnexTmpWatcherDir r = P.addTrailingPathSeparator $
gitAnnexDir r P.</> "watchtmp"
{- The temp file to use for a given key's content. -}
gitAnnexTmpObjectLocation :: Key -> Git.Repo -> RawFilePath
gitAnnexTmpObjectLocation key r = gitAnnexTmpObjectDir r P.</> keyFile key
{- Given a temp file such as gitAnnexTmpObjectLocation, makes a name for a
- subdirectory in the same location, that can be used as a work area
- when receiving the key's content.
-
- There are ordering requirements for creating these directories;
- use Annex.Content.withTmpWorkDir to set them up.
-}
gitAnnexTmpWorkDir :: RawFilePath -> RawFilePath
gitAnnexTmpWorkDir p =
let (dir, f) = P.splitFileName p
-- Using a prefix avoids name conflict with any other keys.
in dir P.</> "work." <> f
{- .git/annex/bad/ is used for bad files found during fsck -}
gitAnnexBadDir :: Git.Repo -> RawFilePath
gitAnnexBadDir r = P.addTrailingPathSeparator $ gitAnnexDir r P.</> "bad"
{- The bad file to use for a given key. -}
gitAnnexBadLocation :: Key -> Git.Repo -> RawFilePath
gitAnnexBadLocation key r = gitAnnexBadDir r P.</> keyFile key
{- .git/annex/foounused is used to number possibly unused keys -}
gitAnnexUnusedLog :: RawFilePath -> Git.Repo -> RawFilePath
gitAnnexUnusedLog prefix r = gitAnnexDir r P.</> (prefix <> "unused")
{- .git/annex/keysdb/ contains a database of information about keys. -}
gitAnnexKeysDb :: Git.Repo -> RawFilePath
gitAnnexKeysDb r = gitAnnexDir r P.</> "keysdb"
{- Lock file for the keys database. -}
gitAnnexKeysDbLock :: Git.Repo -> RawFilePath
gitAnnexKeysDbLock r = gitAnnexKeysDb r <> ".lck"
{- Contains the stat of the last index file that was
- reconciled with the keys database. -}
gitAnnexKeysDbIndexCache :: Git.Repo -> RawFilePath
gitAnnexKeysDbIndexCache r = gitAnnexKeysDb r <> ".cache"
{- .git/annex/fsck/uuid/ is used to store information about incremental
- fscks. -}
gitAnnexFsckDir :: UUID -> Git.Repo -> RawFilePath
gitAnnexFsckDir u r = gitAnnexDir r P.</> "fsck" P.</> fromUUID u
{- used to store information about incremental fscks. -}
gitAnnexFsckState :: UUID -> Git.Repo -> RawFilePath
gitAnnexFsckState u r = gitAnnexFsckDir u r P.</> "state"
{- Directory containing database used to record fsck info. -}
gitAnnexFsckDbDir :: UUID -> Git.Repo -> RawFilePath
gitAnnexFsckDbDir u r = gitAnnexFsckDir u r P.</> "fsckdb"
{- Directory containing old database used to record fsck info. -}
gitAnnexFsckDbDirOld :: UUID -> Git.Repo -> RawFilePath
gitAnnexFsckDbDirOld u r = gitAnnexFsckDir u r P.</> "db"
{- Lock file for the fsck database. -}
gitAnnexFsckDbLock :: UUID -> Git.Repo -> RawFilePath
gitAnnexFsckDbLock u r = gitAnnexFsckDir u r P.</> "fsck.lck"
{- .git/annex/fsckresults/uuid is used to store results of git fscks -}
gitAnnexFsckResultsLog :: UUID -> Git.Repo -> RawFilePath
gitAnnexFsckResultsLog u r =
gitAnnexDir r P.</> "fsckresults" P.</> fromUUID u
{- .git/annex/smudge.log is used to log smudges worktree files that need to
- be updated. -}
gitAnnexSmudgeLog :: Git.Repo -> RawFilePath
gitAnnexSmudgeLog r = gitAnnexDir r P.</> "smudge.log"
gitAnnexSmudgeLock :: Git.Repo -> RawFilePath
gitAnnexSmudgeLock r = gitAnnexDir r P.</> "smudge.lck"
{- .git/annex/move.log is used to log moves that are in progress,
- to better support resuming an interrupted move. -}
gitAnnexMoveLog :: Git.Repo -> RawFilePath
gitAnnexMoveLog r = gitAnnexDir r P.</> "move.log"
gitAnnexMoveLock :: Git.Repo -> RawFilePath
gitAnnexMoveLock r = gitAnnexDir r P.</> "move.lck"
{- .git/annex/export/ is used to store information about
- exports to special remotes. -}
gitAnnexExportDir :: Git.Repo -> RawFilePath
gitAnnexExportDir r = gitAnnexDir r P.</> "export"
{- Directory containing database used to record export info. -}
gitAnnexExportDbDir :: UUID -> Git.Repo -> RawFilePath
gitAnnexExportDbDir u r = gitAnnexExportDir r P.</> fromUUID u P.</> "exportdb"
{- Lock file for export state for a special remote. -}
gitAnnexExportLock :: UUID -> Git.Repo -> RawFilePath
gitAnnexExportLock u r = gitAnnexExportDbDir u r <> ".lck"
{- Lock file for updating the export state for a special remote. -}
gitAnnexExportUpdateLock :: UUID -> Git.Repo -> RawFilePath
gitAnnexExportUpdateLock u r = gitAnnexExportDbDir u r <> ".upl"
{- Log file used to keep track of files that were in the tree exported to a
- remote, but were excluded by its preferred content settings. -}
gitAnnexExportExcludeLog :: UUID -> Git.Repo -> RawFilePath
gitAnnexExportExcludeLog u r = gitAnnexDir r P.</> "export.ex" P.</> fromUUID u
{- Directory containing database used to record remote content ids.
-
- (This used to be "cid", but a problem with the database caused it to
- need to be rebuilt with a new name.)
-}
gitAnnexContentIdentifierDbDir :: Git.Repo -> RawFilePath
gitAnnexContentIdentifierDbDir r = gitAnnexDir r P.</> "cidsdb"
{- Lock file for writing to the content id database. -}
gitAnnexContentIdentifierLock :: Git.Repo -> RawFilePath
gitAnnexContentIdentifierLock r = gitAnnexContentIdentifierDbDir r <> ".lck"
{- .git/annex/schedulestate is used to store information about when
- scheduled jobs were last run. -}
gitAnnexScheduleState :: Git.Repo -> RawFilePath
gitAnnexScheduleState r = gitAnnexDir r P.</> "schedulestate"
{- .git/annex/creds/ is used to store credentials to access some special
- remotes. -}
gitAnnexCredsDir :: Git.Repo -> RawFilePath
gitAnnexCredsDir r = P.addTrailingPathSeparator $ gitAnnexDir r P.</> "creds"
{- .git/annex/certificate.pem and .git/annex/key.pem are used by the webapp
- when HTTPS is enabled -}
gitAnnexWebCertificate :: Git.Repo -> FilePath
gitAnnexWebCertificate r = fromRawFilePath $ gitAnnexDir r P.</> "certificate.pem"
gitAnnexWebPrivKey :: Git.Repo -> FilePath
gitAnnexWebPrivKey r = fromRawFilePath $ gitAnnexDir r P.</> "privkey.pem"
{- .git/annex/feeds/ is used to record per-key (url) state by importfeeds -}
gitAnnexFeedStateDir :: Git.Repo -> RawFilePath
gitAnnexFeedStateDir r = P.addTrailingPathSeparator $
gitAnnexDir r P.</> "feedstate"
gitAnnexFeedState :: Key -> Git.Repo -> RawFilePath
gitAnnexFeedState k r = gitAnnexFeedStateDir r P.</> keyFile k
{- .git/annex/merge/ is used as a empty work tree for merges in
- adjusted branches. -}
gitAnnexMergeDir :: Git.Repo -> FilePath
gitAnnexMergeDir r = fromRawFilePath $
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "merge"
{- .git/annex/transfer/ is used to record keys currently
- being transferred, and other transfer bookkeeping info. -}
gitAnnexTransferDir :: Git.Repo -> RawFilePath
gitAnnexTransferDir r =
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "transfer"
{- .git/annex/journal/ is used to journal changes made to the git-annex
- branch -}
gitAnnexJournalDir :: Git.Repo -> RawFilePath
gitAnnexJournalDir r =
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "journal"
{- .git/annex/journal.private/ is used to journal changes regarding private
- repositories. -}
gitAnnexPrivateJournalDir :: Git.Repo -> RawFilePath
gitAnnexPrivateJournalDir r =
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "journal-private"
{- Lock file for the journal. -}
gitAnnexJournalLock :: Git.Repo -> RawFilePath
gitAnnexJournalLock r = gitAnnexDir r P.</> "journal.lck"
{- Lock file for flushing a git queue that writes to the git index or
- other git state that should only have one writer at a time. -}
gitAnnexGitQueueLock :: Git.Repo -> RawFilePath
gitAnnexGitQueueLock r = gitAnnexDir r P.</> "gitqueue.lck"
{- .git/annex/index is used to stage changes to the git-annex branch -}
gitAnnexIndex :: Git.Repo -> RawFilePath
gitAnnexIndex r = gitAnnexDir r P.</> "index"
{- .git/annex/index-private is used to store information that is not to
- be exposed to the git-annex branch. -}
gitAnnexPrivateIndex :: Git.Repo -> RawFilePath
gitAnnexPrivateIndex r = gitAnnexDir r P.</> "index-private"
{- Holds the ref of the git-annex branch that the index was last updated to.
-
- The .lck in the name is a historical accident; this is not used as a
- lock. -}
gitAnnexIndexStatus :: Git.Repo -> RawFilePath
gitAnnexIndexStatus r = gitAnnexDir r P.</> "index.lck"
{- The index file used to generate a filtered branch view._-}
gitAnnexViewIndex :: Git.Repo -> RawFilePath
gitAnnexViewIndex r = gitAnnexDir r P.</> "viewindex"
{- File containing a log of recently accessed views. -}
gitAnnexViewLog :: Git.Repo -> RawFilePath
gitAnnexViewLog r = gitAnnexDir r P.</> "viewlog"
{- List of refs that have already been merged into the git-annex branch. -}
gitAnnexMergedRefs :: Git.Repo -> RawFilePath
gitAnnexMergedRefs r = gitAnnexDir r P.</> "mergedrefs"
{- List of refs that should not be merged into the git-annex branch. -}
gitAnnexIgnoredRefs :: Git.Repo -> RawFilePath
gitAnnexIgnoredRefs r = gitAnnexDir r P.</> "ignoredrefs"
{- Pid file for daemon mode. -}
gitAnnexPidFile :: Git.Repo -> RawFilePath
gitAnnexPidFile r = gitAnnexDir r P.</> "daemon.pid"
{- Pid lock file for pidlock mode -}
gitAnnexPidLockFile :: Git.Repo -> RawFilePath
gitAnnexPidLockFile r = gitAnnexDir r P.</> "pidlock"
{- Status file for daemon mode. -}
gitAnnexDaemonStatusFile :: Git.Repo -> FilePath
gitAnnexDaemonStatusFile r = fromRawFilePath $
gitAnnexDir r P.</> "daemon.status"
{- Log file for daemon mode. -}
gitAnnexDaemonLogFile :: Git.Repo -> RawFilePath
gitAnnexDaemonLogFile r = gitAnnexDir r P.</> "daemon.log"
{- Log file for fuzz test. -}
gitAnnexFuzzTestLogFile :: Git.Repo -> FilePath
gitAnnexFuzzTestLogFile r = fromRawFilePath $
gitAnnexDir r P.</> "fuzztest.log"
{- Html shim file used to launch the webapp. -}
gitAnnexHtmlShim :: Git.Repo -> RawFilePath
gitAnnexHtmlShim r = gitAnnexDir r P.</> "webapp.html"
{- File containing the url to the webapp. -}
gitAnnexUrlFile :: Git.Repo -> RawFilePath
gitAnnexUrlFile r = gitAnnexDir r P.</> "url"
{- Temporary file used to edit configuriation from the git-annex branch. -}
gitAnnexTmpCfgFile :: Git.Repo -> RawFilePath
gitAnnexTmpCfgFile r = gitAnnexDir r P.</> "config.tmp"
{- .git/annex/ssh/ is used for ssh connection caching -}
gitAnnexSshDir :: Git.Repo -> RawFilePath
gitAnnexSshDir r = P.addTrailingPathSeparator $ gitAnnexDir r P.</> "ssh"
{- .git/annex/remotes/ is used for remote-specific state. -}
gitAnnexRemotesDir :: Git.Repo -> RawFilePath
gitAnnexRemotesDir r =
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "remotes"
{- This is the base directory name used by the assistant when making
- repositories, by default. -}
gitAnnexAssistantDefaultDir :: FilePath
gitAnnexAssistantDefaultDir = "annex"
{- Sanitizes a String that will be used as part of a Key's keyName,
- dealing with characters that cause problems.
-
- This is used when a new Key is initially being generated, eg by genKey.
- Unlike keyFile and fileKey, it does not need to be a reversable
- escaping. Also, it's ok to change this to add more problematic
- characters later. Unlike changing keyFile, which could result in the
- filenames used for existing keys changing and contents getting lost.
-
- It is, however, important that the input and output of this function
- have a 1:1 mapping, to avoid two different inputs from mapping to the
- same key.
-}
preSanitizeKeyName :: String -> String
preSanitizeKeyName = preSanitizeKeyName' False
preSanitizeKeyName' :: Bool -> String -> String
preSanitizeKeyName' resanitize = concatMap escape
where
escape c
| isAsciiUpper c || isAsciiLower c || isDigit c = [c]
| c `elem` ['.', '-', '_'] = [c] -- common, assumed safe
| c `elem` ['/', '%', ':'] = [c] -- handled by keyFile
-- , is safe and uncommon, so will be used to escape
-- other characters. By itself, it is escaped to
-- doubled form.
| c == ',' = if not resanitize
then ",,"
else ","
| otherwise = ',' : show (ord c)
{- Converts a keyName that has been santizied with an old version of
- preSanitizeKeyName to be sanitized with the new version. -}
reSanitizeKeyName :: String -> String
reSanitizeKeyName = preSanitizeKeyName' True
{- Converts a key into a filename fragment without any directory.
-
- Escape "/" in the key name, to keep a flat tree of files and avoid
- issues with keys containing "/../" or ending with "/" etc.
-
- "/" is escaped to "%" because it's short and rarely used, and resembles
- a slash
- "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping
- is one to one.
- ":" is escaped to "&c", because it seemed like a good idea at the time.
-
- Changing what this function escapes and how is not a good idea, as it
- can cause existing objects to get lost.
-}
keyFile :: Key -> RawFilePath
keyFile k =
let b = serializeKey' k
in if S8.any (`elem` ['&', '%', ':', '/']) b
then S8.concatMap esc b
else b
where
esc '&' = "&a"
esc '%' = "&s"
esc ':' = "&c"
esc '/' = "%"
esc c = S8.singleton c
{- Reverses keyFile, converting a filename fragment (ie, the basename of
- the symlink target) into a key. -}
fileKey :: RawFilePath -> Maybe Key
fileKey = deserializeKey' . S8.intercalate "/" . map go . S8.split '%'
where
go = S8.concat . unescafterfirst . S8.split '&'
unescafterfirst [] = []
unescafterfirst (b:bs) = b : map (unesc . S8.uncons) bs
unesc :: Maybe (Char, S8.ByteString) -> S8.ByteString
unesc Nothing = mempty
unesc (Just ('c', b)) = S8.cons ':' b
unesc (Just ('s', b)) = S8.cons '%' b
unesc (Just ('a', b)) = S8.cons '&' b
unesc (Just (c, b)) = S8.cons c b
{- A location to store a key on a special remote that uses a filesystem.
- A directory hash is used, to protect against filesystems that dislike
- having many items in a single directory.
-
- The file is put in a directory with the same name, this allows
- write-protecting the directory to avoid accidental deletion of the file.
-}
keyPath :: Key -> Hasher -> RawFilePath
keyPath key hasher = hasher key P.</> f P.</> f
where
f = keyFile key
{- All possibile locations to store a key in a special remote
- using different directory hashes.
-
- This is compatible with the annexLocations, for interoperability between
- special remotes and git-annex repos.
-}
keyPaths :: Key -> [RawFilePath]
keyPaths key = map (\h -> keyPath key (h def)) dirHashes