2010-10-10 19:54:02 +00:00
|
|
|
{- git-annex file locations
|
2010-10-27 20:53:54 +00:00
|
|
|
-
|
2019-01-14 18:02:47 +00:00
|
|
|
- Copyright 2010-2019 Joey Hess <id@joeyh.name>
|
2010-10-27 20:53:54 +00:00
|
|
|
-
|
|
|
|
- Licensed under the GNU GPL version 3 or higher.
|
2010-10-10 19:54:02 +00:00
|
|
|
-}
|
|
|
|
|
2019-01-14 18:02:47 +00:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
module Annex.Locations (
|
2010-10-13 00:04:36 +00:00
|
|
|
keyFile,
|
2019-01-14 18:02:47 +00:00
|
|
|
keyFile',
|
2010-10-13 07:41:12 +00:00
|
|
|
fileKey,
|
2019-01-14 18:02:47 +00:00
|
|
|
fileKey',
|
2011-12-02 18:39:47 +00:00
|
|
|
keyPaths,
|
2012-11-19 03:59:39 +00:00
|
|
|
keyPath,
|
assistant: Detect stale git lock files at startup time, and remove them.
Extends the index.lock handling to other git lock files. I surveyed
all lock files used by git, and found more than I expected. All are
handled the same in git; it leaves them open while doing the operation,
possibly writing the new file content to the lock file, and then closes
them when done.
The gc.pid file is excluded because it won't affect the normal operation
of the assistant, and waiting for a gc to finish on startup wouldn't be
good.
All threads except the webapp thread wait on the new startup sanity checker
thread to complete, so they won't try to do things with git that fail
due to stale lock files. The webapp thread mostly avoids doing that kind of
thing itself. A few configurators might fail on lock files, but only if the
user is explicitly trying to run them. The webapp needs to start
immediately when the user has opened it, even if there are stale lock
files.
Arranging for the threads to wait on the startup sanity checker was a bit
of a bear. Have to get all the NotificationHandles set up before the
startup sanity checker runs, or they won't see its signal. Perhaps
the NotificationBroadcaster is not the best interface to have used for
this. Oh well, it works.
This commit was sponsored by Michael Jakl
2013-10-05 21:02:11 +00:00
|
|
|
annexDir,
|
2013-09-24 21:25:47 +00:00
|
|
|
objectDir,
|
2011-01-27 21:00:32 +00:00
|
|
|
gitAnnexLocation,
|
2015-06-11 19:14:42 +00:00
|
|
|
gitAnnexLocationDepth,
|
2013-04-04 19:46:33 +00:00
|
|
|
gitAnnexLink,
|
2016-05-16 21:05:42 +00:00
|
|
|
gitAnnexLinkCanonical,
|
2014-01-28 20:01:19 +00:00
|
|
|
gitAnnexContentLock,
|
2012-12-07 18:40:31 +00:00
|
|
|
gitAnnexMapping,
|
2013-02-14 20:17:40 +00:00
|
|
|
gitAnnexInodeCache,
|
2013-02-19 20:26:07 +00:00
|
|
|
gitAnnexInodeSentinal,
|
|
|
|
gitAnnexInodeSentinalCache,
|
2011-11-29 02:43:51 +00:00
|
|
|
annexLocations,
|
2011-01-27 21:00:32 +00:00
|
|
|
gitAnnexDir,
|
|
|
|
gitAnnexObjectDir,
|
2014-02-26 20:52:56 +00:00
|
|
|
gitAnnexTmpMiscDir,
|
|
|
|
gitAnnexTmpObjectDir,
|
|
|
|
gitAnnexTmpObjectLocation,
|
2017-11-29 17:49:52 +00:00
|
|
|
gitAnnexTmpWorkDir,
|
2011-01-27 21:00:32 +00:00
|
|
|
gitAnnexBadDir,
|
2011-04-29 17:59:00 +00:00
|
|
|
gitAnnexBadLocation,
|
2011-01-27 21:00:32 +00:00
|
|
|
gitAnnexUnusedLog,
|
2015-12-09 21:00:37 +00:00
|
|
|
gitAnnexKeysDb,
|
|
|
|
gitAnnexKeysDbLock,
|
2018-08-22 17:04:12 +00:00
|
|
|
gitAnnexKeysDbIndexCache,
|
2012-09-25 18:16:34 +00:00
|
|
|
gitAnnexFsckState,
|
2015-02-18 19:54:24 +00:00
|
|
|
gitAnnexFsckDbDir,
|
2015-02-17 21:08:11 +00:00
|
|
|
gitAnnexFsckDbLock,
|
2013-10-22 20:02:52 +00:00
|
|
|
gitAnnexFsckResultsLog,
|
2018-10-25 18:43:13 +00:00
|
|
|
gitAnnexSmudgeLog,
|
|
|
|
gitAnnexSmudgeLock,
|
2017-09-04 17:52:22 +00:00
|
|
|
gitAnnexExportDbDir,
|
2017-09-18 16:12:11 +00:00
|
|
|
gitAnnexExportLock,
|
2013-10-08 15:48:28 +00:00
|
|
|
gitAnnexScheduleState,
|
2012-07-01 18:29:00 +00:00
|
|
|
gitAnnexTransferDir,
|
2012-09-26 16:06:44 +00:00
|
|
|
gitAnnexCredsDir,
|
2014-03-01 01:32:18 +00:00
|
|
|
gitAnnexWebCertificate,
|
|
|
|
gitAnnexWebPrivKey,
|
2013-08-03 05:40:21 +00:00
|
|
|
gitAnnexFeedStateDir,
|
|
|
|
gitAnnexFeedState,
|
2012-12-18 19:04:44 +00:00
|
|
|
gitAnnexMergeDir,
|
2011-06-23 15:37:26 +00:00
|
|
|
gitAnnexJournalDir,
|
2011-10-03 20:32:36 +00:00
|
|
|
gitAnnexJournalLock,
|
2014-03-21 18:39:50 +00:00
|
|
|
gitAnnexPreCommitLock,
|
2014-07-09 19:07:53 +00:00
|
|
|
gitAnnexMergeLock,
|
2011-12-11 18:14:28 +00:00
|
|
|
gitAnnexIndex,
|
2013-10-03 19:06:58 +00:00
|
|
|
gitAnnexIndexStatus,
|
2014-02-18 21:38:23 +00:00
|
|
|
gitAnnexViewIndex,
|
|
|
|
gitAnnexViewLog,
|
2016-07-17 16:11:05 +00:00
|
|
|
gitAnnexMergedRefs,
|
2013-08-28 19:57:42 +00:00
|
|
|
gitAnnexIgnoredRefs,
|
2012-06-11 05:20:19 +00:00
|
|
|
gitAnnexPidFile,
|
2015-11-12 21:47:31 +00:00
|
|
|
gitAnnexPidLockFile,
|
2012-06-13 17:35:15 +00:00
|
|
|
gitAnnexDaemonStatusFile,
|
2012-06-11 04:39:09 +00:00
|
|
|
gitAnnexLogFile,
|
2013-05-23 23:00:46 +00:00
|
|
|
gitAnnexFuzzTestLogFile,
|
2012-07-26 03:13:01 +00:00
|
|
|
gitAnnexHtmlShim,
|
2012-09-18 21:50:07 +00:00
|
|
|
gitAnnexUrlFile,
|
2012-10-03 21:04:52 +00:00
|
|
|
gitAnnexTmpCfgFile,
|
2012-01-20 19:34:52 +00:00
|
|
|
gitAnnexSshDir,
|
2012-03-04 20:00:24 +00:00
|
|
|
gitAnnexRemotesDir,
|
2012-08-31 22:59:57 +00:00
|
|
|
gitAnnexAssistantDefaultDir,
|
2015-01-28 19:55:17 +00:00
|
|
|
HashLevels(..),
|
2011-04-02 17:49:03 +00:00
|
|
|
hashDirMixed,
|
2011-06-22 21:51:48 +00:00
|
|
|
hashDirLower,
|
Better sanitization of problem characters when generating URL and WORM keys.
FAT has a lot of characters it does not allow in filenames, like ? and *
It's probably the worst offender, but other filesystems also have
limitations.
In 2011, I made keyFile escape : to handle FAT, but missed the other
characters. It also turns out that when I did that, I was also living
dangerously; any existing keys that contained a : had their object
location change. Oops.
So, adding new characters to escape to keyFile is out. Well, it would be
possible to make keyFile behave differently on a per-filesystem basis, but
this would be a real nightmare to get right. Consider that a rsync special
remote uses keyFile to determine the filenames to use, and we don't know
the underlying filesystem on the rsync server..
Instead, I have gone for a solution that is backwards compatible and
simple. Its only downside is that already generated URL and WORM keys
might not be able to be stored on FAT or some other filesystem that
dislikes a character used in the key. (In this case, the user can just
migrate the problem keys to a checksumming backend. If this became a big
problem, fsck could be made to detect these and suggest a migration.)
Going forward, new keys that are created will escape all characters that
are likely to cause problems. And if some filesystem comes along that's
even worse than FAT (seems unlikely, but here it is 2013, and people are
still using FAT!), additional characters can be added to the set that are
escaped without difficulty.
(Also, made WORM limit the part of the filename that is embedded in the key,
to deal with filesystem filename length limits. This could have already
been a problem, but is more likely now, since the escaping of the filename
can make it longer.)
This commit was sponsored by Ian Downes
2013-10-05 19:01:49 +00:00
|
|
|
preSanitizeKeyName,
|
2017-08-17 19:09:38 +00:00
|
|
|
reSanitizeKeyName,
|
2010-10-11 21:52:46 +00:00
|
|
|
) where
|
2010-10-10 19:54:02 +00:00
|
|
|
|
Better sanitization of problem characters when generating URL and WORM keys.
FAT has a lot of characters it does not allow in filenames, like ? and *
It's probably the worst offender, but other filesystems also have
limitations.
In 2011, I made keyFile escape : to handle FAT, but missed the other
characters. It also turns out that when I did that, I was also living
dangerously; any existing keys that contained a : had their object
location change. Oops.
So, adding new characters to escape to keyFile is out. Well, it would be
possible to make keyFile behave differently on a per-filesystem basis, but
this would be a real nightmare to get right. Consider that a rsync special
remote uses keyFile to determine the filenames to use, and we don't know
the underlying filesystem on the rsync server..
Instead, I have gone for a solution that is backwards compatible and
simple. Its only downside is that already generated URL and WORM keys
might not be able to be stored on FAT or some other filesystem that
dislikes a character used in the key. (In this case, the user can just
migrate the problem keys to a checksumming backend. If this became a big
problem, fsck could be made to detect these and suggest a migration.)
Going forward, new keys that are created will escape all characters that
are likely to cause problems. And if some filesystem comes along that's
even worse than FAT (seems unlikely, but here it is 2013, and people are
still using FAT!), additional characters can be added to the set that are
escaped without difficulty.
(Also, made WORM limit the part of the filename that is embedded in the key,
to deal with filesystem filename length limits. This could have already
been a problem, but is more likely now, since the escaping of the filename
can make it longer.)
This commit was sponsored by Ian Downes
2013-10-05 19:01:49 +00:00
|
|
|
import Data.Char
|
2015-01-28 19:55:17 +00:00
|
|
|
import Data.Default
|
2019-01-14 18:02:47 +00:00
|
|
|
import qualified Data.ByteString.Char8 as S8
|
2010-10-16 20:20:49 +00:00
|
|
|
|
2011-10-04 02:24:57 +00:00
|
|
|
import Common
|
2017-02-24 17:42:30 +00:00
|
|
|
import Key
|
2013-10-22 20:02:52 +00:00
|
|
|
import Types.UUID
|
2016-01-20 20:36:33 +00:00
|
|
|
import Types.GitConfig
|
2015-01-27 21:38:06 +00:00
|
|
|
import Types.Difference
|
2011-06-30 17:16:57 +00:00
|
|
|
import qualified Git
|
2016-05-16 21:05:42 +00:00
|
|
|
import qualified Git.Types as Git
|
2015-02-09 19:24:33 +00:00
|
|
|
import Git.FilePath
|
2015-01-28 20:51:40 +00:00
|
|
|
import Annex.DirHashes
|
2015-03-04 20:08:41 +00:00
|
|
|
import Annex.Fixup
|
2010-10-10 19:54:02 +00:00
|
|
|
|
2011-01-27 21:00:32 +00:00
|
|
|
{- Conventions:
|
|
|
|
-
|
|
|
|
- Functions ending in "Dir" should always return values ending with a
|
|
|
|
- trailing path separator. Most code does not rely on that, but a few
|
|
|
|
- things do.
|
|
|
|
-
|
|
|
|
 - Everything else should not end in a trailing path separator.
|
|
|
|
-
|
|
|
|
- Only functions (with names starting with "git") that build a path
|
2015-01-06 19:31:24 +00:00
|
|
|
- based on a git repository should return full path relative to the git
|
|
|
|
- repository. Everything else returns path segments.
|
2011-01-27 21:00:32 +00:00
|
|
|
-}
|
|
|
|
|
2011-03-03 18:51:57 +00:00
|
|
|
{- Name of the directory git-annex keeps its local state in, given
 - relative to the .git directory. Always ends with a path separator. -}
annexDir :: FilePath
annexDir = addTrailingPathSeparator base
  where
    base = "annex"
|
2011-03-03 18:51:57 +00:00
|
|
|
|
|
|
|
{- Where locally available object content is kept, given relative to the
 - .git directory. Always ends with a path separator. -}
objectDir :: FilePath
objectDir = addTrailingPathSeparator (annexDir </> "objects")
|
2010-10-13 05:04:06 +00:00
|
|
|
|
2011-11-29 02:43:51 +00:00
|
|
|
{- Every place an annexed file's content may be stored, relative to the
 - .git directory: one candidate per hasher in dirHashes, in that order.
 -
 - The hash directory depth comes from the repository's configuration,
 - since some repositories have a Difference there.
 -}
annexLocations :: GitConfig -> Key -> [FilePath]
annexLocations config key =
    [ annexLocation config key hasher | hasher <- dirHashes ]
|
|
|
|
|
|
|
|
{- Location of a key's object, relative to the .git directory, when the
 - given hasher is applied with the configured number of hash levels. -}
annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> FilePath
annexLocation config key hasher = objectDir </> keyPath key levelhasher
  where
    levelhasher = hasher (objectHashLevels config)
|
2010-10-13 07:41:12 +00:00
|
|
|
|
2015-06-11 19:14:42 +00:00
|
|
|
{- How many subdirectories lie between the gitAnnexObjectDir and a
 - gitAnnexLocation: the configured number of hash levels, plus one. -}
gitAnnexLocationDepth :: GitConfig -> Int
gitAnnexLocationDepth config = case objectHashLevels config of
    HashLevels hashlevels -> hashlevels + 1
|
|
|
|
|
2015-01-06 19:31:24 +00:00
|
|
|
{- Where an annexed object lives in a repository.
 -
 - If the object could be in more than one place, the place where the
 - file is actually present is returned.
 -
 - If the file is not present anywhere, the result is the place where it
 - should be stored.
 -
 - Direct mode is not taken into account, so in direct mode this is not
 - where the file's content really is.
 -}
gitAnnexLocation :: Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexLocation key r config =
    gitAnnexLocation' key r config crippled symlinks doesFileExist gitdir
  where
    crippled = annexCrippledFileSystem config
    symlinks = coreSymlinks config
    gitdir = Git.localGitDir r
|
|
|
|
{- Worker for gitAnnexLocation. The crippled filesystem flag, the symlink
 - support flag, the presence checker and the git directory are all passed
 - in explicitly, so callers can override what the repository and config
 - would otherwise say. -}
gitAnnexLocation' :: Key -> Git.Repo -> GitConfig -> Bool -> Bool -> (FilePath -> IO Bool) -> FilePath -> IO FilePath
gitAnnexLocation' key r config crippled symlinkssupported checker gitdir
    {- Bare repositories default to hashDirLower for new content, as
     - it's more portable. But every location is checked. -}
    | Git.repoIsLocalBare r = probe candidates
    | hasDifference ObjectHashLower (annexDifferences config) =
        fixed hashDirLower
    {- On a crippled filesystem hashDirLower is used for new content,
     - unless symlinks are supported too, in which case hashDirMixed is
     - used. Either way the content could be in either location, so both
     - are checked; only the order of preference differs. -}
    | crippled && symlinkssupported = probe (reverse candidates)
    | crippled = probe candidates
    {- Regular repositories only use hashDirMixed, so no work is needed
     - to check whether the file is present. -}
    | otherwise = fixed hashDirMixed
  where
    -- All possible locations, anchored at the git directory.
    candidates = map (gitdir </>) (annexLocations config key)
    -- The single location a given hasher yields; no presence check.
    fixed hasher = return (gitdir </> annexLocation config key hasher)
    -- First candidate accepted by the checker, else the first candidate.
    probe [] = error "internal"
    probe locs@(preferred:_) = fromMaybe preferred <$> firstM checker locs
|
2011-01-27 21:00:32 +00:00
|
|
|
|
2015-03-04 20:08:41 +00:00
|
|
|
{- Works out the symlink target that links a file to an annexed object.
 -
 - The target is relative to the directory containing the file, and is
 - converted with toInternalGitPath. Presence of the object is not
 - checked: the checker handed to gitAnnexLocation' always succeeds. -}
gitAnnexLink :: FilePath -> Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexLink file key r config = do
    cwd <- getCurrentDirectory
    let linkdir = parentDir (unixAbs cwd file)
    target <- gitAnnexLocation' key r config False False (const (return True)) (gitDirFrom cwd)
    toInternalGitPath <$> relPathDirToFile linkdir target
  where
    gitDirFrom cwd
        | coreSymlinks config || not (needsSubmoduleFixup r) =
            Git.localGitDir r
        {- Special case for git submodules on filesystems that do not
         - support symlinks; generate a link target that will work
         - portably. -}
        | otherwise = unixAbs cwd (Git.repoPath r </> ".git")
    -- Absolute path of p taken from base, both sides in internal git form.
    unixAbs base p = toInternalGitPath
        (absPathFrom (toInternalGitPath base) (toInternalGitPath p))
|
2013-04-04 19:46:33 +00:00
|
|
|
|
2016-05-16 21:05:42 +00:00
|
|
|
{- Works out the symlink target that a typical git repository would use,
 - one with .git at the top of the work tree.
 -
 - This runs gitAnnexLink on an adjusted repository and config: a local
 - repository with a work tree has its gitdir pointed at .git inside that
 - work tree, and the config is set to a non-crippled filesystem with
 - symlink support. -}
gitAnnexLinkCanonical :: FilePath -> Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexLinkCanonical file key r config =
    gitAnnexLink file key canonicalrepo canonicalconfig
  where
    canonicalrepo = case Git.location r of
        loc@Git.Local { Git.worktree = Just wt } ->
            r { Git.location = loc { Git.gitdir = wt </> ".git" } }
        _ -> r
    canonicalconfig = config
        { annexCrippledFileSystem = False
        , coreSymlinks = True
        }
|
|
|
|
|
2014-01-28 20:01:19 +00:00
|
|
|
{- Lock file for a key's content: the key's gitAnnexLocation with ".lck"
 - appended. -}
gitAnnexContentLock :: Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexContentLock key r config =
    (++ ".lck") <$> gitAnnexLocation key r config
|
|
|
|
|
2012-12-07 18:40:31 +00:00
|
|
|
{- Mapping file from a key to the file(s) in the git repository: the
 - key's gitAnnexLocation with ".map" appended.
 - Used in direct mode. -}
gitAnnexMapping :: Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexMapping key r config =
    (++ ".map") <$> gitAnnexLocation key r config
|
|
|
|
|
2012-12-07 21:28:23 +00:00
|
|
|
{- Cache file holding information about a key's content, used to tell
 - whether a file has changed: the key's gitAnnexLocation with ".cache"
 - appended.
 - Used in direct mode. -}
gitAnnexInodeCache :: Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexInodeCache key r config =
    (++ ".cache") <$> gitAnnexLocation key r config
|
|
|
|
|
2013-02-19 20:26:07 +00:00
|
|
|
{- The inode sentinal file, "sentinal" inside the annex directory. -}
gitAnnexInodeSentinal :: Git.Repo -> FilePath
gitAnnexInodeSentinal = (</> "sentinal") . gitAnnexDir
|
|
|
|
|
|
|
|
{- Cache file kept next to the inode sentinal file: its path with
 - ".cache" appended. -}
gitAnnexInodeSentinalCache :: Git.Repo -> FilePath
gitAnnexInodeSentinalCache = (++ ".cache") . gitAnnexInodeSentinal
|
|
|
|
|
2011-03-03 18:51:57 +00:00
|
|
|
{- The annex directory of a repository. -}
|
2011-01-27 21:00:32 +00:00
|
|
|
gitAnnexDir :: Git.Repo -> FilePath
|
Clean up handling of git directory and git worktree.
Baked into the code was an assumption that a repository's git directory
could be determined by adding ".git" to its work tree (or nothing for bare
repos). That fails when core.worktree, or GIT_DIR and GIT_WORK_TREE are
used to separate the two.
This was attacked at the type level, by storing the gitdir and worktree
separately, so Nothing for the worktree means a bare repo.
A complication arose because we don't learn where a repository is bare
until its configuration is read. So another Location type handles
repositories that have not had their config read yet. I am not entirely
happy with this being a Location type, rather than representing them
entirely separate from the Git type. The new code is not worse than the
old, but better types could enforce more safety.
Added support for core.worktree. Overriding it with -c isn't supported
because it's not really clear what to do if a git repo's config is read, is
not bare, and is then overridden to bare. What is the right git directory
in this case? I will worry about this if/when someone has a use case for
overriding core.worktree with -c. (See Git.Config.updateLocation)
Also removed and renamed some functions like gitDir and workTree that
misused git's terminology.
One minor regression is known: git annex add in a bare repository does not
print a nice error message, but runs git ls-files in a way that fails
earlier with a less nice error message. This is because before --work-tree
was always passed to git commands, even in a bare repo, while now it's not.
2012-05-18 20:38:26 +00:00
|
|
|
gitAnnexDir repo = addTrailingPathSeparator (Git.localGitDir repo </> annexDir)
|
2010-11-07 21:36:24 +00:00
|
|
|
|
2011-11-29 02:43:51 +00:00
|
|
|
{- The part of the annex directory where file contents are stored. -}
|
2011-01-27 21:00:32 +00:00
|
|
|
gitAnnexObjectDir :: Git.Repo -> FilePath
|
Clean up handling of git directory and git worktree.
Baked into the code was an assumption that a repository's git directory
could be determined by adding ".git" to its work tree (or nothing for bare
repos). That fails when core.worktree, or GIT_DIR and GIT_WORK_TREE are
used to separate the two.
This was attacked at the type level, by storing the gitdir and worktree
separately, so Nothing for the worktree means a bare repo.
A complication arose because we don't learn where a repository is bare
until its configuration is read. So another Location type handles
repositories that have not had their config read yet. I am not entirely
happy with this being a Location type, rather than representing them
entirely separate from the Git type. The new code is not worse than the
old, but better types could enforce more safety.
Added support for core.worktree. Overriding it with -c isn't supported
because it's not really clear what to do if a git repo's config is read, is
not bare, and is then overridden to bare. What is the right git directory
in this case? I will worry about this if/when someone has a use case for
overriding core.worktree with -c. (See Git.Config.updateLocation)
Also removed and renamed some functions like gitDir and workTree that
misused git's terminology.
One minor regression is known: git annex add in a bare repository does not
print a nice error message, but runs git ls-files in a way that fails
earlier with a less nice error message. This is because before --work-tree
was always passed to git commands, even in a bare repo, while now it's not.
2012-05-18 20:38:26 +00:00
|
|
|
gitAnnexObjectDir repo = addTrailingPathSeparator (Git.localGitDir repo </> objectDir)
|
2010-11-08 19:14:54 +00:00
|
|
|
|
2014-02-26 20:52:56 +00:00
|
|
|
{- Directory for miscellaneous temp files: .git/annex/misctmp/ -}
gitAnnexTmpMiscDir :: Git.Repo -> FilePath
gitAnnexTmpMiscDir repo = addTrailingPathSeparator (gitAnnexDir repo </> "misctmp")
|
|
|
|
|
|
|
|
{- Directory for temp files holding keys' contents: .git/annex/tmp/ -}
gitAnnexTmpObjectDir :: Git.Repo -> FilePath
gitAnnexTmpObjectDir repo = addTrailingPathSeparator (gitAnnexDir repo </> "tmp")
|
2010-10-17 20:39:30 +00:00
|
|
|
|
2013-04-02 17:13:42 +00:00
|
|
|
{- Temp file for a given key's content: the key's keyFile name inside the
 - gitAnnexTmpObjectDir. -}
gitAnnexTmpObjectLocation :: Key -> Git.Repo -> FilePath
gitAnnexTmpObjectLocation key repo = tmpdir </> keyFile key
  where
    tmpdir = gitAnnexTmpObjectDir repo
|
2011-01-28 18:10:50 +00:00
|
|
|
|
2017-11-29 17:49:52 +00:00
|
|
|
{- Takes a temp file, such as a gitAnnexTmpObjectLocation, and names a
 - subdirectory alongside it that can serve as a work area while the
 - key's content is being received.
 -
 - The directories have to be created in a particular order;
 - use Annex.Content.withTmpWorkDir to set them up.
 -}
gitAnnexTmpWorkDir :: FilePath -> FilePath
gitAnnexTmpWorkDir p = parent </> ("work." ++ name)
  where
    -- The "work." prefix avoids name conflict with any other keys.
    (parent, name) = splitFileName p
|
2017-11-29 17:49:52 +00:00
|
|
|
|
2011-06-21 18:44:56 +00:00
|
|
|
{- Directory for bad files found during fsck: .git/annex/bad/ -}
gitAnnexBadDir :: Git.Repo -> FilePath
gitAnnexBadDir repo = addTrailingPathSeparator (gitAnnexDir repo </> "bad")
|
2010-11-13 18:59:27 +00:00
|
|
|
|
2011-04-29 17:59:00 +00:00
|
|
|
{- Bad file for a given key: the key's keyFile name inside the
 - gitAnnexBadDir. -}
gitAnnexBadLocation :: Key -> Git.Repo -> FilePath
gitAnnexBadLocation key repo = baddir </> keyFile key
  where
    baddir = gitAnnexBadDir repo
|
2011-04-29 17:59:00 +00:00
|
|
|
|
2012-04-14 18:22:33 +00:00
|
|
|
{- Log used to number possibly unused keys. The file name is the given
 - prefix followed by "unused", eg .git/annex/foounused -}
gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath
gitAnnexUnusedLog prefix repo = gitAnnexDir repo </> logname
  where
    logname = prefix ++ "unused"
|
2010-11-15 22:04:19 +00:00
|
|
|
|
2015-12-09 21:00:37 +00:00
|
|
|
{- Database of information about keys, kept in .git/annex/keys/
 -
 - Returned without a trailing path separator. -}
gitAnnexKeysDb :: Git.Repo -> FilePath
gitAnnexKeysDb = (</> "keys") . gitAnnexDir
|
2015-12-07 17:42:03 +00:00
|
|
|
|
2015-12-09 21:00:37 +00:00
|
|
|
{- Lock file for the keys database: the gitAnnexKeysDb path with ".lck"
 - appended. -}
gitAnnexKeysDbLock :: Git.Repo -> FilePath
gitAnnexKeysDbLock = (++ ".lck") . gitAnnexKeysDb
|
2015-12-07 17:42:03 +00:00
|
|
|
|
2018-08-22 17:04:12 +00:00
|
|
|
{- Holds the stat of the last index file that was reconciled with the
 - keys database: the gitAnnexKeysDb path with ".cache" appended. -}
gitAnnexKeysDbIndexCache :: Git.Repo -> FilePath
gitAnnexKeysDbIndexCache = (++ ".cache") . gitAnnexKeysDb
|
|
|
|
|
2015-02-17 21:08:11 +00:00
|
|
|
{- Per-UUID location of information about incremental fscks:
 - .git/annex/fsck/uuid
 -
 - Returned without a trailing path separator. -}
gitAnnexFsckDir :: UUID -> Git.Repo -> FilePath
gitAnnexFsckDir uuid repo = fsckbase </> fromUUID uuid
  where
    fsckbase = gitAnnexDir repo </> "fsck"
|
|
|
|
|
|
|
|
{- State file storing information about incremental fscks; "state"
 - inside the gitAnnexFsckDir. -}
gitAnnexFsckState :: UUID -> Git.Repo -> FilePath
gitAnnexFsckState uuid repo = gitAnnexFsckDir uuid repo </> "state"
|
|
|
|
|
2015-02-18 19:54:24 +00:00
|
|
|
{- Directory holding the database that records fsck info; "db" inside
 - the gitAnnexFsckDir. Returned without a trailing path separator. -}
gitAnnexFsckDbDir :: UUID -> Git.Repo -> FilePath
gitAnnexFsckDbDir uuid repo = gitAnnexFsckDir uuid repo </> "db"
|
2015-02-17 21:08:11 +00:00
|
|
|
|
|
|
|
{- Lock file for the fsck database; "fsck.lck" inside the
 - gitAnnexFsckDir. -}
gitAnnexFsckDbLock :: UUID -> Git.Repo -> FilePath
gitAnnexFsckDbLock uuid repo = gitAnnexFsckDir uuid repo </> "fsck.lck"
|
2012-09-25 18:16:34 +00:00
|
|
|
|
2013-10-22 20:02:52 +00:00
|
|
|
{- Per-UUID log storing the results of git fscks:
 - .git/annex/fsckresults/uuid -}
gitAnnexFsckResultsLog :: UUID -> Git.Repo -> FilePath
gitAnnexFsckResultsLog uuid repo = resultsdir </> fromUUID uuid
  where
    resultsdir = gitAnnexDir repo </> "fsckresults"
|
|
|
|
|
2018-10-25 18:43:13 +00:00
|
|
|
{- Log of smudged worktree files that need to be updated:
 - .git/annex/smudge.log -}
gitAnnexSmudgeLog :: Git.Repo -> FilePath
gitAnnexSmudgeLog = (</> "smudge.log") . gitAnnexDir
|
|
|
|
|
|
|
|
{- The smudge lock file: .git/annex/smudge.lck -}
gitAnnexSmudgeLock :: Git.Repo -> FilePath
gitAnnexSmudgeLock = (</> "smudge.lck") . gitAnnexDir
|
|
|
|
|
2017-09-04 17:52:22 +00:00
|
|
|
{- Per-UUID location of information about exports to special remotes:
 - .git/annex/export/uuid
 -
 - Returned without a trailing path separator. -}
gitAnnexExportDir :: UUID -> Git.Repo -> FilePath
gitAnnexExportDir uuid repo = exportbase </> fromUUID uuid
  where
    exportbase = gitAnnexDir repo </> "export"
|
|
|
|
|
|
|
|
{- Directory holding the database that records export info; "db" inside
 - the gitAnnexExportDir. Returned without a trailing path separator;
 - gitAnnexExportLock appends to this value directly. -}
gitAnnexExportDbDir :: UUID -> Git.Repo -> FilePath
gitAnnexExportDbDir uuid repo = gitAnnexExportDir uuid repo </> "db"
|
|
|
|
|
2017-09-18 16:12:11 +00:00
|
|
|
{- Lock file for a special remote's export state: the
 - gitAnnexExportDbDir path with ".lck" appended. -}
gitAnnexExportLock :: UUID -> Git.Repo -> FilePath
gitAnnexExportLock uuid repo = dbdir ++ ".lck"
  where
    dbdir = gitAnnexExportDbDir uuid repo
|
2017-09-18 16:12:11 +00:00
|
|
|
|
2013-10-08 15:48:28 +00:00
|
|
|
{- Records when scheduled jobs were last run: .git/annex/schedulestate -}
gitAnnexScheduleState :: Git.Repo -> FilePath
gitAnnexScheduleState = (</> "schedulestate") . gitAnnexDir
|
|
|
|
|
2012-09-26 16:06:44 +00:00
|
|
|
{- Directory storing the credentials needed to access some special
 - remotes: .git/annex/creds/ -}
gitAnnexCredsDir :: Git.Repo -> FilePath
gitAnnexCredsDir repo = addTrailingPathSeparator (gitAnnexDir repo </> "creds")
|
|
|
|
|
2014-03-01 01:32:18 +00:00
|
|
|
{- .git/annex/certificate.pem and .git/annex/privkey.pem are used by the
 - webapp when HTTPS is enabled. This is the certificate. -}
gitAnnexWebCertificate :: Git.Repo -> FilePath
gitAnnexWebCertificate = (</> "certificate.pem") . gitAnnexDir
|
|
|
|
{- The private key file, .git/annex/privkey.pem, that goes with the
 - gitAnnexWebCertificate. -}
gitAnnexWebPrivKey :: Git.Repo -> FilePath
gitAnnexWebPrivKey = (</> "privkey.pem") . gitAnnexDir
|
|
|
|
|
2013-08-03 05:40:21 +00:00
|
|
|
{- Directory where importfeeds records per-key (url) state:
 - .git/annex/feedstate/ -}
gitAnnexFeedStateDir :: Git.Repo -> FilePath
gitAnnexFeedStateDir repo = addTrailingPathSeparator (gitAnnexDir repo </> "feedstate")
|
|
|
|
|
|
|
|
{- Feed state file for a key: the key's keyFile name inside the
 - gitAnnexFeedStateDir. -}
gitAnnexFeedState :: Key -> Git.Repo -> FilePath
gitAnnexFeedState k repo = statedir </> keyFile k
  where
    statedir = gitAnnexFeedStateDir repo
|
|
|
|
|
2016-04-06 19:33:29 +00:00
|
|
|
{- Directory used as an empty work tree for direct mode merges and for
 - merges in adjusted branches: .git/annex/merge/ -}
gitAnnexMergeDir :: Git.Repo -> FilePath
gitAnnexMergeDir repo = addTrailingPathSeparator (gitAnnexDir repo </> "merge")
|
|
|
|
|
2012-09-26 16:06:44 +00:00
|
|
|
{- Directory recording the keys currently being transferred, along with
 - other transfer bookkeeping info: .git/annex/transfer/ -}
gitAnnexTransferDir :: Git.Repo -> FilePath
gitAnnexTransferDir repo = addTrailingPathSeparator (gitAnnexDir repo </> "transfer")
|
|
|
|
|
2011-06-23 13:56:04 +00:00
|
|
|
{- Directory where changes made to the git-annex branch are journalled:
 - .git/annex/journal/ -}
gitAnnexJournalDir :: Git.Repo -> FilePath
gitAnnexJournalDir repo = addTrailingPathSeparator (gitAnnexDir repo </> "journal")
|
|
|
|
|
2011-10-03 20:32:36 +00:00
|
|
|
{- The journal's lock file: .git/annex/journal.lck -}
gitAnnexJournalLock :: Git.Repo -> FilePath
gitAnnexJournalLock = (</> "journal.lck") . gitAnnexDir
|
|
|
|
|
2014-03-21 18:39:50 +00:00
|
|
|
{- The pre-commit hook's lock file: .git/annex/precommit.lck -}
gitAnnexPreCommitLock :: Git.Repo -> FilePath
gitAnnexPreCommitLock = (</> "precommit.lck") . gitAnnexDir
|
|
|
|
|
2014-07-09 19:07:53 +00:00
|
|
|
{- The lock file for direct mode merge: .git/annex/merge.lck -}
gitAnnexMergeLock :: Git.Repo -> FilePath
gitAnnexMergeLock = (</> "merge.lck") . gitAnnexDir
|
|
|
|
|
2011-12-11 18:14:28 +00:00
|
|
|
{- Index file in which changes to the git-annex branch are staged:
 - .git/annex/index -}
gitAnnexIndex :: Git.Repo -> FilePath
gitAnnexIndex = (</> "index") . gitAnnexDir
|
|
|
|
|
2013-10-03 19:06:58 +00:00
|
|
|
{- Records the ref of the git-annex branch that the index was last
 - updated to.
 -
 - Despite the .lck in its name, which is a historical accident, this
 - file is not used as a lock. -}
gitAnnexIndexStatus :: Git.Repo -> FilePath
gitAnnexIndexStatus = (</> "index.lck") . gitAnnexDir
|
2011-12-11 20:11:13 +00:00
|
|
|
|
2014-02-18 21:38:23 +00:00
|
|
|
{- Index file used when generating a filtered branch view:
 - .git/annex/viewindex -}
gitAnnexViewIndex :: Git.Repo -> FilePath
gitAnnexViewIndex = (</> "viewindex") . gitAnnexDir
|
|
|
|
|
|
|
|
{- Log of recently accessed views: .git/annex/viewlog -}
gitAnnexViewLog :: Git.Repo -> FilePath
gitAnnexViewLog = (</> "viewlog") . gitAnnexDir
|
|
|
|
|
2016-07-17 16:11:05 +00:00
|
|
|
{- File listing the refs that have already been merged into the
 - git-annex branch: .git/annex/mergedrefs -}
gitAnnexMergedRefs :: Git.Repo -> FilePath
gitAnnexMergedRefs = (</> "mergedrefs") . gitAnnexDir
|
|
|
|
|
2013-08-28 19:57:42 +00:00
|
|
|
{- File listing the refs that should not be merged into the git-annex
 - branch: .git/annex/ignoredrefs -}
gitAnnexIgnoredRefs :: Git.Repo -> FilePath
gitAnnexIgnoredRefs = (</> "ignoredrefs") . gitAnnexDir
|
|
|
|
|
2012-06-11 05:20:19 +00:00
|
|
|
{- The pid file used in daemon mode: .git/annex/daemon.pid -}
gitAnnexPidFile :: Git.Repo -> FilePath
gitAnnexPidFile = (</> "daemon.pid") . gitAnnexDir
|
|
|
|
|
2015-11-12 21:47:31 +00:00
|
|
|
{- The pid lock file used in pidlock mode: .git/annex/pidlock -}
gitAnnexPidLockFile :: Git.Repo -> FilePath
gitAnnexPidLockFile = (</> "pidlock") . gitAnnexDir
|
|
|
|
|
2012-06-13 17:35:15 +00:00
|
|
|
{- The status file used in daemon mode: .git/annex/daemon.status -}
gitAnnexDaemonStatusFile :: Git.Repo -> FilePath
gitAnnexDaemonStatusFile = (</> "daemon.status") . gitAnnexDir
|
|
|
|
|
2012-06-11 04:39:09 +00:00
|
|
|
{- The log file used when running in daemon mode. -}
gitAnnexLogFile :: Git.Repo -> FilePath
gitAnnexLogFile repo = gitAnnexDir repo </> "daemon.log"
|
|
|
|
|
2013-05-23 23:00:46 +00:00
|
|
|
{- The log file written by the fuzz test. -}
gitAnnexFuzzTestLogFile :: Git.Repo -> FilePath
gitAnnexFuzzTestLogFile repo = gitAnnexDir repo </> "fuzztest.log"
|
|
|
|
|
2012-07-26 03:13:01 +00:00
|
|
|
{- The html shim file that is used to launch the webapp. -}
gitAnnexHtmlShim :: Git.Repo -> FilePath
gitAnnexHtmlShim repo = gitAnnexDir repo </> "webapp.html"
|
|
|
|
|
2012-09-18 21:50:07 +00:00
|
|
|
{- The file in which the url to the webapp is stored. -}
gitAnnexUrlFile :: Git.Repo -> FilePath
gitAnnexUrlFile repo = gitAnnexDir repo </> "url"
|
|
|
|
|
2012-10-03 21:04:52 +00:00
|
|
|
{- Temporary file used while editing configuration from the git-annex
 - branch. -}
gitAnnexTmpCfgFile :: Git.Repo -> FilePath
gitAnnexTmpCfgFile repo = gitAnnexDir repo </> "config.tmp"
|
|
|
|
|
2012-01-20 19:34:52 +00:00
|
|
|
{- The .git/annex/ssh/ directory, used for ssh connection caching.
 - The returned path always ends with a path separator. -}
gitAnnexSshDir :: Git.Repo -> FilePath
gitAnnexSshDir repo = addTrailingPathSeparator (gitAnnexDir repo </> "ssh")
|
|
|
|
|
2012-03-04 20:00:24 +00:00
|
|
|
{- The .git/annex/remotes/ directory, used for remote-specific state.
 - The returned path always ends with a path separator. -}
gitAnnexRemotesDir :: Git.Repo -> FilePath
gitAnnexRemotesDir repo =
  addTrailingPathSeparator (gitAnnexDir repo </> "remotes")
|
|
|
|
|
2012-08-31 22:59:57 +00:00
|
|
|
{- Default base directory name that the assistant uses when it makes
 - repositories. -}
gitAnnexAssistantDefaultDir :: FilePath
gitAnnexAssistantDefaultDir = "annex"
|
|
|
|
|
Better sanitization of problem characters when generating URL and WORM keys.
FAT has a lot of characters it does not allow in filenames, like ? and *
It's probably the worst offender, but other filesystems also have
limitiations.
In 2011, I made keyFile escape : to handle FAT, but missed the other
characters. It also turns out that when I did that, I was also living
dangerously; any existing keys that contained a : had their object
location change. Oops.
So, adding new characters to escape to keyFile is out. Well, it would be
possible to make keyFile behave differently on a per-filesystem basis, but
this would be a real nightmare to get right. Consider that a rsync special
remote uses keyFile to determine the filenames to use, and we don't know
the underlying filesystem on the rsync server..
Instead, I have gone for a solution that is backwards compatable and
simple. Its only downside is that already generated URL and WORM keys
might not be able to be stored on FAT or some other filesystem that
dislikes a character used in the key. (In this case, the user can just
migrate the problem keys to a checksumming backend. If this became a big
problem, fsck could be made to detect these and suggest a migration.)
Going forward, new keys that are created will escape all characters that
are likely to cause problems. And if some filesystem comes along that's
even worse than FAT (seems unlikely, but here it is 2013, and people are
still using FAT!), additional characters can be added to the set that are
escaped without difficulty.
(Also, made WORM limit the part of the filename that is embedded in the key,
to deal with filesystem filename length limits. This could have already
been a problem, but is more likely now, since the escaping of the filename
can make it longer.)
This commit was sponsored by Ian Downes
2013-10-05 19:01:49 +00:00
|
|
|
{- Sanitizes a String that will be used as part of a Key's keyName,
|
2017-08-17 18:46:33 +00:00
|
|
|
- dealing with characters that cause problems.
|
Better sanitization of problem characters when generating URL and WORM keys.
FAT has a lot of characters it does not allow in filenames, like ? and *
It's probably the worst offender, but other filesystems also have
limitiations.
In 2011, I made keyFile escape : to handle FAT, but missed the other
characters. It also turns out that when I did that, I was also living
dangerously; any existing keys that contained a : had their object
location change. Oops.
So, adding new characters to escape to keyFile is out. Well, it would be
possible to make keyFile behave differently on a per-filesystem basis, but
this would be a real nightmare to get right. Consider that a rsync special
remote uses keyFile to determine the filenames to use, and we don't know
the underlying filesystem on the rsync server..
Instead, I have gone for a solution that is backwards compatable and
simple. Its only downside is that already generated URL and WORM keys
might not be able to be stored on FAT or some other filesystem that
dislikes a character used in the key. (In this case, the user can just
migrate the problem keys to a checksumming backend. If this became a big
problem, fsck could be made to detect these and suggest a migration.)
Going forward, new keys that are created will escape all characters that
are likely to cause problems. And if some filesystem comes along that's
even worse than FAT (seems unlikely, but here it is 2013, and people are
still using FAT!), additional characters can be added to the set that are
escaped without difficulty.
(Also, made WORM limit the part of the filename that is embedded in the key,
to deal with filesystem filename length limits. This could have already
been a problem, but is more likely now, since the escaping of the filename
can make it longer.)
This commit was sponsored by Ian Downes
2013-10-05 19:01:49 +00:00
|
|
|
-
|
|
|
|
- This is used when a new Key is initially being generated, eg by getKey.
|
|
|
|
 - Unlike keyFile and fileKey, it does not need to be a reversible
|
2016-06-02 01:46:58 +00:00
|
|
|
- escaping. Also, it's ok to change this to add more problematic
|
Better sanitization of problem characters when generating URL and WORM keys.
FAT has a lot of characters it does not allow in filenames, like ? and *
It's probably the worst offender, but other filesystems also have
limitiations.
In 2011, I made keyFile escape : to handle FAT, but missed the other
characters. It also turns out that when I did that, I was also living
dangerously; any existing keys that contained a : had their object
location change. Oops.
So, adding new characters to escape to keyFile is out. Well, it would be
possible to make keyFile behave differently on a per-filesystem basis, but
this would be a real nightmare to get right. Consider that a rsync special
remote uses keyFile to determine the filenames to use, and we don't know
the underlying filesystem on the rsync server..
Instead, I have gone for a solution that is backwards compatable and
simple. Its only downside is that already generated URL and WORM keys
might not be able to be stored on FAT or some other filesystem that
dislikes a character used in the key. (In this case, the user can just
migrate the problem keys to a checksumming backend. If this became a big
problem, fsck could be made to detect these and suggest a migration.)
Going forward, new keys that are created will escape all characters that
are likely to cause problems. And if some filesystem comes along that's
even worse than FAT (seems unlikely, but here it is 2013, and people are
still using FAT!), additional characters can be added to the set that are
escaped without difficulty.
(Also, made WORM limit the part of the filename that is embedded in the key,
to deal with filesystem filename length limits. This could have already
been a problem, but is more likely now, since the escaping of the filename
can make it longer.)
This commit was sponsored by Ian Downes
2013-10-05 19:01:49 +00:00
|
|
|
 - characters later, unlike changing keyFile, which could result in the
|
|
|
|
- filenames used for existing keys changing and contents getting lost.
|
|
|
|
-
|
|
|
|
- It is, however, important that the input and output of this function
|
|
|
|
- have a 1:1 mapping, to avoid two different inputs from mapping to the
|
|
|
|
- same key.
|
|
|
|
-}
|
|
|
|
preSanitizeKeyName :: String -> String
-- Not resanitizing, so a literal ',' in the input is doubled.
preSanitizeKeyName name = preSanitizeKeyName' False name
|
|
|
|
|
|
|
|
{- Worker shared by preSanitizeKeyName and reSanitizeKeyName.
 -
 - Each character of the input is handled independently:
 -
 - * ASCII letters and digits, and '.', '-', '_' (common, assumed safe)
 -   are kept as-is.
 - * '/', '%' and ':' are kept as-is, since keyFile handles them.
 - * ',' is safe and uncommon, so it serves as the escape character.
 -   A literal ',' is doubled to ",,", unless resanitize is True, in
 -   which case it is left as a single ','.
 - * Any other character becomes ',' followed by its decimal code point.
 -
 - NOTE(review): the decimal code point has no terminator, so an escaped
 - character that is followed by a digit can yield the same output as a
 - different escaped character. Changing this would change the names of
 - newly generated keys, so it is only noted here.
 -}
preSanitizeKeyName' :: Bool -> String -> String
preSanitizeKeyName' resanitize = concatMap sanitizeChar
  where
    sanitizeChar ch
      | keptVerbatim ch = [ch]
      | ch == ',' = if resanitize then "," else ",,"
      | otherwise = ',' : show (ord ch)

    -- Char lists are spelled out as list literals on purpose: with
    -- OverloadedStrings enabled, `elem` on a string literal is ambiguous.
    keptVerbatim ch =
      isAsciiUpper ch
        || isAsciiLower ch
        || isDigit ch
        || ch `elem` ['.', '-', '_']
        || ch `elem` ['/', '%', ':']
|
Better sanitization of problem characters when generating URL and WORM keys.
FAT has a lot of characters it does not allow in filenames, like ? and *
It's probably the worst offender, but other filesystems also have
limitiations.
In 2011, I made keyFile escape : to handle FAT, but missed the other
characters. It also turns out that when I did that, I was also living
dangerously; any existing keys that contained a : had their object
location change. Oops.
So, adding new characters to escape to keyFile is out. Well, it would be
possible to make keyFile behave differently on a per-filesystem basis, but
this would be a real nightmare to get right. Consider that a rsync special
remote uses keyFile to determine the filenames to use, and we don't know
the underlying filesystem on the rsync server..
Instead, I have gone for a solution that is backwards compatable and
simple. Its only downside is that already generated URL and WORM keys
might not be able to be stored on FAT or some other filesystem that
dislikes a character used in the key. (In this case, the user can just
migrate the problem keys to a checksumming backend. If this became a big
problem, fsck could be made to detect these and suggest a migration.)
Going forward, new keys that are created will escape all characters that
are likely to cause problems. And if some filesystem comes along that's
even worse than FAT (seems unlikely, but here it is 2013, and people are
still using FAT!), additional characters can be added to the set that are
escaped without difficulty.
(Also, made WORM limit the part of the filename that is embedded in the key,
to deal with filesystem filename length limits. This could have already
been a problem, but is more likely now, since the escaping of the filename
can make it longer.)
This commit was sponsored by Ian Downes
2013-10-05 19:01:49 +00:00
|
|
|
|
2017-08-17 19:09:38 +00:00
|
|
|
{- Takes a keyName that was sanitized by an old version of
 - preSanitizeKeyName, and converts it to be sanitized the way the new
 - version does it. -}
reSanitizeKeyName :: String -> String
reSanitizeKeyName oldname = preSanitizeKeyName' True oldname
|
|
|
|
|
2011-12-02 18:39:47 +00:00
|
|
|
{- Converts a key into a filename fragment without any directory.
|
2010-10-13 07:41:12 +00:00
|
|
|
-
|
|
|
|
- Escape "/" in the key name, to keep a flat tree of files and avoid
|
|
|
|
- issues with keys containing "/../" or ending with "/" etc.
|
|
|
|
-
|
|
|
|
- "/" is escaped to "%" because it's short and rarely used, and resembles
|
|
|
|
- a slash
|
|
|
|
- "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping
|
|
|
|
- is one to one.
|
Better sanitization of problem characters when generating URL and WORM keys.
FAT has a lot of characters it does not allow in filenames, like ? and *
It's probably the worst offender, but other filesystems also have
limitiations.
In 2011, I made keyFile escape : to handle FAT, but missed the other
characters. It also turns out that when I did that, I was also living
dangerously; any existing keys that contained a : had their object
location change. Oops.
So, adding new characters to escape to keyFile is out. Well, it would be
possible to make keyFile behave differently on a per-filesystem basis, but
this would be a real nightmare to get right. Consider that a rsync special
remote uses keyFile to determine the filenames to use, and we don't know
the underlying filesystem on the rsync server..
Instead, I have gone for a solution that is backwards compatable and
simple. Its only downside is that already generated URL and WORM keys
might not be able to be stored on FAT or some other filesystem that
dislikes a character used in the key. (In this case, the user can just
migrate the problem keys to a checksumming backend. If this became a big
problem, fsck could be made to detect these and suggest a migration.)
Going forward, new keys that are created will escape all characters that
are likely to cause problems. And if some filesystem comes along that's
even worse than FAT (seems unlikely, but here it is 2013, and people are
still using FAT!), additional characters can be added to the set that are
escaped without difficulty.
(Also, made WORM limit the part of the filename that is embedded in the key,
to deal with filesystem filename length limits. This could have already
been a problem, but is more likely now, since the escaping of the filename
can make it longer.)
This commit was sponsored by Ian Downes
2013-10-05 19:01:49 +00:00
|
|
|
- ":" is escaped to "&c", because it seemed like a good idea at the time.
|
|
|
|
-
|
|
|
|
- Changing what this function escapes and how is not a good idea, as it
|
|
|
|
- can cause existing objects to get lost.
|
2011-10-16 04:04:26 +00:00
|
|
|
-}
|
2010-10-13 07:41:12 +00:00
|
|
|
keyFile :: Key -> FilePath
-- Thin FilePath wrapper; the escaping itself is done by keyFile'.
keyFile key = fromRawFilePath (keyFile' key)
|
|
|
|
|
|
|
|
{- RawFilePath version of keyFile.
 -
 - The serialized key is escaped as follows: '&' becomes "&a", '%' becomes
 - "&s", ':' becomes "&c", and '/' becomes "%". When the serialized key
 - contains none of those characters, it is returned unchanged without
 - being rebuilt. -}
keyFile' :: Key -> RawFilePath
keyFile' k
  | needsEscaping = S8.concatMap escapeChar serialized
  | otherwise = serialized
  where
    serialized = serializeKey' k
    needsEscaping = any (`S8.elem` serialized) ['&', '%', ':', '/']
    escapeChar ch = case ch of
      '&' -> "&a"
      '%' -> "&s"
      ':' -> "&c"
      '/' -> "%"
      _ -> S8.singleton ch
|
2010-10-13 07:41:12 +00:00
|
|
|
|
2013-10-05 17:49:45 +00:00
|
|
|
{- The inverse of keyFile: converts a filename fragment (ie, the basename
 - of the symlink target) back into a key. -}
fileKey :: FilePath -> Maybe Key
fileKey file = fileKey' (toRawFilePath file)
|
|
|
|
|
|
|
|
{- RawFilePath version of fileKey.
 -
 - The input is split on '%' (the escaped form of '/'); each piece has its
 - '&' escapes decoded, and the pieces are joined back together with '/'
 - before being deserialized.
 -
 - Within a piece, everything before the first '&' is literal. After each
 - '&', the next character selects the decoded character: 'c' is ':',
 - 's' is '%', 'a' is '&'. Any other character is kept as-is (the '&' is
 - dropped), and a '&' with nothing following it decodes to nothing. -}
fileKey' :: RawFilePath -> Maybe Key
fileKey' raw = deserializeKey' (S8.intercalate "/" decodedPieces)
  where
    decodedPieces = map decodePiece (S8.split '%' raw)

    decodePiece piece = case S8.split '&' piece of
      [] -> mempty
      (literal:escaped) -> S8.concat (literal : map decodeEscape escaped)

    decodeEscape :: S8.ByteString -> S8.ByteString
    decodeEscape chunk = case S8.uncons chunk of
      Nothing -> mempty
      Just ('c', rest) -> S8.cons ':' rest
      Just ('s', rest) -> S8.cons '%' rest
      Just ('a', rest) -> S8.cons '&' rest
      Just (other, rest) -> S8.cons other rest
|
2013-10-05 17:49:45 +00:00
|
|
|
|
2015-01-28 20:51:40 +00:00
|
|
|
{- A location to store a key on a special remote that uses a filesystem.
|
|
|
|
- A directory hash is used, to protect against filesystems that dislike
|
|
|
|
- having many items in a single directory.
|
2011-12-02 18:39:47 +00:00
|
|
|
-
|
|
|
|
- The file is put in a directory with the same name, this allows
|
|
|
|
- write-protecting the directory to avoid accidental deletion of the file.
|
|
|
|
-}
|
|
|
|
keyPath :: Key -> Hasher -> FilePath
keyPath key hasher =
  -- The keyFile name is used twice: once for the directory, and once for
  -- the file that is stored inside it.
  let leaf = keyFile key
  in hasher key </> leaf </> leaf
|
2011-12-02 18:39:47 +00:00
|
|
|
|
2015-01-28 20:51:40 +00:00
|
|
|
{- All possible locations to store a key in a special remote
|
|
|
|
- using different directory hashes.
|
|
|
|
-
|
|
|
|
- This is compatible with the annexLocations, for interoperability between
|
|
|
|
- special remotes and git-annex repos.
|
2011-03-15 21:47:00 +00:00
|
|
|
-}
|
2015-01-28 20:51:40 +00:00
|
|
|
keyPaths :: Key -> [FilePath]
-- One path per entry of dirHashes, each applied to the default config.
keyPaths key = [ keyPath key (mkhasher def) | mkhasher <- dirHashes ]
|