git-annex/Locations.hs

228 lines
7.6 KiB
Haskell
Raw Normal View History

{- git-annex file locations
2010-10-27 20:53:54 +00:00
-
2011-12-02 18:39:47 +00:00
- Copyright 2010-2011 Joey Hess <joey@kitenet.net>
2010-10-27 20:53:54 +00:00
-
- Licensed under the GNU GPL version 3 or higher.
-}
2010-10-11 21:52:46 +00:00
module Locations (
2010-10-13 00:04:36 +00:00
keyFile,
2010-10-13 07:41:12 +00:00
fileKey,
2011-12-02 18:39:47 +00:00
keyPaths,
gitAnnexLocation,
annexLocations,
gitAnnexDir,
gitAnnexObjectDir,
gitAnnexTmpDir,
gitAnnexTmpLocation,
gitAnnexBadDir,
gitAnnexBadLocation,
gitAnnexUnusedLog,
gitAnnexJournalDir,
gitAnnexJournalLock,
2011-12-11 18:14:28 +00:00
gitAnnexIndex,
gitAnnexIndexLock,
isLinkToAnnex,
annexHashes,
hashDirMixed,
2011-06-22 21:51:48 +00:00
hashDirLower,
2010-11-08 20:47:36 +00:00
prop_idempotent_fileKey
2010-10-11 21:52:46 +00:00
) where
import Data.Bits
import Data.Word
2011-03-15 21:47:00 +00:00
import Data.Hash.MD5
2010-10-16 20:20:49 +00:00
import Common
2010-10-14 07:18:11 +00:00
import Types
import Types.Key
import qualified Git
{- Conventions:
-
- Functions ending in "Dir" should always return values ending with a
- trailing path separator. Most code does not rely on that, but a few
- things do.
-
- Everything else should not end in a trailing path sepatator.
-
- Only functions (with names starting with "git") that build a path
- based on a git repository should return an absolute path.
- Everything else should use relative paths.
-}
{- The directory git annex uses for local state, relative to the .git
- directory -}
annexDir :: FilePath
annexDir = addTrailingPathSeparator "annex"
{- The directory git annex uses for locally available object content,
- relative to the .git directory -}
objectDir :: FilePath
objectDir = addTrailingPathSeparator $ annexDir </> "objects"
2010-10-13 05:04:06 +00:00
{- Annexed file's possible locations relative to the .git directory.
- There are two different possibilities, using different hashes. -}
annexLocations :: Key -> [FilePath]
2011-11-29 03:20:31 +00:00
annexLocations key = map (annexLocation key) annexHashes
2011-12-02 18:39:47 +00:00
annexLocation :: Key -> Hasher -> FilePath
annexLocation key hasher = objectDir </> keyPath key hasher
2010-10-13 07:41:12 +00:00
{- Annexed file's absolute location in a repository.
-
- When there are multiple possible locations, returns the one where the
- file is actually present.
-
- When the file is not present, returns the location where the file should
- be stored.
-}
gitAnnexLocation :: Key -> Git.Repo -> IO FilePath
gitAnnexLocation key r
| Git.repoIsLocalBare r =
{- Bare repositories default to hashDirLower for new
- content, as it's more portable. -}
check (map inrepo $ annexLocations key)
| otherwise =
{- Non-bare repositories only use hashDirMixed, so
- don't need to do any work to check if the file is
- present. -}
return $ inrepo ".git" </> annexLocation key hashDirMixed
where
2011-12-11 00:53:42 +00:00
inrepo d = Git.workTree r </> d
check locs = fromMaybe (head locs) <$> firstM doesFileExist locs
{- The annex directory of a repository. -}
gitAnnexDir :: Git.Repo -> FilePath
gitAnnexDir r
| Git.repoIsLocalBare r = addTrailingPathSeparator $ Git.workTree r </> annexDir
| otherwise = addTrailingPathSeparator $ Git.workTree r </> ".git" </> annexDir
2010-11-07 21:36:24 +00:00
{- The part of the annex directory where file contents are stored. -}
gitAnnexObjectDir :: Git.Repo -> FilePath
gitAnnexObjectDir r
| Git.repoIsLocalBare r = addTrailingPathSeparator $ Git.workTree r </> objectDir
| otherwise = addTrailingPathSeparator $ Git.workTree r </> ".git" </> objectDir
2010-11-08 19:14:54 +00:00
2011-06-21 18:44:56 +00:00
{- .git/annex/tmp/ is used for temp files -}
gitAnnexTmpDir :: Git.Repo -> FilePath
gitAnnexTmpDir r = addTrailingPathSeparator $ gitAnnexDir r </> "tmp"
2010-10-17 20:39:30 +00:00
{- The temp file to use for a given key. -}
gitAnnexTmpLocation :: Key -> Git.Repo -> FilePath
gitAnnexTmpLocation key r = gitAnnexTmpDir r </> keyFile key
2011-06-21 18:44:56 +00:00
{- .git/annex/bad/ is used for bad files found during fsck -}
gitAnnexBadDir :: Git.Repo -> FilePath
gitAnnexBadDir r = addTrailingPathSeparator $ gitAnnexDir r </> "bad"
{- The bad file to use for a given key. -}
gitAnnexBadLocation :: Key -> Git.Repo -> FilePath
gitAnnexBadLocation key r = gitAnnexBadDir r </> keyFile key
{- .git/annex/*unused is used to number possibly unused keys -}
gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath
gitAnnexUnusedLog prefix r = gitAnnexDir r </> (prefix ++ "unused")
2010-11-15 22:04:19 +00:00
{- .git/annex/journal/ is used to journal changes made to the git-annex
- branch -}
gitAnnexJournalDir :: Git.Repo -> FilePath
gitAnnexJournalDir r = addTrailingPathSeparator $ gitAnnexDir r </> "journal"
{- Lock file for the journal. -}
gitAnnexJournalLock :: Git.Repo -> FilePath
gitAnnexJournalLock r = gitAnnexDir r </> "journal.lck"
2011-12-11 18:14:28 +00:00
{- .git/annex/index is used to stage changes to the git-annex branch -}
gitAnnexIndex :: Git.Repo -> FilePath
gitAnnexIndex r = gitAnnexDir r </> "index"
{- Lock file for .git/annex/index. -}
gitAnnexIndexLock :: Git.Repo -> FilePath
gitAnnexIndexLock r = gitAnnexDir r </> "index.lck"
{- Checks a symlink target to see if it appears to point to annexed content. -}
isLinkToAnnex :: FilePath -> Bool
isLinkToAnnex s = ("/.git/" ++ objectDir) `isInfixOf` s
2011-12-02 18:39:47 +00:00
{- Converts a key into a filename fragment without any directory.
2010-10-13 07:41:12 +00:00
-
- Escape "/" in the key name, to keep a flat tree of files and avoid
- issues with keys containing "/../" or ending with "/" etc.
-
- "/" is escaped to "%" because it's short and rarely used, and resembles
- a slash
- "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping
- is one to one.
2011-03-16 03:39:04 +00:00
- ":" is escaped to "&c", because despite it being 2011, people still care
- about FAT.
2011-10-16 04:04:26 +00:00
-}
2010-10-13 07:41:12 +00:00
keyFile :: Key -> FilePath
2011-03-16 03:39:04 +00:00
keyFile key = replace "/" "%" $ replace ":" "&c" $
replace "%" "&s" $ replace "&" "&a" $ show key
2010-10-13 07:41:12 +00:00
2011-12-02 18:39:47 +00:00
{- A location to store a key on the filesystem. A directory hash is used,
- to protect against filesystems that dislike having many items in a
- single directory.
-
- The file is put in a directory with the same name, this allows
- write-protecting the directory to avoid accidental deletion of the file.
-}
keyPath :: Key -> Hasher -> FilePath
keyPath key hasher = hasher key </> f </> f
where
f = keyFile key
{- All possibile locations to store a key using different directory hashes. -}
keyPaths :: Key -> [FilePath]
keyPaths key = map (keyPath key) annexHashes
2010-10-14 03:31:08 +00:00
{- Reverses keyFile, converting a filename fragment (ie, the basename of
- the symlink target) into a key. -}
fileKey :: FilePath -> Maybe Key
fileKey file = readKey $
2011-03-16 03:39:04 +00:00
replace "&a" "&" $ replace "&s" "%" $
replace "&c" ":" $ replace "%" "/" file
2010-11-08 20:47:36 +00:00
{- for quickcheck -}
prop_idempotent_fileKey :: String -> Bool
prop_idempotent_fileKey s = Just k == fileKey (keyFile k)
where k = stubKey { keyName = s, keyBackendName = "test" }
2011-03-15 21:47:00 +00:00
2011-12-02 18:39:47 +00:00
{- Two different directory hashes may be used. The mixed case hash
- came first, and is fine, except for the problem of case-strict
- filesystems such as Linux VFAT (mounted with shortname=mixed),
- which do not allow using a directory "XX" when "xx" already exists.
- To support that, most repositories use the lower case hash for new data. -}
2011-12-02 18:39:47 +00:00
type Hasher = Key -> FilePath
annexHashes :: [Hasher]
annexHashes = [hashDirLower, hashDirMixed]
2011-12-02 18:39:47 +00:00
hashDirMixed :: Hasher
hashDirMixed k = addTrailingPathSeparator $ take 2 dir </> drop 2 dir
where
2011-05-16 18:49:28 +00:00
dir = take 4 $ display_32bits_as_dir =<< [a,b,c,d]
ABCD (a,b,c,d) = md5 $ Str $ show k
2011-03-15 21:47:00 +00:00
2011-12-02 18:39:47 +00:00
hashDirLower :: Hasher
hashDirLower k = addTrailingPathSeparator $ take 3 dir </> drop 3 dir
where
dir = take 6 $ md5s $ Str $ show k
2011-03-15 21:47:00 +00:00
{- modified version of display_32bits_as_hex from Data.Hash.MD5
- Copyright (C) 2001 Ian Lynagh
- License: Either BSD or GPL
-}
display_32bits_as_dir :: Word32 -> String
display_32bits_as_dir w = trim $ swap_pairs cs
where
-- Need 32 characters to use. To avoid inaverdently making
2011-03-16 06:50:13 +00:00
-- a real word, use letters that appear less frequently.
chars = ['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF"
2011-03-15 21:47:00 +00:00
cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7]
2011-07-15 16:47:14 +00:00
getc n = chars !! fromIntegral n
2011-03-15 21:47:00 +00:00
swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs
swap_pairs _ = []
-- Last 2 will always be 00, so omit.
2011-07-15 16:47:14 +00:00
trim = take 6