git-annex/Upgrade/V1.hs

242 lines
6.9 KiB
Haskell
Raw Normal View History

{- git-annex v1 -> v2 upgrade support
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Upgrade.V1 where
import System.Posix.Types
import Data.Char
2011-10-05 20:02:51 +00:00
import Common.Annex
import Types.Key
2011-10-04 04:40:47 +00:00
import Annex.Content
2011-10-15 20:21:08 +00:00
import Logs.Presence
2011-10-04 04:40:47 +00:00
import qualified Annex.Queue
import qualified Git
import qualified Git.LsFiles as LsFiles
import Backend
2011-10-04 04:40:47 +00:00
import Annex.Version
2011-09-23 22:13:24 +00:00
import Utility.FileMode
2013-05-12 23:19:28 +00:00
import Utility.Tmp
import qualified Upgrade.V2
2011-03-16 15:53:46 +00:00
-- v2 adds hashing of filenames of content and location log files.
-- Key information is encoded in filenames differently, so
-- both content and location log files move around, and symlinks
-- to content need to be changed.
--
-- When upgrading a v1 key to v2, file size metadata ought to be
-- added to the key (unless it is a WORM key, which encoded
-- mtime:size in v1). This can only be done when the file content
-- is present. Since upgrades need to happen consistently,
-- (so that two repos get changed the same way by the upgrade, and
-- will merge), that metadata cannot be added on upgrade.
--
-- Note that file size metadata
-- will only be used for detecting situations where git-annex
-- would run out of disk space, so if some keys don't have it,
-- the impact is minor. At least initially. It could be used in the
-- future by smart auto-repo balancing code, etc.
--
-- Anyway, since v2 plans ahead for other metadata being included
-- in keys, there should probably be a way to update a key.
-- Something similar to the migrate subcommand could be used,
-- and users could then run that at their leisure.
--
-- Entry point of the v1 -> v2 upgrade. Content is always moved. Unless
-- Git.repoIsLocalBare holds for the repository, symlinks and location
-- logs are updated as well and the command queue is flushed. Afterwards
-- the version is set to supportedVersion and the result is whatever
-- Upgrade.V2.upgrade returns.
upgrade :: Annex Bool
upgrade = do
    showAction "v1 to v2"
    -- The bareness check runs before any content is touched, exactly as
    -- when it selected between two separate branches.
    bare <- fromRepo Git.repoIsLocalBare
    moveContent
    unless bare $ do
        updateSymlinks
        moveLocationLogs
        Annex.Queue.flush
    setVersion supportedVersion
    Upgrade.V2.upgrade
-- Moves every key file found by getKeyFilesPresent1 into place via
-- moveAnnex, then removes the directory the file used to live in.
moveContent :: Annex ()
moveContent = do
    showAction "moving content"
    mapM_ relocate =<< getKeyFilesPresent1
  where
    relocate file = do
        -- The key is recovered from the v1-encoded file name.
        let key = fileKey1 (takeFileName file)
            olddir = parentDir file
        -- allowWrite is applied to the directory and then the file
        -- before moving (presumably to lift write protection; confirm
        -- against Utility.FileMode).
        liftIO $ do
            allowWrite olddir
            allowWrite file
        moveAnnex key file
        liftIO $ removeDirectory olddir
-- Walks the files listed by LsFiles.inRepo for the repository path and,
-- for each one that lookupFile1 recognises as pointing at a v1 key,
-- replaces it with a fresh symlink computed by gitAnnexLink and queues
-- a "git add" for it.
updateSymlinks :: Annex ()
updateSymlinks = do
    showAction "updating symlinks"
    top <- fromRepo Git.repoPath
    (files, cleanup) <- inRepo $ LsFiles.inRepo [top]
    mapM_ fixlink files
    void $ liftIO cleanup
  where
    -- Files for which lookupFile1 yields Nothing are left untouched.
    fixlink f = lookupFile1 f >>= maybe noop (relink f . fst)
    relink f k = do
        link <- inRepo $ gitAnnexLink f k
        -- The old link is removed before the new one is created.
        liftIO $ do
            removeFile f
            createSymbolicLink link f
        Annex.Queue.addCommand "add" [Param "--"] [f]
-- Merges every old-style location log found directly in the state
-- directory into its v2 location (logFile2), and queues the git
-- commands that add the new log and remove the old one.
moveLocationLogs :: Annex ()
moveLocationLogs = do
    showAction "moving location logs"
    mapM_ relocate =<< oldlocationlogs
  where
    -- (file name, key) pairs for the entries of the state directory
    -- that oldlog2key accepts; empty when the directory does not exist.
    oldlocationlogs = do
        dir <- fromRepo Upgrade.V2.gitStateDir
        exists <- liftIO $ doesDirectoryExist dir
        if exists
            then mapMaybe oldlog2key <$> liftIO (getDirectoryContents dir)
            else return []
    relocate (l, k) = do
        dest <- fromRepo $ logFile2 k
        dir <- fromRepo Upgrade.V2.gitStateDir
        let f = dir </> l
        liftIO $ do
            createDirectoryIfMissing True (parentDir dest)
            -- could just git mv, but this way deals with
            -- log files that are not checked into git,
            -- as well as merging with already upgraded
            -- logs that have been pulled from elsewhere
            old <- readLog1 f
            new <- readLog1 dest
            writeLog1 dest (old ++ new)
        Annex.Queue.addCommand "add" [Param "--"] [dest]
        -- The old log is added before being removed, in that order.
        Annex.Queue.addCommand "add" [Param "--"] [f]
        Annex.Queue.addCommand "rm" [Param "--quiet", Param "-f", Param "--"] [f]
2011-03-16 14:56:59 +00:00
-- Recognises a v1 location log file name. The name must end in ".log",
-- and the part before that suffix must parse (via readKey1) to a key
-- whose name and backend name are both non-empty. On success the
-- original file name is returned together with the key.
oldlog2key :: FilePath -> Maybe (FilePath, Key)
oldlog2key l
    | suffix == ".log" && sane = Just (l, k)
    | otherwise = Nothing
  where
    -- For names shorter than four characters the split point is
    -- negative, which leaves the stem empty and the suffix equal to
    -- the whole name.
    (stem, suffix) = splitAt (length l - 4) l
    k = readKey1 stem
    sane = not (null (keyName k)) && not (null (keyBackendName k))
-- WORM backend keys: "WORM:mtime:size:filename"
-- all the rest: "backend:key"
--
-- If the file looks like "WORM:XXX-...", then it was created by mixing
-- v2 and v1; that infelicity is worked around by treating the value
-- as the v2 key that it is.
--
-- Parsing is total. Well-formed v1 keys yield the same Key as before,
-- while malformed input (an empty string, a bare "WORM", missing or
-- non-numeric mtime/size fields, or a "WORM:XXX..." value that
-- file2key rejects) yields a key with empty or missing fields instead
-- of throwing. Callers such as oldlog2key reject keys whose name or
-- backend name is empty, so a stray file no longer aborts the upgrade.
readKey1 :: String -> Key
readKey1 v = case split ":" v of
    -- Mixed v1/v2 value: the field after "WORM" starts with an upper
    -- case letter. Falls back to the plain WORM parse when file2key
    -- cannot make sense of the remainder.
    ("WORM" : fields@((c : _) : _))
        | isUpper c -> maybe (wormKey fields) id $
            file2key $ intercalate ":" fields
    ("WORM" : fields) -> wormKey fields
    (b : rest) -> mkKey b (intercalate ":" rest) Nothing Nothing
    [] -> mkKey "" "" Nothing Nothing
  where
    -- fields are the components following "WORM": mtime, size, and
    -- then the file name, which may itself contain ':'.
    wormKey fields = mkKey "WORM"
        (intercalate ":" $ drop 2 fields)
        (numField 1 fields)
        (numField 0 fields)
    mkKey b n s t = stubKey
        { keyName = n
        , keyBackendName = b
        , keySize = s
        , keyMtime = t
        }
    -- Reads the i'th field, giving Nothing when the field is absent
    -- or is not entirely consumed by the parse (trailing whitespace
    -- is tolerated).
    numField :: Read a => Int -> [String] -> Maybe a
    numField i fields = case drop i fields of
        (s : _) -> case [x | (x, rest) <- Prelude.reads s, all isSpace rest] of
            (x : _) -> Just x
            [] -> Nothing
        [] -> Nothing
-- Serialises a key in the v1 format: the backend name, mtime, size and
-- key name joined with ':', leaving out any component whose textual
-- form is empty (absent mtime/size render as the empty string).
showKey1 :: Key -> String
showKey1 key = intercalate ":" [part | part <- parts, not (null part)]
  where
    parts =
        [ keyBackendName key
        , maybe "" show (keyMtime key)
        , maybe "" show (keySize key)
        , keyName key
        ]
-- Turns a key into its v1 file name. Applied to the showKey1 output, in
-- this order: "&" becomes "&a", then "%" becomes "&s", then "/"
-- becomes "%".
keyFile1 :: Key -> FilePath
keyFile1 = escapeSlash . escapePercent . escapeAmp . showKey1
  where
    escapeAmp = replace "&" "&a"
    escapePercent = replace "%" "&s"
    escapeSlash = replace "/" "%"
-- Parses a v1 file name back into a key. The escaping is undone in this
-- order: "%" becomes "/", then "&s" becomes "%", then "&a" becomes
-- "&"; the result is handed to readKey1.
fileKey1 :: FilePath -> Key
fileKey1 = readKey1 . unescapeAmp . unescapePercent . unescapeSlash
  where
    unescapeSlash = replace "%" "/"
    unescapePercent = replace "&s" "%"
    unescapeAmp = replace "&a" "&"
-- Writes the given log lines, rendered with showLog, to the file using
-- viaTmp writeFile.
writeLog1 :: FilePath -> [LogLine] -> IO ()
writeLog1 file = viaTmp writeFile file . showLog
-- Reads and parses a log file. The read is wrapped in catchDefaultIO
-- with [] as the fallback value.
readLog1 :: FilePath -> IO [LogLine]
readLog1 file = catchDefaultIO [] (fmap parseLog (readFileStrict file))
2011-12-31 08:11:39 +00:00
-- Looks up the v1 key and backend a file points at. The file name
-- component of the symlink target is parsed with fileKey1, and the
-- key's backend name is looked up with maybeLookupBackendName.
--
-- Yields Nothing when reading the symlink fails or when the backend
-- name is not known. In the latter case a warning is printed, but only
-- if the key name and backend name are both non-empty and
-- isLinkToAnnex holds for the target's file name.
lookupFile1 :: FilePath -> Annex (Maybe (Key, Backend))
lookupFile1 file =
    liftIO (tryIO getsymlink) >>= either (const $ return Nothing) makekey
  where
    getsymlink = takeFileName <$> readSymbolicLink file
    makekey l = maybe unknown found (maybeLookupBackendName bname)
      where
        found backend = return $ Just (k, backend)
        unknown = do
            unless (or [null kname, null bname, not (isLinkToAnnex l)]) $
                warning skip
            return Nothing
        k = fileKey1 l
        bname = keyBackendName k
        kname = keyName k
        skip = "skipping " ++ file ++
            " (unknown backend " ++ bname ++ ")"
-- Lists the v1 key files present under the repository's
-- gitAnnexObjectDir.
getKeyFilesPresent1 :: Annex [FilePath]
getKeyFilesPresent1 = fromRepo gitAnnexObjectDir >>= getKeyFilesPresent1'
-- Lists the v1 key files under the given directory. Each entry d of the
-- directory is expected to contain a file of the same name, i.e.
-- dir/d/d; only those candidates that exist as regular files are
-- returned. Yields [] when the directory does not exist.
getKeyFilesPresent1' :: FilePath -> Annex [FilePath]
getKeyFilesPresent1' dir = do
    exists <- liftIO $ doesDirectoryExist dir
    if exists
        then liftIO $ filterM present . map candidate =<< getDirectoryContents dir
        else return []
  where
    candidate d = dir ++ "/" ++ d ++ "/" ++ takeFileName d
    -- A candidate whose status cannot be read counts as not present.
    present f = either (const False) isRegularFile <$> tryIO (getFileStatus f)
-- Path of a key's v1 location log: Upgrade.V2.gitStateDir for the
-- repository, followed by the v1 file name of the key and ".log".
logFile1 :: Git.Repo -> Key -> String
logFile1 repo key = concat
    [ Upgrade.V2.gitStateDir repo
    , keyFile1 key
    , ".log"
    ]
-- Path of a key's v2 location log: logFile' with hashDirLower as the
-- hashing function.
logFile2 :: Key -> Git.Repo -> String
logFile2 key repo = logFile' hashDirLower key repo
-- Builds a location log path from, in order: gitStateDir for the
-- repository, the output of the supplied hasher for the key, the key's
-- file name (keyFile), and ".log".
logFile' :: (Key -> FilePath) -> Key -> Git.Repo -> String
logFile' hasher key repo = concat
    [ gitStateDir repo
    , hasher key
    , keyFile key
    , ".log"
    ]
-- Name of the state directory, ".git-annex", with a trailing path
-- separator.
stateDir :: FilePath
stateDir = addTrailingPathSeparator name
  where
    name = ".git-annex"
-- The state directory of a repository: stateDir joined onto
-- Git.repoPath, with a trailing path separator.
gitStateDir :: Git.Repo -> FilePath
gitStateDir = addTrailingPathSeparator . (</> stateDir) . Git.repoPath