2011-03-16 05:23:20 +00:00
|
|
|
{- git-annex v1 -> v2 upgrade support
|
|
|
|
-
|
2024-07-30 15:27:20 +00:00
|
|
|
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
|
2011-03-16 05:23:20 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2011-03-16 05:23:20 +00:00
|
|
|
-}
|
|
|
|
|
2023-04-10 21:03:41 +00:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
|
2011-03-16 05:23:20 +00:00
|
|
|
module Upgrade.V1 where
|
|
|
|
|
|
|
|
import System.Posix.Types
|
2011-03-28 13:27:28 +00:00
|
|
|
import Data.Char
|
2015-01-28 19:55:17 +00:00
|
|
|
import Data.Default
|
2019-01-03 17:21:48 +00:00
|
|
|
import Data.ByteString.Builder
|
2019-01-11 20:34:04 +00:00
|
|
|
import qualified Data.ByteString as S
|
2021-10-06 00:20:08 +00:00
|
|
|
import qualified Data.ByteString.Short as S (toShort, fromShort)
|
2019-01-03 17:21:48 +00:00
|
|
|
import qualified Data.ByteString.Lazy as L
|
2020-11-06 18:10:58 +00:00
|
|
|
import qualified System.FilePath.ByteString as P
|
2023-03-01 19:55:58 +00:00
|
|
|
import System.PosixCompat.Files (isRegularFile)
|
2024-07-30 15:27:20 +00:00
|
|
|
import Text.Read
|
2011-03-16 05:23:20 +00:00
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2022-01-19 17:06:31 +00:00
|
|
|
import Types.Upgrade
|
2011-10-04 04:40:47 +00:00
|
|
|
import Annex.Content
|
2016-02-15 20:12:18 +00:00
|
|
|
import Annex.Link
|
2020-03-06 15:57:15 +00:00
|
|
|
import Annex.Perms
|
2017-02-24 19:16:56 +00:00
|
|
|
import Types.Key
|
2011-10-15 20:21:08 +00:00
|
|
|
import Logs.Presence
|
2011-10-04 04:40:47 +00:00
|
|
|
import qualified Annex.Queue
|
2011-06-30 17:16:57 +00:00
|
|
|
import qualified Git
|
|
|
|
import qualified Git.LsFiles as LsFiles
|
2011-03-16 05:23:20 +00:00
|
|
|
import Backend
|
2011-09-23 22:13:24 +00:00
|
|
|
import Utility.FileMode
|
2013-05-12 23:19:28 +00:00
|
|
|
import Utility.Tmp
|
2011-06-23 06:30:20 +00:00
|
|
|
import qualified Upgrade.V2
|
2023-03-01 19:55:58 +00:00
|
|
|
import qualified Utility.RawFilePath as R
|
2011-03-16 15:53:46 +00:00
|
|
|
|
2011-03-16 06:35:48 +00:00
|
|
|
-- v2 adds hashing of filenames of content and location log files.
|
|
|
|
-- Key information is encoded in filenames differently, so
|
|
|
|
-- both content and location log files move around, and symlinks
|
|
|
|
-- to content need to be changed.
|
|
|
|
--
|
|
|
|
-- When upgrading a v1 key to v2, file size metadata ought to be
|
|
|
|
-- added to the key (unless it is a WORM key, which encoded
|
|
|
|
-- mtime:size in v1). This can only be done when the file content
|
|
|
|
-- is present. Since upgrades need to happen consistently,
|
|
|
|
-- (so that two repos get changed the same way by the upgrade, and
|
|
|
|
-- will merge), that metadata cannot be added on upgrade.
|
|
|
|
--
|
|
|
|
-- Note that file size metadata
|
|
|
|
-- will only be used for detecting situations where git-annex
|
|
|
|
-- would run out of disk space, so if some keys don't have it,
|
|
|
|
-- the impact is minor. At least initially. It could be used in the
|
|
|
|
-- future by smart auto-repo balancing code, etc.
|
|
|
|
--
|
|
|
|
-- Anyway, since v2 plans ahead for other metadata being included
|
|
|
|
-- in keys, there should probably be a way to update a key.
|
|
|
|
-- Something similar to the migrate subcommand could be used,
|
|
|
|
-- and users could then run that at their leisure.
|
2011-03-16 05:23:20 +00:00
|
|
|
|
2022-01-19 17:06:31 +00:00
|
|
|
-- Runs the v1 to v2 upgrade and then continues with the v2 upgrade,
-- whose result is returned.
--
-- In a local bare repository only the annexed content is moved.
-- Otherwise symlinks and location logs are updated too, and the
-- queued git commands are flushed before handing over to v2.
upgrade :: Annex UpgradeResult
upgrade = do
    showAction "v1 to v2"
    bare <- fromRepo Git.repoIsLocalBare
    if bare
        then moveContent
        else do
            moveContent
            updateSymlinks
            moveLocationLogs
            Annex.Queue.flush
    Upgrade.V2.upgrade
|
2011-03-16 05:23:20 +00:00
|
|
|
|
2011-03-16 06:35:48 +00:00
|
|
|
-- Moves every key file reported by getKeyFilesPresent1 into the annex
-- via moveAnnex, then removes the directory it used to live in.
moveContent :: Annex ()
moveContent = do
    showAction "moving content"
    mapM_ relocate =<< getKeyFilesPresent1
  where
    relocate path = do
        let rawpath = toRawFilePath path
            -- The key is encoded in the file's own name.
            key = fileKey1 (fromRawFilePath (P.takeFileName rawpath))
            olddir = parentDir rawpath
        -- Both the directory and the file are made writable before
        -- the move.
        liftIO $ do
            allowWrite olddir
            allowWrite rawpath
        _ <- moveAnnex key (AssociatedFile Nothing) rawpath
        liftIO $ removeDirectory (fromRawFilePath olddir)
|
2011-03-16 05:23:20 +00:00
|
|
|
|
2011-03-16 06:35:48 +00:00
|
|
|
-- Rewrites the symlink of every file listed by git under the top of the
-- repository that lookupKey1 recognises as a v1 annexed file, and
-- queues a git add for each rewritten link.
updateSymlinks :: Annex ()
updateSymlinks = do
    showAction "updating symlinks"
    top <- fromRepo Git.repoPath
    (files, cleanup) <- inRepo $ LsFiles.inRepo [] [top]
    mapM_ (relink . fromRawFilePath) files
    void $ liftIO cleanup
  where
    relink f = lookupKey1 f >>= \case
        -- Files lookupKey1 does not resolve are left alone.
        Nothing -> noop
        Just (k, _) -> do
            link <- fromRawFilePath
                <$> calcRepo (gitAnnexLink (toRawFilePath f) k)
            -- Replace the old link with one to the newly calculated
            -- target.
            liftIO $ do
                removeFile f
                R.createSymbolicLink (toRawFilePath link) (toRawFilePath f)
            Annex.Queue.addCommand [] "add" [Param "--"] [f]
|
2011-03-16 05:23:20 +00:00
|
|
|
|
2011-03-16 06:35:48 +00:00
|
|
|
-- Moves each old-style location log found in the state directory to
-- the path given by logFile2, merging it with any log already there.
moveLocationLogs :: Annex ()
moveLocationLogs = do
    showAction "moving location logs"
    mapM_ relocate =<< oldlocationlogs
  where
    -- Old log file names paired with the keys parsed from them;
    -- empty when the state directory does not exist.
    oldlocationlogs = do
        dir <- fromRepo Upgrade.V2.gitStateDir
        exists <- liftIO $ doesDirectoryExist dir
        if exists
            then mapMaybe oldlog2key <$> liftIO (getDirectoryContents dir)
            else return []
    relocate (l, k) = do
        dest <- fromRepo (logFile2 k)
        dir <- fromRepo Upgrade.V2.gitStateDir
        let f = dir </> l
        createWorkTreeDirectory (parentDir (toRawFilePath dest))
        -- A plain git mv would not be enough: reading both logs and
        -- writing the combination also handles log files that are not
        -- checked into git, and merges with already upgraded logs
        -- that have been pulled from elsewhere.
        merged <- liftIO $ (++) <$> readLog1 f <*> readLog1 dest
        liftIO $ writeLog1 dest merged
        Annex.Queue.addCommand [] "add" [Param "--"] [dest]
        -- The old log is added and then force-removed.
        Annex.Queue.addCommand [] "add" [Param "--"] [f]
        Annex.Queue.addCommand [] "rm" [Param "--quiet", Param "-f", Param "--"] [f]
|
2012-11-11 04:51:07 +00:00
|
|
|
|
2011-03-16 14:56:59 +00:00
|
|
|
-- Recognises an old location log file name: it must end in ".log" and
-- the remainder must parse to a key whose name and variety are both
-- non-empty. Returns the unchanged file name paired with that key.
oldlog2key :: FilePath -> Maybe (FilePath, Key)
oldlog2key l
    | ext == ".log" && sane = Just (l, k)
    | otherwise = Nothing
  where
    -- Split off the last four characters. For names shorter than
    -- that, base is empty and ext is the whole name.
    (base, ext) = splitAt (length l - 4) l
    k = readKey1 base
    sane = not (S.null name) && not (S.null variety)
    name = S.fromShort (fromKey keyName k)
    variety = formatKeyVariety (fromKey keyVariety k)
|
2011-03-16 06:35:48 +00:00
|
|
|
|
|
|
|
-- WORM backend keys: "WORM:mtime:size:filename"
|
|
|
|
-- all the rest: "backend:key"
|
2011-03-28 13:27:28 +00:00
|
|
|
--
|
2024-07-30 15:27:20 +00:00
|
|
|
-- If the file looks like "WORM:FOO-...", then it was created by mixing
|
2011-03-28 13:27:28 +00:00
|
|
|
-- v2 and v1; that infelicity is worked around by treating the value
|
|
|
|
-- as the v2 key that it is.
|
2011-03-16 05:23:20 +00:00
|
|
|
-- Parses a v1 key string, giving up with an error message when
-- readKey1' is unable to parse it.
--
-- NOTE(review): the message previously said "v0 key", which does not
-- match this v1 parser; confirm no caller relies on the old wording.
readKey1 :: String -> Key
readKey1 = fromMaybe (giveup "unable to parse v1 key") . readKey1'
|
|
|
|
|
|
|
|
-- Parses a v1 key string of the form "backend:key", or
-- "WORM:mtime:size:filename" for WORM keys.
--
-- When the first field is "WORM" and the second field starts with an
-- upper case letter, the string is treated as a mixed up v2 key:
-- everything after the first field is handed to deserializeKey, and
-- that is the only case that can yield Nothing.
--
-- The fields are taken apart by pattern matching rather than with
-- head and (!!), so no partial function is involved.
readKey1' :: String -> Maybe Key
readKey1' v
    | mixup = deserializeKey $ intercalate ":" $ drop 1 bits
    | otherwise = Just $ mkKey $ \d -> d
        { keyName = S.toShort (encodeBS n)
        , keyVariety = parseKeyVariety (encodeBS b)
        , keySize = s
        , keyMtime = t
        }
  where
    bits = splitc ':' v
    -- Backend name; empty if there are no fields at all.
    b = fromMaybe "" (headMaybe bits)
    -- A key is wormy when it has at least two fields and the first one
    -- is "WORM". The mtime field is then always present, while the
    -- size field is present only if there is a third field.
    (wormy, mtimefield, sizefield) = case bits of
        (w:m:rest) | w == "WORM" -> (True, Just m, headMaybe rest)
        _ -> (False, Nothing, Nothing)
    n = intercalate ":" $ drop (if wormy then 3 else 1) bits
    t = (readMaybe =<< mtimefield) :: Maybe EpochTime
    s = (readMaybe =<< sizefield) :: Maybe Integer
    -- mtimefield is only set for wormy keys, so this implies wormy.
    mixup = maybe False (any isUpper . take 1) mtimefield
|
2011-03-16 05:23:20 +00:00
|
|
|
|
2011-03-16 06:35:48 +00:00
|
|
|
-- Serializes a key in the v1 format: the variety, mtime, size and name
-- joined with ":", leaving out any field that is empty or unset.
showKey1 :: Key -> String
showKey1 k = intercalate ":" (filter (not . null) fields)
  where
    fields =
        [ decodeBS (formatKeyVariety (fromKey keyVariety k))
        , maybe "" show (fromKey keyMtime k)
        , maybe "" show (fromKey keySize k)
        , decodeBS (S.fromShort (fromKey keyName k))
        ]
|
2011-03-16 06:35:48 +00:00
|
|
|
|
|
|
|
-- Converts a key to its v1 file name. Working on the output of
-- showKey1, "&" is escaped to "&a" first, then "%" to "&s", and
-- finally "/" is replaced by "%".
keyFile1 :: Key -> FilePath
keyFile1 = replace "/" "%" . replace "%" "&s" . replace "&" "&a" . showKey1
|
|
|
|
|
|
|
|
-- Converts a v1 file name back to a key: "%" is turned into "/" first,
-- then "&s" into "%", then "&a" into "&", and the result is parsed
-- with readKey1.
fileKey1 :: FilePath -> Key
fileKey1 = readKey1 . replace "&a" "&" . replace "&s" "%" . replace "%" "/"
|
|
|
|
|
2011-08-20 00:05:08 +00:00
|
|
|
-- Writes the log lines to the file, going through viaTmp.
writeLog1 :: FilePath -> [LogLine] -> IO ()
writeLog1 file ls = viaTmp L.writeFile file content
  where
    content = toLazyByteString (buildLog ls)
|
2011-08-20 00:05:08 +00:00
|
|
|
|
|
|
|
-- Reads and parses a log file. catchDefaultIO supplies an empty list
-- as the default result.
readLog1 :: FilePath -> IO [LogLine]
readLog1 file = catchDefaultIO [] $ do
    content <- readFileStrict file
    return (parseLog (encodeBL content))
|
2011-03-16 06:35:48 +00:00
|
|
|
|
2020-07-10 18:17:35 +00:00
|
|
|
-- Looks up the key and backend of a v1 annexed file by reading its
-- symlink and parsing the link target's file name with fileKey1.
--
-- Returns Nothing when the symlink cannot be read, or when no backend
-- is found for the key's variety. In the latter case a warning is
-- shown if the link target looks like a link to the annex and the
-- parsed key has a non-empty name and backend name.
lookupKey1 :: FilePath -> Annex (Maybe (Key, Backend))
lookupKey1 file = liftIO (tryIO getsymlink) >>= \case
    Left _ -> return Nothing
    Right l -> makekey l
  where
    getsymlink = takeFileName . fromRawFilePath
        <$> R.readSymbolicLink (toRawFilePath file)
    makekey l = do
        found <- maybeLookupBackendVariety (fromKey keyVariety k)
        case found of
            Just backend -> return $ Just (k, backend)
            Nothing -> do
                when warnable $
                    warning (UnquotedString skip)
                return Nothing
      where
        k = fileKey1 l
        bname = decodeBS (formatKeyVariety (fromKey keyVariety k))
        kname = decodeBS (S.fromShort (fromKey keyName k))
        warnable = not (null kname) && not (null bname)
            && isLinkToAnnex (toRawFilePath l)
        skip = "skipping " ++ file ++
            " (unknown backend " ++ bname ++ ")"
|
2011-03-16 05:23:20 +00:00
|
|
|
|
2011-03-28 13:27:28 +00:00
|
|
|
-- Lists the key files present under the repository's annex object
-- directory.
getKeyFilesPresent1 :: Annex [FilePath]
getKeyFilesPresent1 = do
    objdir <- fromRepo gitAnnexObjectDir
    getKeyFilesPresent1' (fromRawFilePath objdir)
|
2011-03-28 13:27:28 +00:00
|
|
|
-- Lists the key files present under a directory. For each entry d of
-- the directory the candidate is dir/d/d, and only candidates that are
-- regular files are kept. Yields an empty list when the directory does
-- not exist.
getKeyFilesPresent1' :: FilePath -> Annex [FilePath]
getKeyFilesPresent1' dir = do
    exists <- liftIO $ doesDirectoryExist dir
    if exists
        then liftIO $ do
            entries <- getDirectoryContents dir
            filterM present
                [ dir ++ "/" ++ d ++ "/" ++ takeFileName d | d <- entries ]
        else return []
  where
    -- A candidate that cannot be statted counts as not present.
    present f = either (const False) isRegularFile
        <$> tryIO (R.getFileStatus (toRawFilePath f))
|
2011-08-20 00:05:08 +00:00
|
|
|
|
|
|
|
-- Path of a key's v1 location log: the v1 file name of the key with
-- ".log" appended, directly under Upgrade.V2.gitStateDir.
logFile1 :: Git.Repo -> Key -> String
logFile1 repo key = concat
    [ Upgrade.V2.gitStateDir repo
    , keyFile1 key
    , ".log"
    ]
|
|
|
|
|
2011-11-08 19:34:10 +00:00
|
|
|
-- Path of a key's location log, built by logFile' with the
-- hashDirLower hasher in its default configuration.
logFile2 :: Key -> Git.Repo -> String
logFile2 key repo = logFile' (hashDirLower def) key repo
|
2011-08-20 00:05:08 +00:00
|
|
|
|
2019-12-11 18:12:22 +00:00
|
|
|
-- Path of a key's location log under gitStateDir: the directory
-- produced by the hasher, followed by the key's file name and ".log".
logFile' :: (Key -> RawFilePath) -> Key -> Git.Repo -> String
logFile' hasher key repo = concat
    [ gitStateDir repo
    , fromRawFilePath (hasher key)
    , fromRawFilePath (keyFile key)
    , ".log"
    ]
|
2011-08-20 00:05:08 +00:00
|
|
|
|
|
|
|
-- Name of the state directory, with a trailing path separator.
stateDir :: FilePath
stateDir = addTrailingPathSeparator dirname
  where
    dirname = ".git-annex"
|
2011-08-20 00:05:08 +00:00
|
|
|
|
|
|
|
-- The state directory inside a repository: stateDir appended to the
-- repository path, with a trailing path separator.
gitStateDir :: Git.Repo -> FilePath
gitStateDir repo = addTrailingPathSeparator (top </> stateDir)
  where
    top = fromRawFilePath (Git.repoPath repo)
|