2019-11-06 19:37:18 +00:00
{- git - annex v7 - > v8 upgrade support
-
- Copyright 2019 Joey Hess < id @ joeyh . name >
-
- Licensed under the GNU AGPL version 3 or higher .
- }
2020-10-29 18:20:57 +00:00
{- # LANGUAGE OverloadedStrings # -}
2019-11-06 19:37:18 +00:00
{- # LANGUAGE CPP # -}
module Upgrade.V7 where
2019-12-26 20:24:40 +00:00
import qualified Annex
2019-11-06 19:37:18 +00:00
import Annex.Common
2022-01-19 17:06:31 +00:00
import Types.Upgrade
2019-11-06 19:37:18 +00:00
import Annex.CatFile
import qualified Database.Keys
import qualified Database.Keys.SQL
2022-10-12 19:21:19 +00:00
import Database.Keys.Tables
2019-11-06 19:37:18 +00:00
import qualified Git.LsFiles as LsFiles
import qualified Git
import Git.FilePath
2020-09-29 17:51:51 +00:00
import Config
2020-10-29 18:20:57 +00:00
import qualified Utility.RawFilePath as R
import qualified System.FilePath.ByteString as P
2023-03-01 19:55:58 +00:00
import System.PosixCompat.Files ( isSymbolicLink )
2019-11-06 19:37:18 +00:00
2022-01-19 17:06:31 +00:00
upgrade :: Bool -> Annex UpgradeResult
2019-11-06 19:37:18 +00:00
upgrade automatic = do
unless automatic $
showAction " v7 to v8 "
2019-11-06 20:43:52 +00:00
2019-11-06 20:27:25 +00:00
-- The fsck databases are not transitioned here; any running
-- incremental fsck can continue to write to the old database.
-- The next time an incremental fsck is started, it will delete the
-- old database, and just re-fsck the files.
2019-11-06 20:43:52 +00:00
-- The old content identifier database is deleted here, but the
-- new database is not populated. It will be automatically
-- populated from the git-annex branch the next time it is used.
2022-08-11 20:57:44 +00:00
removeOldDb . fromRawFilePath =<< fromRepo gitAnnexContentIdentifierDbDirOld
2020-10-29 18:20:57 +00:00
liftIO . removeWhenExistsWith R . removeLink
2020-10-29 14:33:12 +00:00
=<< fromRepo gitAnnexContentIdentifierLockOld
2019-11-06 19:37:18 +00:00
2019-11-06 21:13:39 +00:00
-- The export databases are deleted here. The new databases
-- will be populated by the next thing that needs them, the same
-- way as they would be in a fresh clone.
2022-08-11 20:57:44 +00:00
removeOldDb . fromRawFilePath =<< calcRepo' gitAnnexExportDir
2019-11-06 21:13:39 +00:00
populateKeysDb
2022-08-11 20:57:44 +00:00
removeOldDb . fromRawFilePath =<< fromRepo gitAnnexKeysDbOld
2020-10-29 18:20:57 +00:00
liftIO . removeWhenExistsWith R . removeLink
2020-10-29 14:33:12 +00:00
=<< fromRepo gitAnnexKeysDbIndexCacheOld
2020-10-29 18:20:57 +00:00
liftIO . removeWhenExistsWith R . removeLink
2020-10-29 14:33:12 +00:00
=<< fromRepo gitAnnexKeysDbLockOld
2019-11-06 19:37:18 +00:00
2019-12-26 20:24:40 +00:00
updateSmudgeFilter
2022-01-19 17:06:31 +00:00
return UpgradeSuccess
2019-11-06 19:37:18 +00:00
2020-10-29 18:20:57 +00:00
gitAnnexKeysDbOld :: Git . Repo -> RawFilePath
gitAnnexKeysDbOld r = gitAnnexDir r P .</> " keys "
2019-11-06 19:37:18 +00:00
2020-10-29 18:20:57 +00:00
gitAnnexKeysDbLockOld :: Git . Repo -> RawFilePath
gitAnnexKeysDbLockOld r = gitAnnexKeysDbOld r <> " .lck "
2019-11-06 19:37:18 +00:00
2020-10-29 18:20:57 +00:00
gitAnnexKeysDbIndexCacheOld :: Git . Repo -> RawFilePath
gitAnnexKeysDbIndexCacheOld r = gitAnnexKeysDbOld r <> " .cache "
2019-11-06 19:37:18 +00:00
2020-10-29 18:20:57 +00:00
gitAnnexContentIdentifierDbDirOld :: Git . Repo -> RawFilePath
gitAnnexContentIdentifierDbDirOld r = gitAnnexDir r P .</> " cids "
2019-11-06 20:43:52 +00:00
2020-10-29 18:20:57 +00:00
gitAnnexContentIdentifierLockOld :: Git . Repo -> RawFilePath
gitAnnexContentIdentifierLockOld r = gitAnnexContentIdentifierDbDirOld r <> " .lck "
2019-11-06 20:43:52 +00:00
2022-08-11 20:57:44 +00:00
removeOldDb :: FilePath -> Annex ()
removeOldDb db =
2019-11-06 19:37:18 +00:00
whenM ( liftIO $ doesDirectoryExist db ) $ do
v <- liftIO $ tryNonAsync $
removePathForcibly db
case v of
Left ex -> giveup $ " Failed removing old database directory " ++ db ++ " during upgrade ( " ++ show ex ++ " ) -- delete that and re-run git-annex to finish the upgrade. "
Right () -> return ()
-- Populate the new keys database with associated files and inode caches.
--
-- The information is queried from git. The index contains inode cache
-- information for all staged files, so that is used.
--
-- Note that typically the inode cache of annex objects is also stored in
-- the keys database. This does not add it though, because it's possible
-- that any annex object has gotten modified. The most likely way would be
-- due to annex.thin having been set at some point in the past, bypassing
-- the usual safeguards against object modification. When a worktree file
-- is still a hardlink to an annex object, then they have the same inode
-- cache, so using the inode cache from the git index will get the right
-- thing added in that case. But there are cases where the annex object's
-- inode cache is not added here, most notably when it's not unlocked.
-- The result will be more work needing to be done by isUnmodified and
-- by inAnnex (the latter only when annex.thin is set) to verify the
-- annex object. That work is only done once, and then the object will
-- finally get its inode cached.
populateKeysDb :: Annex ()
2020-09-29 17:45:14 +00:00
populateKeysDb = unlessM isBareRepo $ do
2019-11-06 19:37:18 +00:00
top <- fromRepo Git . repoPath
2019-12-18 18:57:01 +00:00
( l , cleanup ) <- inRepo $ LsFiles . inodeCaches [ top ]
2019-11-06 19:37:18 +00:00
forM_ l $ \ case
( _f , Nothing ) -> giveup " Unable to parse git ls-files --debug output while upgrading git-annex sqlite databases. "
2023-03-01 19:55:58 +00:00
( f , Just ic ) -> unlessM ( liftIO $ catchBoolIO $ isSymbolicLink <$> R . getSymbolicLinkStatus ( toRawFilePath f ) ) $ do
2019-12-06 19:17:54 +00:00
catKeyFile ( toRawFilePath f ) >>= \ case
2019-11-06 19:37:18 +00:00
Nothing -> noop
Just k -> do
2019-12-18 18:57:01 +00:00
topf <- inRepo $ toTopFilePath $ toRawFilePath f
2022-10-12 19:21:19 +00:00
Database . Keys . runWriter AssociatedTable $ \ h -> liftIO $
2021-06-08 13:23:28 +00:00
Database . Keys . SQL . addAssociatedFile k topf h
2022-10-12 19:21:19 +00:00
Database . Keys . runWriter ContentTable $ \ h -> liftIO $
2019-11-06 19:37:18 +00:00
Database . Keys . SQL . addInodeCaches k [ ic ] h
liftIO $ void cleanup
Database . Keys . closeDb
2019-11-06 20:43:52 +00:00
2019-12-26 20:24:40 +00:00
-- The gitatrributes used to have a line that prevented filtering dotfiles,
-- but now they are filtered and annex.dotfiles controls whether they get
-- added to the annex.
--
-- Only done on local gitattributes, not any gitatrributes that might be
-- checked into the repository.
updateSmudgeFilter :: Annex ()
updateSmudgeFilter = do
2020-10-29 18:20:57 +00:00
lf <- fromRawFilePath <$> Annex . fromRepo Git . attributesLocal
2019-12-26 20:24:40 +00:00
ls <- liftIO $ lines <$> catchDefaultIO " " ( readFileStrict lf )
let ls' = removedotfilter ls
when ( ls /= ls' ) $
liftIO $ writeFile lf ( unlines ls' )
where
removedotfilter ( " * filter=annex " : " .* !filter " : rest ) =
" * filter=annex " : removedotfilter rest
removedotfilter ( l : ls ) = l : removedotfilter ls
removedotfilter [] = []