git-annex/Annex/Init.hs

436 lines
14 KiB
Haskell
Raw Normal View History

{- git-annex repository initialization
-
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
2014-01-26 20:36:31 +00:00
module Annex.Init (
ensureInitialized,
autoInitialize,
isInitialized,
initialize,
initialize',
uninitialize,
2013-11-05 19:29:56 +00:00
probeCrippledFileSystem,
probeCrippledFileSystem',
) where
import Annex.Common
import qualified Annex
import qualified Git
import qualified Git.Config
import qualified Git.Objects
import Git.Types (fromConfigValue)
import Git.ConfigTypes (SharedRepository(..))
2011-10-04 04:40:47 +00:00
import qualified Annex.Branch
import Logs.UUID
import Logs.Trust.Basic
import Logs.Config
import Types.TrustLevel
import Types.RepoVersion
2011-10-04 04:40:47 +00:00
import Annex.Version
import Annex.Difference
import Annex.UUID
2016-10-17 18:58:33 +00:00
import Annex.WorkTree
import Annex.Fixup
import Annex.Path
import Config
import Config.Files
import Config.Smudge
import qualified Upgrade.V5.Direct as Direct
import qualified Annex.AdjustedBranch as AdjustedBranch
import Remote.List.Util (remotesChanged)
import Annex.Environment
import Annex.Hook
import Annex.InodeSentinal
import Upgrade
import Annex.Tmp
2013-08-04 17:07:55 +00:00
import Utility.UserInfo
import qualified Utility.RawFilePath as R
import Utility.ThreadScheduler
import Annex.Perms
2021-06-22 13:46:06 +00:00
#ifndef mingw32_HOST_OS
2013-08-04 17:07:55 +00:00
import Utility.FileMode
import System.Posix.User
import qualified Utility.LockFile.Posix as Posix
2013-08-04 17:07:55 +00:00
#endif
import qualified Data.Map as M
import Control.Monad.IO.Class (MonadIO)
#ifndef mingw32_HOST_OS
import Data.Either
2020-10-30 17:31:35 +00:00
import qualified System.FilePath.ByteString as P
import Control.Concurrent.Async
#endif
2020-12-14 16:32:21 +00:00
checkInitializeAllowed :: Annex a -> Annex a
deal with git's changes for CVE-2022-24765 Deal with git's recent changes to fix CVE-2022-24765, which prevent using git in a repository owned by someone else. That makes git config --list not list the repo's configs, only global configs. So annex.uuid and annex.version are not visible to git-annex. It displayed a message about that, which is not right for this situation. Detect the situation and display a better message, similar to the one other git commands display. Also, git-annex init when run in that situation would overwrite annex.uuid with a new one, since it couldn't see the old one. Add a check to prevent it running too in this situation. It may be that this fix has security implications, if a config set by the malicious user who owns the repo causes git or git-annex to run code. I don't think any git-annex configs get run by git-annex init. It may be that some git config of a command does get run by one of the git commands that git-annex init runs. ("git status" is the command that prompted the CVE-2022-24765, since core.fsmonitor can cause it to run a command). Since I don't know how to exploit this, I'm not treating it as a security fix for now. Note that passing --git-dir makes git bypass the security check. git-annex does pass --git-dir to most calls to git, which it does to avoid needing chdir to the directory containing a git repository when accessing a remote. So, it's possible that somewhere in git-annex it gets as far as running git with --git-dir, and git reads some configs that are unsafe (what CVE-2022-24765 is about). This seems unlikely, it would have to be part of git-annex that runs in git repositories that have no (visible) annex.uuid, and git-annex init is the only one that I can think of that then goes on to run git, as discussed earlier. But I've not fully ruled out there being others.. The git developers seem mostly worried about "git status" or a similar command implicitly run by a shell prompt, not an explicit use of git in such a repository. For example, Ævar Arnfjörð Bjarma wrote: > * There are other bits of config that also point to executable things, > e.g. core.editor, aliases etc, but nothing has been found yet that > provides the "at a distance" effect that the core.fsmonitor vector > does. > > I.e. a user is unlikely to go to /tmp/some-crap/here and run "git > commit", but they (or their shell prompt) might run "git status", and > if you have a /tmp/.git ... Sponsored-by: Jarkko Kniivilä on Patreon
2022-05-20 18:18:19 +00:00
checkInitializeAllowed a = guardSafeToUseRepo $ noAnnexFileContent' >>= \case
Nothing -> a
Just noannexmsg -> do
warning "Initialization prevented by .noannex file (remove the file to override)"
unless (null noannexmsg) $
warning noannexmsg
giveup "Not initialized."
2020-12-14 16:32:21 +00:00
initializeAllowed :: Annex Bool
initializeAllowed = isNothing <$> noAnnexFileContent'
2020-12-14 16:32:21 +00:00
noAnnexFileContent' :: Annex (Maybe String)
noAnnexFileContent' = inRepo $
noAnnexFileContent . fmap fromRawFilePath . Git.repoWorkTree
genDescription :: Maybe String -> Annex UUIDDesc
genDescription (Just d) = return $ UUIDDesc $ encodeBS d
genDescription Nothing = do
2020-10-30 17:31:35 +00:00
reldir <- liftIO . relHome . fromRawFilePath
=<< liftIO . absPath
=<< fromRepo Git.repoPath
2013-04-03 07:52:41 +00:00
hostname <- fromMaybe "" <$> liftIO getHostname
let at = if null hostname then "" else "@"
v <- liftIO myUserName
return $ UUIDDesc $ encodeBS $ concat $ case v of
Right username -> [username, at, hostname, ":", reldir]
Left _ -> [hostname, ":", reldir]
initialize :: Bool -> Maybe String -> Maybe RepoVersion -> Annex ()
initialize autoinit mdescription mversion = checkInitializeAllowed $ do
{- Has to come before any commits are made as the shared
- clone heuristic expects no local objects. -}
sharedclone <- checkSharedClone
{- This will make the first commit to git, so ensure git is set up
- properly to allow commits when running it. -}
ensureCommit $ Annex.Branch.create
prepUUID
initialize' autoinit mversion
initSharedClone sharedclone
u <- getUUID
{- Avoid overwriting existing description with a default
- description. -}
whenM (pure (isJust mdescription) <||> not . M.member u <$> uuidDescMapRaw) $
describeUUID u =<< genDescription mdescription
-- Everything except for uuid setup, shared clone setup, and initial
-- description.
initialize' :: Bool -> Maybe RepoVersion -> Annex ()
initialize' autoinit mversion = checkInitializeAllowed $ do
checkLockSupport
checkFifoSupport
checkCrippledFileSystem
unlessM isBareRepo $ do
2013-11-05 19:29:56 +00:00
hookWrite preCommitHook
hookWrite postReceiveHook
setDifferences
unlessM (isJust <$> getVersion) $
setVersion (fromMaybe defaultVersion mversion)
supportunlocked <- annexSupportUnlocked <$> Annex.getGitConfig
if supportunlocked
then configureSmudgeFilter
else deconfigureSmudgeFilter
unlessM isBareRepo $ do
hookWrite postCheckoutHook
hookWrite postMergeHook
unless autoinit $
scanAnnexedFiles
AdjustedBranch.checkAdjustedClone >>= \case
AdjustedBranch.InAdjustedClone -> return ()
AdjustedBranch.NotInAdjustedClone ->
2016-06-02 20:59:15 +00:00
ifM (crippledFileSystem <&&> (not <$> isBareRepo))
( AdjustedBranch.adjustToCrippledFileSystem
-- Handle case where this repo was cloned from a
-- direct mode repo
2016-06-02 20:59:15 +00:00
, unlessM isBareRepo
Direct.switchHEADBack
)
propigateSecureHashesOnly
createInodeSentinalFile False
fixupUnusualReposAfterInit
uninitialize :: Annex ()
uninitialize = do
-- Remove hooks that are written when initializing.
hookUnWrite preCommitHook
hookUnWrite postReceiveHook
hookUnWrite postCheckoutHook
hookUnWrite postMergeHook
deconfigureSmudgeFilter
removeRepoUUID
removeVersion
{- Gets the version that the repo is initialized with.
-
- To make sure the repo is fully initialized, also checks that it has a
- uuid configured. In the unusual case where one is set and the other is
- not, errors out to avoid running in an inconsistent state.
-}
getInitializedVersion :: Annex (Maybe RepoVersion)
getInitializedVersion = do
um <- (\u -> if u == NoUUID then Nothing else Just u) <$> getUUID
vm <- getVersion
case (um, vm) of
(Just _, Just v) -> return (Just v)
(Nothing, Nothing) -> return Nothing
(Just _, Nothing) -> onemissing "annex.version" "annex.uuid"
(Nothing, Just _) -> onemissing "annex.uuid" "annex.version"
where
onemissing missing have = giveup $ unwords
[ "This repository has " ++ have ++ " set,"
, "but " ++ missing ++ " is not set. Perhaps that"
, "git config was lost. Cannot use the repository"
, "in this state; set back " ++ missing ++ " to fix this."
]
{- Will automatically initialize if there is already a git-annex
2012-12-13 04:45:27 +00:00
- branch from somewhere. Otherwise, require a manual init
2017-02-11 09:38:49 +00:00
- to avoid git-annex accidentally being run in git
- repos that did not intend to use it.
-
- Checks repository version and handles upgrades too.
-}
ensureInitialized :: Annex ()
ensureInitialized = getInitializedVersion >>= maybe needsinit checkUpgrade
where
2020-12-14 16:32:21 +00:00
needsinit = ifM autoInitializeAllowed
( do
tryNonAsync (initialize True Nothing Nothing) >>= \case
Right () -> noop
Left e -> giveup $ show e ++ "\n" ++
"git-annex: automatic initialization failed due to above problems"
autoEnableSpecialRemotes
, giveup "First run: git-annex init"
)
2020-12-14 16:32:21 +00:00
{- Check if auto-initialize is allowed. -}
autoInitializeAllowed :: Annex Bool
autoInitializeAllowed = Annex.Branch.hasSibling <&&> objectDirNotPresent
objectDirNotPresent :: Annex Bool
objectDirNotPresent = do
d <- fromRawFilePath <$> fromRepo gitAnnexObjectDir
exists <- liftIO $ doesDirectoryExist d
deal with git's changes for CVE-2022-24765 Deal with git's recent changes to fix CVE-2022-24765, which prevent using git in a repository owned by someone else. That makes git config --list not list the repo's configs, only global configs. So annex.uuid and annex.version are not visible to git-annex. It displayed a message about that, which is not right for this situation. Detect the situation and display a better message, similar to the one other git commands display. Also, git-annex init when run in that situation would overwrite annex.uuid with a new one, since it couldn't see the old one. Add a check to prevent it running too in this situation. It may be that this fix has security implications, if a config set by the malicious user who owns the repo causes git or git-annex to run code. I don't think any git-annex configs get run by git-annex init. It may be that some git config of a command does get run by one of the git commands that git-annex init runs. ("git status" is the command that prompted the CVE-2022-24765, since core.fsmonitor can cause it to run a command). Since I don't know how to exploit this, I'm not treating it as a security fix for now. Note that passing --git-dir makes git bypass the security check. git-annex does pass --git-dir to most calls to git, which it does to avoid needing chdir to the directory containing a git repository when accessing a remote. So, it's possible that somewhere in git-annex it gets as far as running git with --git-dir, and git reads some configs that are unsafe (what CVE-2022-24765 is about). This seems unlikely, it would have to be part of git-annex that runs in git repositories that have no (visible) annex.uuid, and git-annex init is the only one that I can think of that then goes on to run git, as discussed earlier. But I've not fully ruled out there being others.. The git developers seem mostly worried about "git status" or a similar command implicitly run by a shell prompt, not an explicit use of git in such a repository. For example, Ævar Arnfjörð Bjarma wrote: > * There are other bits of config that also point to executable things, > e.g. core.editor, aliases etc, but nothing has been found yet that > provides the "at a distance" effect that the core.fsmonitor vector > does. > > I.e. a user is unlikely to go to /tmp/some-crap/here and run "git > commit", but they (or their shell prompt) might run "git status", and > if you have a /tmp/.git ... Sponsored-by: Jarkko Kniivilä on Patreon
2022-05-20 18:18:19 +00:00
when exists $ guardSafeToUseRepo $
giveup $ unwords $
[ "This repository is not initialized for use"
, "by git-annex, but " ++ d ++ " exists,"
, "which indicates this repository was used by"
, "git-annex before, and may have lost its"
, "annex.uuid and annex.version configs. Either"
, "set back missing configs, or run git-annex init"
, "to initialize with a new uuid."
]
return (not exists)
2020-12-14 16:32:21 +00:00
deal with git's changes for CVE-2022-24765 Deal with git's recent changes to fix CVE-2022-24765, which prevent using git in a repository owned by someone else. That makes git config --list not list the repo's configs, only global configs. So annex.uuid and annex.version are not visible to git-annex. It displayed a message about that, which is not right for this situation. Detect the situation and display a better message, similar to the one other git commands display. Also, git-annex init when run in that situation would overwrite annex.uuid with a new one, since it couldn't see the old one. Add a check to prevent it running too in this situation. It may be that this fix has security implications, if a config set by the malicious user who owns the repo causes git or git-annex to run code. I don't think any git-annex configs get run by git-annex init. It may be that some git config of a command does get run by one of the git commands that git-annex init runs. ("git status" is the command that prompted the CVE-2022-24765, since core.fsmonitor can cause it to run a command). Since I don't know how to exploit this, I'm not treating it as a security fix for now. Note that passing --git-dir makes git bypass the security check. git-annex does pass --git-dir to most calls to git, which it does to avoid needing chdir to the directory containing a git repository when accessing a remote. So, it's possible that somewhere in git-annex it gets as far as running git with --git-dir, and git reads some configs that are unsafe (what CVE-2022-24765 is about). This seems unlikely, it would have to be part of git-annex that runs in git repositories that have no (visible) annex.uuid, and git-annex init is the only one that I can think of that then goes on to run git, as discussed earlier. But I've not fully ruled out there being others.. The git developers seem mostly worried about "git status" or a similar command implicitly run by a shell prompt, not an explicit use of git in such a repository. For example, Ævar Arnfjörð Bjarma wrote: > * There are other bits of config that also point to executable things, > e.g. core.editor, aliases etc, but nothing has been found yet that > provides the "at a distance" effect that the core.fsmonitor vector > does. > > I.e. a user is unlikely to go to /tmp/some-crap/here and run "git > commit", but they (or their shell prompt) might run "git status", and > if you have a /tmp/.git ... Sponsored-by: Jarkko Kniivilä on Patreon
2022-05-20 18:18:19 +00:00
guardSafeToUseRepo :: Annex a -> Annex a
guardSafeToUseRepo a = do
repopath <- fromRepo Git.repoPath
ifM (inRepo Git.Config.checkRepoConfigInaccessible)
( giveup $ unlines $
[ "Git refuses to operate in this repository,"
, "probably because it is owned by someone else."
, ""
-- This mirrors git's wording.
, "To add an exception for this directory, call:"
, "\tgit config --global --add safe.directory " ++ fromRawFilePath repopath
]
, a
)
2020-12-14 16:32:21 +00:00
{- Initialize if it can do so automatically. Avoids failing if it cannot.
-
- Checks repository version and handles upgrades too.
-}
autoInitialize :: Annex ()
autoInitialize = getInitializedVersion >>= maybe needsinit checkUpgrade
where
2020-12-14 16:32:21 +00:00
needsinit =
whenM (initializeAllowed <&&> autoInitializeAllowed) $ do
initialize True Nothing Nothing
autoEnableSpecialRemotes
{- Checks if a repository is initialized. Does not check version for ugrade. -}
isInitialized :: Annex Bool
isInitialized = maybe Annex.Branch.hasSibling (const $ return True) =<< getVersion
{- A crippled filesystem is one that does not allow making symlinks,
- or removing write access from files. -}
probeCrippledFileSystem :: Annex Bool
probeCrippledFileSystem = withEventuallyCleanedOtherTmp $ \tmp -> do
(r, warnings) <- probeCrippledFileSystem' tmp
(Just (freezeContent' UnShared))
(Just (thawContent' UnShared))
=<< hasFreezeHook
mapM_ warning warnings
return r
probeCrippledFileSystem'
:: (MonadIO m, MonadCatch m)
=> RawFilePath
-> Maybe (RawFilePath -> m ())
-> Maybe (RawFilePath -> m ())
-> Bool
-> m (Bool, [String])
2013-08-04 17:07:55 +00:00
#ifdef mingw32_HOST_OS
probeCrippledFileSystem' _ _ _ _ = return (True, [])
#else
probeCrippledFileSystem' tmp freezecontent thawcontent hasfreezehook = do
let f = tmp P.</> "gaprobe"
let f' = fromRawFilePath f
liftIO $ writeFile f' ""
r <- probe f'
void $ tryNonAsync $ (fromMaybe (liftIO . allowWrite) thawcontent) f
liftIO $ removeFile f'
return r
where
probe f = catchDefaultIO (True, []) $ do
let f2 = f ++ "2"
liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath f2)
liftIO $ createSymbolicLink f f2
liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath f2)
(fromMaybe (liftIO . preventWrite) freezecontent) (toRawFilePath f)
-- Should be unable to write to the file (unless
-- running as root). But some crippled
-- filesystems ignore write bit removals or ignore
-- permissions entirely.
ifM ((== Just False) <$> liftIO (checkContentWritePerm' UnShared (toRawFilePath f) Nothing hasfreezehook))
( return (True, ["Filesystem does not allow removing write bit from files."])
, liftIO $ ifM ((== 0) <$> getRealUserID)
( return (False, [])
, do
r <- catchBoolIO $ do
writeFile f "2"
return True
if r
then return (True, ["Filesystem allows writing to files whose write bit is not set."])
else return (False, [])
)
)
#endif
checkCrippledFileSystem :: Annex ()
checkCrippledFileSystem = whenM probeCrippledFileSystem $ do
warning "Detected a crippled filesystem."
setCrippledFileSystem True
{- Normally git disables core.symlinks itself when the:w
-
2016-05-10 18:42:57 +00:00
- filesystem does not support them. But, even if symlinks are
- supported, we don't use them by default in a crippled
- filesystem. -}
whenM (coreSymlinks <$> Annex.getGitConfig) $ do
warning "Disabling core.symlinks."
setConfig "core.symlinks"
(Git.Config.boolConfig False)
probeLockSupport :: Annex Bool
#ifdef mingw32_HOST_OS
probeLockSupport = return True
#else
probeLockSupport = withEventuallyCleanedOtherTmp $ \tmp -> do
2020-10-30 17:31:35 +00:00
let f = tmp P.</> "lockprobe"
mode <- annexFileMode
annexrunner <- Annex.makeRunner
liftIO $ withAsync (warnstall annexrunner) (const (go f mode))
where
go f mode = do
2020-10-30 17:31:35 +00:00
removeWhenExistsWith R.removeLink f
let locktest = bracket
(Posix.lockExclusive (Just mode) f)
Posix.dropLock
(const noop)
ok <- isRight <$> tryNonAsync locktest
2020-10-30 17:31:35 +00:00
removeWhenExistsWith R.removeLink f
return ok
warnstall annexrunner = do
threadDelaySeconds (Seconds 10)
annexrunner $ do
warning "Probing the filesystem for POSIX fcntl lock support is taking a long time."
warning "(Setting annex.pidlock will avoid this probe.)"
#endif
probeFifoSupport :: Annex Bool
probeFifoSupport = do
2013-08-04 17:07:55 +00:00
#ifdef mingw32_HOST_OS
return False
#else
withEventuallyCleanedOtherTmp $ \tmp -> do
2020-10-30 17:31:35 +00:00
let f = tmp P.</> "gaprobe"
let f2 = tmp P.</> "gaprobe2"
liftIO $ do
2020-10-30 17:31:35 +00:00
removeWhenExistsWith R.removeLink f
removeWhenExistsWith R.removeLink f2
ms <- tryIO $ do
2020-10-30 17:31:35 +00:00
createNamedPipe (fromRawFilePath f) ownerReadMode
R.createLink f f2
R.getFileStatus f
removeWhenExistsWith R.removeLink f
removeWhenExistsWith R.removeLink f2
return $ either (const False) isNamedPipe ms
#endif
checkLockSupport :: Annex ()
checkLockSupport =
unlessM (annexPidLock <$> Annex.getGitConfig) $
unlessM probeLockSupport $ do
warning "Detected a filesystem without POSIX fcntl lock support."
warning "Enabling annex.pidlock."
setConfig (annexConfig "pidlock") (Git.Config.boolConfig True)
checkFifoSupport :: Annex ()
checkFifoSupport = unlessM probeFifoSupport $ do
warning "Detected a filesystem without fifo support."
warning "Disabling ssh connection caching."
setConfig (annexConfig "sshcaching") (Git.Config.boolConfig False)
checkSharedClone :: Annex Bool
checkSharedClone = inRepo Git.Objects.isSharedClone
initSharedClone :: Bool -> Annex ()
initSharedClone False = return ()
initSharedClone True = do
showLongNote "Repository was cloned with --shared; setting annex.hardlink=true and making repository untrusted."
u <- getUUID
trustSet u UnTrusted
setConfig (annexConfig "hardlink") (Git.Config.boolConfig True)
{- Propigate annex.securehashesonly from then global config to local
- config. This makes a clone inherit a parent's setting, but once
- a repository has a local setting, changes to the global config won't
- affect it. -}
propigateSecureHashesOnly :: Annex ()
propigateSecureHashesOnly =
maybe noop (setConfig "annex.securehashesonly" . fromConfigValue)
=<< getGlobalConfig "annex.securehashesonly"
fixupUnusualReposAfterInit :: Annex ()
fixupUnusualReposAfterInit = do
gc <- Annex.getGitConfig
void $ inRepo $ \r -> fixupUnusualRepos r gc
{- Try to enable any special remotes that are configured to do so.
-
- The enabling is done in a child process to avoid it using stdio.
-}
autoEnableSpecialRemotes :: Annex ()
autoEnableSpecialRemotes = do
rp <- fromRawFilePath <$> fromRepo Git.repoPath
withNullHandle $ \nullh -> gitAnnexChildProcess "init"
[ Param "--autoenable" ]
(\p -> p
{ std_out = UseHandle nullh
, std_err = UseHandle nullh
, std_in = UseHandle nullh
, cwd = Just rp
}
)
(\_ _ _ pid -> void $ waitForProcess pid)
remotesChanged