2011-08-17 18:14:43 +00:00
|
|
|
{- git-annex repository initialization
|
|
|
|
-
|
2024-05-08 20:55:45 +00:00
|
|
|
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
|
2011-08-17 18:14:43 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2011-08-17 18:14:43 +00:00
|
|
|
-}
|
|
|
|
|
2013-05-11 20:03:00 +00:00
|
|
|
{-# LANGUAGE CPP #-}
|
2019-11-27 20:54:11 +00:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
2013-05-11 20:03:00 +00:00
|
|
|
|
2014-01-26 20:36:31 +00:00
|
|
|
module Annex.Init (
|
2022-08-29 17:52:58 +00:00
|
|
|
checkInitializeAllowed,
|
2011-08-17 22:38:26 +00:00
|
|
|
ensureInitialized,
|
2020-06-16 17:24:00 +00:00
|
|
|
autoInitialize,
|
2024-05-08 20:55:45 +00:00
|
|
|
autoInitialize',
|
2012-07-31 20:19:24 +00:00
|
|
|
isInitialized,
|
2011-08-17 18:36:20 +00:00
|
|
|
initialize,
|
2014-04-16 00:13:35 +00:00
|
|
|
initialize',
|
2013-02-15 18:17:31 +00:00
|
|
|
uninitialize,
|
2013-11-05 19:29:56 +00:00
|
|
|
probeCrippledFileSystem,
|
2016-02-16 19:30:59 +00:00
|
|
|
probeCrippledFileSystem',
|
2011-08-17 18:36:20 +00:00
|
|
|
) where
|
2011-08-17 18:14:43 +00:00
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2013-05-14 00:48:44 +00:00
|
|
|
import qualified Annex
|
2011-08-17 18:14:43 +00:00
|
|
|
import qualified Git
|
2013-04-04 17:14:55 +00:00
|
|
|
import qualified Git.Config
|
2014-09-05 17:44:09 +00:00
|
|
|
import qualified Git.Objects
|
2019-12-05 18:36:43 +00:00
|
|
|
import Git.Types (fromConfigValue)
|
2021-07-12 14:15:49 +00:00
|
|
|
import Git.ConfigTypes (SharedRepository(..))
|
2011-10-04 04:40:47 +00:00
|
|
|
import qualified Annex.Branch
|
2022-08-17 17:07:14 +00:00
|
|
|
import qualified Database.Fsck
|
2011-11-02 18:18:21 +00:00
|
|
|
import Logs.UUID
|
2014-09-05 17:44:09 +00:00
|
|
|
import Logs.Trust.Basic
|
2017-02-27 20:08:16 +00:00
|
|
|
import Logs.Config
|
2014-09-05 17:44:09 +00:00
|
|
|
import Types.TrustLevel
|
2018-10-25 21:23:53 +00:00
|
|
|
import Types.RepoVersion
|
2011-10-04 04:40:47 +00:00
|
|
|
import Annex.Version
|
2015-01-27 21:38:06 +00:00
|
|
|
import Annex.Difference
|
2011-10-15 21:47:03 +00:00
|
|
|
import Annex.UUID
|
2020-03-09 18:45:14 +00:00
|
|
|
import Annex.Fixup
|
2020-05-27 15:54:39 +00:00
|
|
|
import Annex.Path
|
2013-02-14 18:10:36 +00:00
|
|
|
import Config
|
2019-02-05 18:43:23 +00:00
|
|
|
import Config.Files
|
2018-08-28 14:26:51 +00:00
|
|
|
import Config.Smudge
|
2019-08-27 18:01:28 +00:00
|
|
|
import qualified Upgrade.V5.Direct as Direct
|
2018-12-03 16:57:23 +00:00
|
|
|
import qualified Annex.AdjustedBranch as AdjustedBranch
|
2020-05-27 15:54:39 +00:00
|
|
|
import Remote.List.Util (remotesChanged)
|
2013-07-05 16:24:28 +00:00
|
|
|
import Annex.Environment
|
2014-12-29 21:25:59 +00:00
|
|
|
import Annex.Hook
|
2015-12-09 19:42:16 +00:00
|
|
|
import Annex.InodeSentinal
|
2014-12-29 21:25:59 +00:00
|
|
|
import Upgrade
|
2019-01-17 19:40:44 +00:00
|
|
|
import Annex.Tmp
|
2013-08-04 17:07:55 +00:00
|
|
|
import Utility.UserInfo
|
2019-05-23 16:13:56 +00:00
|
|
|
import Annex.Perms
|
2021-06-22 13:46:06 +00:00
|
|
|
#ifndef mingw32_HOST_OS
|
2023-03-21 22:22:41 +00:00
|
|
|
import Utility.ThreadScheduler
|
|
|
|
import qualified Utility.RawFilePath as R
|
2013-08-04 17:07:55 +00:00
|
|
|
import Utility.FileMode
|
2015-08-19 16:36:17 +00:00
|
|
|
import System.Posix.User
|
2015-11-13 17:35:29 +00:00
|
|
|
import qualified Utility.LockFile.Posix as Posix
|
2013-08-04 17:07:55 +00:00
|
|
|
#endif
|
2012-08-03 14:45:18 +00:00
|
|
|
|
2019-05-23 16:51:01 +00:00
|
|
|
import qualified Data.Map as M
|
Added annex.freezecontent-command and annex.thawcontent-command configs
Freeze first sets the file perms, and then runs
freezecontent-command. Thaw runs thawcontent-command before
restoring file permissions. This is in case the freeze command
prevents changing file perms, as eg setting a file immutable does.
Also, changing file perms tends to mess up previously set ACLs.
git-annex init's probe for crippled filesystem uses them, so if file perms
don't work, but freezecontent-command manages to prevent write to a file,
it won't treat the filesystem as crippled.
When the filesystem has been probed as crippled, the hooks are not
used, because there seems to be no point then; git-annex won't be relying
on locking annex objects down. Also, this avoids them being run when the
file perms have not been changed, in case they somehow rely on
git-annex's setting of the file perms in order to work.
Sponsored-by: Dartmouth College's Datalad project
2021-06-21 18:40:20 +00:00
|
|
|
import Control.Monad.IO.Class (MonadIO)
|
2020-11-24 16:35:09 +00:00
|
|
|
#ifndef mingw32_HOST_OS
|
2023-03-21 22:22:41 +00:00
|
|
|
import System.PosixCompat.Files (ownerReadMode, isNamedPipe)
|
2020-11-24 16:35:09 +00:00
|
|
|
import Data.Either
|
2020-10-30 17:31:35 +00:00
|
|
|
import qualified System.FilePath.ByteString as P
|
2020-06-05 15:03:21 +00:00
|
|
|
import Control.Concurrent.Async
|
2020-11-24 16:35:09 +00:00
|
|
|
#endif
|
2019-05-23 16:51:01 +00:00
|
|
|
|
2022-08-29 17:52:58 +00:00
|
|
|
{- A value of this type is passed by checkInitializeAllowed to its
 - continuation, and initialize' takes one as an argument. The type is
 - not in this module's export list. -}
data InitializeAllowed = InitializeAllowed
|
|
|
|
|
|
|
|
{- Runs an action only when this repository may be initialized.
 -
 - The whole check runs inside guardSafeToUseRepo. When
 - noAnnexFileContent' finds nothing, checkSqliteWorks is run and then
 - the action is handed an InitializeAllowed value. Otherwise, warnings
 - are displayed (including the returned message, when it is not empty)
 - and initialization is refused with giveup.
 -}
checkInitializeAllowed :: (InitializeAllowed -> Annex a) -> Annex a
checkInitializeAllowed a = guardSafeToUseRepo $
    maybe allowed prevented =<< noAnnexFileContent'
  where
    allowed = checkSqliteWorks >> a InitializeAllowed

    prevented noannexmsg = do
        warning "Initialization prevented by .noannex file (remove the file to override)"
        unless (null noannexmsg) $
            warning (UnquotedString noannexmsg)
        giveup "Not initialized."
|
2017-12-13 18:34:32 +00:00
|
|
|
|
2020-12-14 16:32:21 +00:00
|
|
|
{- True when noAnnexFileContent' finds nothing, which is the case in
 - which checkInitializeAllowed lets initialization proceed. -}
initializeAllowed :: Annex Bool
initializeAllowed = maybe True (const False) <$> noAnnexFileContent'
|
2020-06-16 17:24:00 +00:00
|
|
|
|
2020-12-14 16:32:21 +00:00
|
|
|
{- Runs noAnnexFileContent on the work tree of the current repository,
 - after converting it with fromRawFilePath. -}
noAnnexFileContent' :: Annex (Maybe String)
noAnnexFileContent' = inRepo $ \repo ->
    noAnnexFileContent (fromRawFilePath <$> Git.repoWorkTree repo)
|
2020-06-16 17:24:00 +00:00
|
|
|
|
2019-01-01 19:39:45 +00:00
|
|
|
{- Produces the description to use for the repository.
 -
 - A provided description is used as-is. Otherwise one is generated,
 - of the form username@hostname:reldir, where reldir is the absolute
 - path of the repository as rendered by relHome. The "@" is left out
 - when no hostname is available, and the username part is left out
 - when myUserName does not yield a name.
 -}
genDescription :: Maybe String -> Annex UUIDDesc
genDescription mdescription =
    UUIDDesc . encodeBS <$> maybe generated return mdescription
  where
    generated = do
        reldir <- fromRepo Git.repoPath
            >>= liftIO . absPath
            >>= liftIO . relHome . fromRawFilePath
        hostname <- fromMaybe "" <$> liftIO getHostname
        let location = hostname ++ ":" ++ reldir
        let withuser username
                | null hostname = username ++ location
                | otherwise = username ++ "@" ++ location
        either (const location) withuser <$> liftIO myUserName
|
2012-08-03 14:45:18 +00:00
|
|
|
|
remove dead nodes when loading the cluster log
This is to avoid inserting a cluster uuid into the location log when
only dead nodes in the cluster contain the content of a key.
One reason why this is necessary is Remote.keyLocations, which excludes
dead repositories from the list. But there are probably many more.
Implementing this was challenging, because Logs.Location importing
Logs.Cluster which imports Logs.Trust which imports Remote.List resulted
in an import cycle through several other modules.
Resorted to making Logs.Location not import Logs.Cluster, and instead
it assumes that Annex.clusters gets populated when necessary before it's
called.
That's done in Annex.Startup, which is run by the git-annex command
(but not other commands) at early startup in initialized repos. Or,
is run after initialization.
Note that in Remote.Git, it is unable to import Annex.Startup, because
Remote.Git importing Logs.Cluster leads to the same import cycle.
So ensureInitialized is not passed annexStartup in there.
Other commands, like git-annex-shell currently don't run annexStartup
either.
So there are cases where Logs.Location will not see clusters. So it won't add
any cluster UUIDs when loading the log. That's ok, the only reason to do
that is to make display of where objects are located include clusters,
and to make commands like git-annex get --from treat keys as being located
in a cluster. git-annex-shell certainly does not do anything like that,
and I'm pretty sure Remote.Git (and callers to Remote.Git.onLocalRepo)
don't either.
2024-06-16 18:35:07 +00:00
|
|
|
{- Initializes the repository, once checkInitializeAllowed permits it.
 -
 - The startup action is passed along to initialize'. The description,
 - when provided, is always recorded for the repository's uuid; when it
 - is not provided, a generated description is only recorded if the
 - uuid has none yet. The version is passed along to initialize'.
 -}
initialize :: Annex () -> Maybe String -> Maybe RepoVersion -> Annex ()
initialize startupannex mdescription mversion = checkInitializeAllowed setup
  where
    setup initallowed = do
        -- The shared clone heuristic expects no local objects, so
        -- this check has to happen before any commits are made.
        sharedclone <- checkSharedClone

        -- Creating the branch makes the first commit to git, so
        -- ensure git is set up properly to allow commits meanwhile.
        ensureCommit Annex.Branch.create

        prepUUID
        initialize' startupannex mversion initallowed

        initSharedClone sharedclone

        u <- getUUID
        when (u == NoUUID) $
            giveup "Failed to read annex.uuid from git config after setting it. This should never happen. Please file a bug report."

        -- A default description must not overwrite an existing one.
        whenM (needsdescription u) $
            genDescription mdescription >>= describeUUID u

    needsdescription u
        | isJust mdescription = return True
        | otherwise = not . M.member u <$> uuidDescMapRaw
|
2014-04-16 00:13:35 +00:00
|
|
|
|
2015-11-13 17:35:29 +00:00
|
|
|
{- Performs all of initialization other than uuid setup, shared clone
 - setup, and the initial description.
 -
 - An InitializeAllowed value has to be provided, but is not otherwise
 - used.
 -}
initialize' :: Annex () -> Maybe RepoVersion -> InitializeAllowed -> Annex ()
initialize' startupannex mversion _initallowed = do
    checkLockSupport
    checkFifoSupport
    checkCrippledFileSystem
    writehooks [preCommitHook, postReceiveHook]
    setDifferences
    -- A version that is already set is left alone.
    whenM (isNothing <$> getVersion) $
        setVersion (fromMaybe defaultVersion mversion)
    ifM (annexSupportUnlocked <$> Annex.getGitConfig)
        ( configureSmudgeFilter
        , deconfigureSmudgeFilter
        )
    writehooks [postCheckoutHook, postMergeHook]

    AdjustedBranch.checkAdjustedClone >>= \case
        AdjustedBranch.InAdjustedClone -> return ()
        AdjustedBranch.NotInAdjustedClone -> adjustcheckout
    propigateSecureHashesOnly
    createInodeSentinalFile False
    fixupUnusualReposAfterInit

    -- Normally this runs at Annex startup, but it will not have run
    -- yet when git-annex was not already initialized.
    startupannex
  where
    -- Hooks are only written when the repository is not bare.
    writehooks hooks = unlessM isBareRepo $
        mapM_ hookWrite hooks

    adjustcheckout = ifM (crippledFileSystem <&&> (not <$> isBareRepo))
        ( AdjustedBranch.adjustToCrippledFileSystem
        -- Handle the case where this repo was cloned from a
        -- direct mode repo.
        , unlessM isBareRepo
            Direct.switchHEADBack
        )
|
|
|
|
|
2011-08-17 18:14:43 +00:00
|
|
|
{- Removes the hooks that initialization writes, deconfigures the
 - smudge filter, and then removes the repository's uuid and version. -}
uninitialize :: Annex ()
uninitialize = do
    mapM_ hookUnWrite
        [ preCommitHook
        , postReceiveHook
        , postCheckoutHook
        , postMergeHook
        ]
    deconfigureSmudgeFilter
    removeRepoUUID
    removeVersion
|
2011-08-17 18:14:43 +00:00
|
|
|
|
2020-12-14 17:17:43 +00:00
|
|
|
{- Gets the version that the repo is initialized with, or Nothing when
 - it is not initialized.
 -
 - A repo only counts as initialized when it has both a uuid and a
 - version. When just one of the two is set, gives up with an error,
 - rather than running in an inconsistent state.
 -}
getInitializedVersion :: Annex (Maybe RepoVersion)
getInitializedVersion = do
    uuid <- getUUID
    version <- getVersion
    case (uuid /= NoUUID, version) of
        (True, Just v) -> return (Just v)
        (False, Nothing) -> return Nothing
        (True, Nothing) -> inconsistent "annex.uuid" "annex.version"
        (False, Just _) -> inconsistent "annex.version" "annex.uuid"
  where
    -- First the config that is set, then the one that is missing.
    inconsistent have missing = giveup $ unwords
        [ "This repository has " ++ have ++ " set,"
        , "but " ++ missing ++ " is not set. Perhaps that"
        , "git config was lost. Cannot use the repository"
        , "in this state; set back " ++ missing ++ " to fix this."
        ]
|
|
|
|
|
2011-08-17 22:38:26 +00:00
|
|
|
{- Makes sure the repository is initialized.
 -
 - An initialized repository has its version passed to checkUpgrade.
 -
 - An uninitialized repository gets initialized automatically when
 - autoInitializeAllowed permits it, and then autoEnableSpecialRemotes
 - is run. Otherwise a manual init is required, so that git-annex does
 - not accidentally get used in git repos that did not intend to use it.
 -}
ensureInitialized :: Annex () -> Annex [Remote] -> Annex ()
ensureInitialized startupannex remotelist = getInitializedVersion >>= \case
    Just version -> checkUpgrade version
    Nothing -> ifM autoInitializeAllowed
        ( autoinit
        , giveup "First run: git-annex init"
        )
  where
    autoinit = do
        tryNonAsync (initialize startupannex Nothing Nothing)
            >>= either initfailed return
        autoEnableSpecialRemotes remotelist

    -- The exception is displayed, followed by an explanation.
    initfailed e = giveup $ show e
        ++ "\ngit-annex: automatic initialization failed due to above problems"
|
2011-08-17 18:36:20 +00:00
|
|
|
|
2020-12-14 16:32:21 +00:00
|
|
|
{- Auto-initialization is allowed when Annex.Branch.hasSibling and
 - objectDirNotPresent both hold. The latter is only checked when the
 - former holds. -}
autoInitializeAllowed :: Annex Bool
autoInitializeAllowed = ifM Annex.Branch.hasSibling
    ( objectDirNotPresent
    , return False
    )
|
|
|
|
|
|
|
|
{- Checks that the annex object directory does not exist.
 -
 - When the directory does exist, this gives up with an explanation
 - (run inside guardSafeToUseRepo), since the repository looks like
 - it was used by git-annex before.
 -}
objectDirNotPresent :: Annex Bool
objectDirNotPresent = do
    objdir <- fromRawFilePath <$> fromRepo gitAnnexObjectDir
    present <- liftIO (doesDirectoryExist objdir)
    when present $
        guardSafeToUseRepo (giveup (lostconfigs objdir))
    return (not present)
  where
    lostconfigs objdir = unwords
        [ "This repository is not initialized for use"
        , "by git-annex, but " ++ objdir ++ " exists,"
        , "which indicates this repository was used by"
        , "git-annex before, and may have lost its"
        , "annex.uuid and annex.version configs. Either"
        , "set back missing configs, or run git-annex init"
        , "to initialize with a new uuid."
        ]
|
2020-12-14 16:32:21 +00:00
|
|
|
|
deal with git's changes for CVE-2022-24765
Deal with git's recent changes to fix CVE-2022-24765, which prevent using
git in a repository owned by someone else.
That makes git config --list not list the repo's configs, only global
configs. So annex.uuid and annex.version are not visible to git-annex.
It displayed a message about that, which is not right for this situation.
Detect the situation and display a better message, similar to the one other
git commands display.
Also, git-annex init when run in that situation would overwrite annex.uuid
with a new one, since it couldn't see the old one. Add a check to prevent
it running too in this situation. It may be that this fix has security
implications, if a config set by the malicious user who owns the repo
causes git or git-annex to run code. I don't think any git-annex configs
get run by git-annex init. It may be that some git config of a command
does get run by one of the git commands that git-annex init runs. ("git
status" is the command that prompted the CVE-2022-24765, since
core.fsmonitor can cause it to run a command). Since I don't know how
to exploit this, I'm not treating it as a security fix for now.
Note that passing --git-dir makes git bypass the security check. git-annex
does pass --git-dir to most calls to git, which it does to avoid needing
chdir to the directory containing a git repository when accessing a remote.
So, it's possible that somewhere in git-annex it gets as far as running git
with --git-dir, and git reads some configs that are unsafe (what
CVE-2022-24765 is about). This seems unlikely, it would have to be part of
git-annex that runs in git repositories that have no (visible) annex.uuid,
and git-annex init is the only one that I can think of that then goes on to
run git, as discussed earlier. But I've not fully ruled out there being
others..
The git developers seem mostly worried about "git status" or a similar
command implicitly run by a shell prompt, not an explicit use of git in
such a repository. For example, Ævar Arnfjörð Bjarma wrote:
> * There are other bits of config that also point to executable things,
> e.g. core.editor, aliases etc, but nothing has been found yet that
> provides the "at a distance" effect that the core.fsmonitor vector
> does.
>
> I.e. a user is unlikely to go to /tmp/some-crap/here and run "git
> commit", but they (or their shell prompt) might run "git status", and
> if you have a /tmp/.git ...
Sponsored-by: Jarkko Kniivilä on Patreon
2022-05-20 18:18:19 +00:00
|
|
|
{- Runs the action, unless Git.Config.checkRepoConfigInaccessible reports
 - that the repository's config cannot be accessed. In that case, gives
 - up with a message explaining how to add a safe.directory exception
 - for the repository. -}
guardSafeToUseRepo :: Annex a -> Annex a
guardSafeToUseRepo a = do
    inaccessible <- inRepo Git.Config.checkRepoConfigInaccessible
    if inaccessible
        then refuse
        else a
  where
    refuse = do
        -- The message needs the absolute path of the repository.
        p <- liftIO . absPath =<< fromRepo Git.repoPath
        giveup $ unlines
            [ "Git refuses to operate in this repository,"
            , "probably because it is owned by someone else."
            , ""
            -- This mirrors git's wording.
            , "To add an exception for this directory, call:"
            , "\tgit config --global --add safe.directory " ++ fromRawFilePath p
            ]
|
deal with git's changes for CVE-2022-24765
Deal with git's recent changes to fix CVE-2022-24765, which prevent using
git in a repository owned by someone else.
That makes git config --list not list the repo's configs, only global
configs. So annex.uuid and annex.version are not visible to git-annex.
It displayed a message about that, which is not right for this situation.
Detect the situation and display a better message, similar to the one other
git commands display.
Also, git-annex init when run in that situation would overwrite annex.uuid
with a new one, since it couldn't see the old one. Add a check to prevent
it running too in this situation. It may be that this fix has security
implications, if a config set by the malicious user who owns the repo
causes git or git-annex to run code. I don't think any git-annex configs
get run by git-annex init. It may be that some git config of a command
does get run by one of the git commands that git-annex init runs. ("git
status" is the command that prompted the CVE-2022-24765, since
core.fsmonitor can cause it to run a command). Since I don't know how
to exploit this, I'm not treating it as a security fix for now.
Note that passing --git-dir makes git bypass the security check. git-annex
does pass --git-dir to most calls to git, which it does to avoid needing
chdir to the directory containing a git repository when accessing a remote.
So, it's possible that somewhere in git-annex it gets as far as running git
with --git-dir, and git reads some configs that are unsafe (what
CVE-2022-24765 is about). This seems unlikely, it would have to be part of
git-annex that runs in git repositories that have no (visible) annex.uuid,
and git-annex init is the only one that I can think of that then goes on to
run git, as discussed earlier. But I've not fully ruled out there being
others..
The git developers seem mostly worried about "git status" or a similar
command implicitly run by a shell prompt, not an explicit use of git in
such a repository. For example, Ævar Arnfjörð Bjarma wrote:
> * There are other bits of config that also point to executable things,
> e.g. core.editor, aliases etc, but nothing has been found yet that
> provides the "at a distance" effect that the core.fsmonitor vector
> does.
>
> I.e. a user is unlikely to go to /tmp/some-crap/here and run "git
> commit", but they (or their shell prompt) might run "git status", and
> if you have a /tmp/.git ...
Sponsored-by: Jarkko Kniivilä on Patreon
2022-05-20 18:18:19 +00:00
|
|
|
|
2020-12-14 16:32:21 +00:00
|
|
|
{- Initializes the repository when that can be done automatically.
 - Does not fail when it cannot.
 -
 - Also checks the repository version, and handles upgrades.
 -}
autoInitialize :: Annex () -> Annex [Remote] -> Annex ()
autoInitialize startupannex remotelist =
    autoInitialize' autoInitializeAllowed startupannex remotelist
|
|
|
|
|
remove dead nodes when loading the cluster log
This is to avoid inserting a cluster uuid into the location log when
only dead nodes in the cluster contain the content of a key.
One reason why this is necessary is Remote.keyLocations, which excludes
dead repositories from the list. But there are probably many more.
Implementing this was challenging, because Logs.Location importing
Logs.Cluster which imports Logs.Trust which imports Remote.List resulted
in an import cycle through several other modules.
Resorted to making Logs.Location not import Logs.Cluster, and instead
it assumes that Annex.clusters gets populated when necessary before it's
called.
That's done in Annex.Startup, which is run by the git-annex command
(but not other commands) at early startup in initialized repos. Or,
is run after initialization.
Note that in Remote.Git, it is unable to import Annex.Startup, because
Remote.Git importing Logs.Cluster leads to the same import cycle.
So ensureInitialized is not passed annexStartup in there.
Other commands, like git-annex-shell currently don't run annexStartup
either.
So there are cases where Logs.Location will not see clusters. So it won't add
any cluster UUIDs when loading the log. That's ok, the only reason to do
that is to make display of where objects are located include clusters,
and to make commands like git-annex get --from treat keys as being located
in a cluster. git-annex-shell certainly does not do anything like that,
and I'm pretty sure Remote.Git (and callers to Remote.Git.onLocalRepo)
don't either.
2024-06-16 18:35:07 +00:00
|
|
|
{- Like autoInitialize, but the caller supplies the check that decides
 - whether automatic initialization is wanted.
 -
 - A repository that already has an initialized version gets checkUpgrade
 - run on that version. Otherwise, it is initialized, and special remotes
 - are auto-enabled, but only when both initializeAllowed and the
 - supplied check hold.
 -}
autoInitialize' :: Annex Bool -> Annex () -> Annex [Remote] -> Annex ()
autoInitialize' check startupannex remotelist = do
    initializedversion <- getInitializedVersion
    case initializedversion of
        Just v -> checkUpgrade v
        Nothing -> whenM (initializeAllowed <&&> check) $ do
            initialize startupannex Nothing Nothing
            autoEnableSpecialRemotes remotelist
|
2020-06-16 17:24:00 +00:00
|
|
|
|
2024-04-06 13:50:58 +00:00
|
|
|
{- Checks if a repository is initialized. Does not check version for
 - upgrade.
 -
 - When getVersion finds no version, the answer comes from
 - Annex.Branch.hasSibling instead.
 -}
isInitialized :: Annex Bool
isInitialized = do
    version <- getVersion
    case version of
        Just _ -> return True
        Nothing -> Annex.Branch.hasSibling
|
|
|
|
|
2013-05-14 00:48:44 +00:00
|
|
|
{- A crippled filesystem is one that does not allow making symlinks,
 - or removing write access from files.
 -
 - The probe runs in a temp directory, using freezeContent' and
 - thawContent' to freeze and thaw the probe file. Any warnings the
 - probe produced are displayed.
 -}
probeCrippledFileSystem :: Annex Bool
probeCrippledFileSystem = withEventuallyCleanedOtherTmp $ \tmp -> do
    freezehook <- hasFreezeHook
    (crippled, msgs) <- probeCrippledFileSystem' tmp
        (Just (freezeContent' UnShared))
        (Just (thawContent' UnShared))
        freezehook
    mapM_ (warning . UnquotedString) msgs
    return crippled
|
2016-02-16 19:30:59 +00:00
|
|
|
|
Added annex.freezecontent-command and annex.thawcontent-command configs
Freeze first sets the file perms, and then runs
freezecontent-command. Thaw runs thawcontent-command before
restoring file permissions. This is in case the freeze command
prevents changing file perms, as eg setting a file immutable does.
Also, changing file perms tends to mess up previously set ACLs.
git-annex init's probe for crippled filesystem uses them, so if file perms
don't work, but freezecontent-command manages to prevent write to a file,
it won't treat the filesystem as crippled.
When the filesystem has been probed as crippled, the hooks are not
used, because there seems to be no point then; git-annex won't be relying
on locking annex objects down. Also, this avoids them being run when the
file perms have not been changed, in case they somehow rely on
git-annex's setting of the file perms in order to work.
Sponsored-by: Dartmouth College's Datalad project
2021-06-21 18:40:20 +00:00
|
|
|
{- Probes whether the filesystem containing the given directory is
 - crippled, using a scratch file "gaprobe" created in that directory.
 -
 - The two optional actions are used to freeze and to thaw the scratch
 - file; when not provided, preventWrite and allowWrite are used
 - instead. The Bool is passed on to checkContentWritePerm'.
 -
 - Returns whether the filesystem is crippled, along with warning
 - messages explaining what was detected. On Windows, no probing is done
 - and the filesystem is always reported as crippled.
 -}
probeCrippledFileSystem'
    :: (MonadIO m, MonadCatch m)
    => RawFilePath
    -> Maybe (RawFilePath -> m ())
    -> Maybe (RawFilePath -> m ())
    -> Bool
    -> m (Bool, [String])
#ifdef mingw32_HOST_OS
probeCrippledFileSystem' _ _ _ _ = return (True, [])
#else
probeCrippledFileSystem' tmp freezecontent thawcontent hasfreezehook = do
    liftIO $ writeFile probefile ""
    -- Any IO exception during the probe is treated as a crippled
    -- filesystem, with no warning message.
    result <- catchDefaultIO (True, []) probe
    -- Try to make the file writable again before removing it; a failure
    -- to thaw is ignored.
    void $ tryNonAsync $ thaw rawprobefile
    liftIO $ removeFile probefile
    return result
  where
    rawprobefile = tmp P.</> "gaprobe"
    probefile = fromRawFilePath rawprobefile
    -- The probe works with the path converted back from the FilePath.
    probefile' = toRawFilePath probefile
    linkfile = toRawFilePath (probefile ++ "2")

    freeze = fromMaybe (liftIO . preventWrite) freezecontent
    thaw = fromMaybe (liftIO . allowWrite) thawcontent

    probe = do
        -- Check that a symlink can be made.
        liftIO $ removeWhenExistsWith R.removeLink linkfile
        liftIO $ R.createSymbolicLink probefile' linkfile
        liftIO $ removeWhenExistsWith R.removeLink linkfile
        freeze probefile'
        -- Should be unable to write to the file (unless
        -- running as root). But some crippled
        -- filesystems ignore write bit removals or ignore
        -- permissions entirely.
        perm <- liftIO $
            checkContentWritePerm' UnShared probefile' Nothing hasfreezehook
        if perm == Just False
            then return (True, ["Filesystem does not allow removing write bit from files."])
            else liftIO writetest

    -- Tries to write to the frozen file. Skipped when running as root.
    writetest = do
        uid <- getRealUserID
        if uid == 0
            then return (False, [])
            else do
                wrote <- catchBoolIO $ do
                    writeFile probefile "2"
                    return True
                return $ if wrote
                    then (True, ["Filesystem allows writing to files whose write bit is not set."])
                    else (False, [])
#endif
|
2013-02-15 18:17:31 +00:00
|
|
|
|
2019-08-30 17:54:57 +00:00
|
|
|
{- Probes for a crippled filesystem. When one is detected, displays a
 - warning, records it with setCrippledFileSystem, and disables
 - core.symlinks if it is enabled. -}
checkCrippledFileSystem :: Annex ()
checkCrippledFileSystem = do
    crippled <- probeCrippledFileSystem
    when crippled $ do
        warning "Detected a crippled filesystem."
        setCrippledFileSystem True

        {- Normally git disables core.symlinks itself when the
         - filesystem does not support them. But, even if symlinks are
         - supported, we don't use them by default in a crippled
         - filesystem. -}
        symlinksenabled <- coreSymlinks <$> Annex.getGitConfig
        when symlinksenabled $ do
            warning "Disabling core.symlinks."
            setConfig "core.symlinks" (Git.Config.boolConfig False)
|
|
|
|
|
2015-11-13 17:35:29 +00:00
|
|
|
{- Probes whether a file "lockprobe" in a temp directory can be locked
 - with Posix.lockExclusive.
 -
 - The probe is run with withAsync alongside a thread that waits 10
 - seconds and then warns that the probe is taking a long time.
 -
 - On Windows, no probing is done and the result is always True.
 -}
probeLockSupport :: Annex Bool
#ifdef mingw32_HOST_OS
probeLockSupport = return True
#else
probeLockSupport = withEventuallyCleanedOtherTmp $ \tmp -> do
    let lockfile = tmp P.</> "lockprobe"
    mode <- annexFileMode
    annexrunner <- Annex.makeRunner
    liftIO $ withAsync (warnstall annexrunner) $ \_ ->
        trylock lockfile mode
  where
    -- Takes and drops the lock; any non-async exception means that
    -- locking is not supported. The lock file is removed before and
    -- after the attempt.
    trylock lockfile mode = do
        removeWhenExistsWith R.removeLink lockfile
        result <- tryNonAsync $ bracket
            (Posix.lockExclusive (Just mode) lockfile)
            Posix.dropLock
            (const noop)
        removeWhenExistsWith R.removeLink lockfile
        return (isRight result)

    warnstall annexrunner = do
        threadDelaySeconds (Seconds 10)
        annexrunner $ do
            warning "Probing the filesystem for POSIX fcntl lock support is taking a long time."
            warning "(Setting annex.pidlock will avoid this probe.)"
#endif
|
|
|
|
|
2013-04-04 17:14:55 +00:00
|
|
|
{- Probes whether a named pipe can be created in a temp directory, hard
 - linked to a second name, and then seen to be a named pipe when its
 - file status is checked. Any IO exception along the way means fifos are
 - not supported.
 -
 - On Windows, no probing is done and the result is always False.
 -}
probeFifoSupport :: Annex Bool
#ifdef mingw32_HOST_OS
probeFifoSupport = return False
#else
probeFifoSupport = withEventuallyCleanedOtherTmp $ \tmp -> liftIO $ do
    let fifo = tmp P.</> "gaprobe"
    let hardlink = tmp P.</> "gaprobe2"
    -- Both probe files are removed before and after the attempt.
    let cleanup = mapM_ (removeWhenExistsWith R.removeLink) [fifo, hardlink]
    cleanup
    status <- tryIO $ do
        R.createNamedPipe fifo ownerReadMode
        R.createLink fifo hardlink
        R.getFileStatus fifo
    cleanup
    return $ either (const False) isNamedPipe status
#endif
|
2013-04-04 17:14:55 +00:00
|
|
|
|
2015-11-13 17:35:29 +00:00
|
|
|
{- Unless annexPidLock is already set in the git config, runs
 - probeLockSupport; when that reports no support, warns the user and
 - sets the "pidlock" annex config to true. -}
checkLockSupport :: Annex ()
checkLockSupport = unlessM pidlockenabled $ unlessM probeLockSupport enablepidlock
  where
	pidlockenabled = annexPidLock <$> Annex.getGitConfig
	enablepidlock = do
		warning "Detected a filesystem without POSIX fcntl lock support."
		warning "Enabling annex.pidlock."
		setConfig (annexConfig "pidlock") (Git.Config.boolConfig True)
|
2015-11-13 17:35:29 +00:00
|
|
|
|
2013-04-04 17:14:55 +00:00
|
|
|
{- Runs probeFifoSupport; when it reports that fifos do not work, warns
 - the user and sets the "sshcaching" annex config to false. -}
checkFifoSupport :: Annex ()
checkFifoSupport = probeFifoSupport >>= \case
	True -> return ()
	False -> do
		warning "Detected a filesystem without fifo support."
		warning "Disabling ssh connection caching."
		setConfig (annexConfig "sshcaching") (Git.Config.boolConfig False)
|
work around lack of receive.denyCurrentBranch in direct mode
Now that direct mode sets core.bare=true, git's normal prohibition about
pushing into the currently checked out branch doesn't work.
A simple fix for this would be an update hook which blocks the pushes..
but git hooks must be executable, and git-annex needs to be usable on eg,
FAT, which lacks x bits.
Instead, enabling direct mode switches the branch (eg master) to a special
purpose branch (eg annex/direct/master). This branch is not pushed when
syncing; instead any changes that git annex sync commits get written to
master, and it's pushed (along with synced/master) to the remote.
Note that initialization has been changed to always call setDirect,
even if it's just setDirect False for indirect mode. This is needed because
if the user has just cloned a direct mode repo, that nothing has synced
with before, it may have no master branch, and only an annex/direct/master.
Resulting in that branch being checked out locally too. Calling setDirect False
for indirect mode moves back out of this branch, to a new master branch,
and ensures that a manual "git push" doesn't push changes directly to
the annex/direct/master of the remote. (It's possible that the user
makes a commit w/o using git-annex and pushes it, but nothing I can do
about that really.)
This commit was sponsored by Jonathan Harrington.
2013-11-06 01:08:31 +00:00
|
|
|
|
2022-08-17 17:07:14 +00:00
|
|
|
{- Sqlite depends on the filesystem supporting range locking. Some
 - filesystems, such as CIFS, lack it, and sqlite then fails with
 - ErrorBusy.
 -
 - This opens and closes the fsck database as a test. If that throws
 - a (non-async) exception, the problem and a workaround are displayed,
 - and initialization is given up on.
 -}
checkSqliteWorks :: Annex ()
checkSqliteWorks = do
	u <- getUUID
	either explain return
		=<< tryNonAsync (Database.Fsck.openDb u >>= Database.Fsck.closeDb)
  where
	explain e = do
		mapM_ showLongNote
			[ "Detected a filesystem where Sqlite does not work."
			, UnquotedString $ "(" ++ show e ++ ")"
			, "To work around this problem, you can set annex.dbdir " <>
				"to a directory on another filesystem."
			, "For example: git config annex.dbdir $HOME/cache/git-annex"
			]
		giveup "Not initialized."
|
|
|
|
|
2015-09-09 17:56:37 +00:00
|
|
|
{- Runs Git.Objects.isSharedClone on the current repository. -}
checkSharedClone :: Annex Bool
checkSharedClone = inRepo $ \r -> Git.Objects.isSharedClone r
|
|
|
|
|
|
|
|
{- When passed True, tells the user the repository was cloned with
 - --shared, marks this repository's UUID as untrusted, and sets the
 - "hardlink" annex config to true. Does nothing when passed False. -}
initSharedClone :: Bool -> Annex ()
initSharedClone shared
	| shared = do
		showLongNote "Repository was cloned with --shared; setting annex.hardlink=true and making repository untrusted."
		flip trustSet UnTrusted =<< getUUID
		setConfig (annexConfig "hardlink") (Git.Config.boolConfig True)
	| otherwise = return ()
|
2017-02-27 20:08:16 +00:00
|
|
|
|
2023-03-14 02:39:16 +00:00
|
|
|
{- Copies annex.securehashesonly from the global config into the local
 - config, when the global config has a value for it.
 -
 - This lets a clone inherit its parent's setting. Once a repository
 - has a local setting, later changes to the global config won't
 - affect it.
 -}
propigateSecureHashesOnly :: Annex ()
propigateSecureHashesOnly = getGlobalConfig "annex.securehashesonly" >>= \case
	Nothing -> noop
	Just v -> setConfig "annex.securehashesonly" (fromConfigValue v)
|
2020-03-09 18:45:14 +00:00
|
|
|
|
|
|
|
{- Runs fixupUnusualRepos on the current repository with the current
 - git config, discarding its result. -}
fixupUnusualReposAfterInit :: Annex ()
fixupUnusualReposAfterInit =
	Annex.getGitConfig >>= \gc ->
		void $ inRepo (`fixupUnusualRepos` gc)
|
2020-05-27 15:54:39 +00:00
|
|
|
|
|
|
|
{- Attempts to enable the special remotes that are configured to be
 - automatically enabled.
 -
 - This is done by running "init --autoenable" in a child process, with
 - all of its stdio connected to the null handle, so that the enabling
 - does not use this process's stdio.
 -
 - The remotelist parameter is expected to be Remote.List.remoteList.
 - It has to be passed in, because importing it here would cause a
 - dependency loop.
 -}
autoEnableSpecialRemotes :: Annex [Remote] -> Annex ()
autoEnableSpecialRemotes remotelist = do
	-- Running the remote list here makes the uuids of existing git
	-- remotes get probed in this process, rather than in the child.
	-- Probing in the child could lead to password prompts for http
	-- credentials, and those would not be cached in the state
	-- of this process.
	void remotelist
	repodir <- fromRawFilePath <$> fromRepo Git.repoPath
	withNullHandle $ \devnull ->
		gitAnnexChildProcess "init"
			[ Param "--autoenable" ]
			(silenced devnull repodir)
			waitchild
	remotesChanged
  where
	-- Point all of the child's stdio at the null handle, and run it
	-- in the repository directory.
	silenced devnull repodir p = p
		{ std_in = UseHandle devnull
		, std_out = UseHandle devnull
		, std_err = UseHandle devnull
		, cwd = Just repodir
		}
	-- Wait for the child to finish; its exit status is not examined.
	waitchild _ _ _ pid = void $ waitForProcess pid
|