2019-08-26 16:32:45 +00:00
|
|
|
{- making local repositories
|
2014-06-16 22:59:23 +00:00
|
|
|
-
|
2015-01-21 16:50:09 +00:00
|
|
|
- Copyright 2012-2014 Joey Hess <id@joeyh.name>
|
2014-06-16 22:59:23 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2014-06-16 22:59:23 +00:00
|
|
|
-}
|
|
|
|
|
2019-12-05 19:10:23 +00:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
|
2019-08-26 16:32:45 +00:00
|
|
|
module Assistant.MakeRepo where
|
2014-06-16 22:59:23 +00:00
|
|
|
|
|
|
|
import Assistant.WebApp.Common
|
|
|
|
import Annex.Init
|
|
|
|
import qualified Git.Construct
|
|
|
|
import qualified Git.Config
|
|
|
|
import qualified Git.Command
|
2014-07-04 15:36:59 +00:00
|
|
|
import qualified Git.Branch
|
2014-06-16 22:59:23 +00:00
|
|
|
import qualified Annex
|
|
|
|
import Annex.UUID
|
2019-08-26 16:53:56 +00:00
|
|
|
import Annex.AdjustedBranch
|
2017-09-30 02:36:08 +00:00
|
|
|
import Annex.Action
|
remove dead nodes when loading the cluster log
This is to avoid inserting a cluster uuid into the location log when
only dead nodes in the cluster contain the content of a key.
One reason why this is necessary is Remote.keyLocations, which excludes
dead repositories from the list. But there are probably many more.
Implementing this was challenging, because Logs.Location importing
Logs.Cluster which imports Logs.Trust which imports Remote.List resulted
in an import cycle through several other modules.
Resorted to making Logs.Location not import Logs.Cluster, and instead
it assumes that Annex.clusters gets populated when necessary before it's
called.
That's done in Annex.Startup, which is run by the git-annex command
(but not other commands) at early startup in initialized repos. Or,
is run after initialization.
Note that in Remote.Git, it is unable to import Annex.Startup, because
Remote.Git importing Logs.Cluster leads to the same import cycle.
So ensureInitialized is not passed annexStartup in there.
Other commands, like git-annex-shell currently don't run annexStartup
either.
So there are cases where Logs.Location will not see clusters. So it won't add
any cluster UUIDs when loading the log. That's ok, the only reason to do
that is to make display of where objects are located include clusters,
and to make commands like git-annex get --from treat keys as being located
in a cluster. git-annex-shell certainly does not do anything like that,
and I'm pretty sure Remote.Git (and callers to Remote.Git.onLocalRepo)
don't either.
2024-06-16 18:35:07 +00:00
|
|
|
import Annex.Startup
|
2014-06-16 22:59:23 +00:00
|
|
|
import Types.StandardGroups
|
|
|
|
import Logs.PreferredContent
|
|
|
|
import qualified Annex.Branch
|
2017-12-31 20:08:31 +00:00
|
|
|
import Utility.Process.Transcript
|
2019-08-26 16:53:56 +00:00
|
|
|
import Config
|
2014-06-16 22:59:23 +00:00
|
|
|
|
|
|
|
{- Creates a new git repository at the given path; the second parameter
 - selects a bare repository.
 -
 - Returns True once git init has succeeded. When a git repository is
 - already found at the path, git is not run and False is returned.
 - When git init fails, giveup is called with a message that includes
 - the transcript of git's output. -}
makeRepo :: FilePath -> Bool -> IO Bool
makeRepo path bare = do
    exists <- probeRepoExists path
    if exists
        then return False
        else do
            (output, succeeded) <-
                processTranscript "git" (toCommand initparams) Nothing
            if succeeded
                then return True
                else giveup $ "git init failed!\nOutput:\n" ++ output
  where
    -- "--bare" is only included when a bare repository was asked for.
    initparams = concat
        [ [Param "init", Param "--quiet"]
        , [Param "--bare" | bare]
        , [File path]
        ]
|
|
|
|
|
|
|
|
{- Runs an Annex action in the git repository in the specified
 - directory, using a freshly constructed Annex state.
 -
 - quiesce is run after the action, whether it finishes normally or
 - throws. The keys db queue is not flushed after each Annex action, so
 - it needs to be flushed explicitly before the Annex state stops being
 - used. -}
inDir :: FilePath -> Annex a -> IO a
inDir dir a = do
    repo <- Git.Construct.fromPath (toRawFilePath dir)
    configured <- Git.Config.read repo
    st <- Annex.new configured
    Annex.eval st (a `finally` quiesce True)
|
2014-06-16 22:59:23 +00:00
|
|
|
|
|
|
|
{- Initializes the repository in the directory, and returns its UUID.
 -
 - The first parameter is True when the repository was newly created.
 - When it is False, the repo already exists; it could be a
 - non-git-annex repo though, so it is still initialized, but nothing
 - else is set up in it.
 -
 - The second parameter is True for a repository that is directly
 - managed by the assistant. It is only looked at for a newly created
 - repository. -}
initRepo :: Bool -> Bool -> FilePath -> Maybe String -> Maybe StandardGroup -> IO UUID
initRepo newlymade primary_assistant_repo dir desc mgroup = inDir dir $ do
    initRepo' desc mgroup
    when newlymade $ do
        bare <- fromMaybe False . Git.Config.isBare <$> gitRepo
        unless bare initialcommit
        when primary_assistant_repo assistantsetup
    getUUID
  where
    {- Initialize the master branch with an empty commit, so things
     - that expect to have it will work, before any files are added.
     - This is skipped in a bare repository. -}
    initialcommit = do
        cmode <- annexCommitMode <$> Annex.getGitConfig
        void $ inRepo $ Git.Branch.commitCommand cmode
            (Git.Branch.CommitQuiet True)
            [ Param "--allow-empty"
            , Param "-m"
            , Param "created repository"
            ]
    {- Repositories directly managed by the assistant use
     - an adjusted unlocked branch with annex.thin set.
     -
     - Automatic gc is disabled, as it can be slow. Instead, gc is
     - done once a day.
     -}
    assistantsetup = do
        void $ enterAdjustedBranch (LinkAdjustment UnlockAdjustment)
        setConfig (annexConfig "thin") (Git.Config.boolConfig True)
        inRepo $ Git.Command.run
            [Param "config", Param "gc.auto", Param "0"]
|
|
|
|
|
|
|
|
{- Initializes git-annex in the current repository, using the provided
 - description. Does nothing at all when the repository is already
 - initialized.
 -
 - When a standard group is provided, it is handed to
 - defaultStandardGroup along with the repository's UUID. -}
initRepo' :: Maybe String -> Maybe StandardGroup -> Annex ()
initRepo' desc mgroup = do
    alreadydone <- isInitialized
    unless alreadydone $ do
        {- startupAnnex is passed along so that the Annex.Startup
         - actions get run after initialization. -}
        initialize startupAnnex desc Nothing
        u <- getUUID
        case mgroup of
            Nothing -> noop
            Just g -> defaultStandardGroup u g
        {- Ensure branch gets committed right away so it is
         - available for merging immediately. -}
        msg <- Annex.Branch.commitMessage
        Annex.Branch.commit msg
|
2014-06-16 22:59:23 +00:00
|
|
|
|
|
|
|
{- Checks whether a git repository exists at a location.
 -
 - Nothing is the default value handed to catchDefaultIO, so
 - presumably a check that fails with an IO exception is reported
 - as there being no repository. -}
probeRepoExists :: FilePath -> IO Bool
probeRepoExists dir = do
    found <- catchDefaultIO Nothing $
        Git.Construct.checkForRepo (encodeBS dir)
    return (isJust found)
|