git-annex/Annex.hs

{- git-annex monad
 -
 - Copyright 2010-2021 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

{-# LANGUAGE GeneralizedNewtypeDeriving, BangPatterns, PackageImports #-}

module Annex (
	Annex,
	AnnexState(..),
	AnnexRead(..),
	new,
	run,
	eval,
	makeRunner,
	getRead,
	getState,
	changeState,
	withState,
	setField,
	setOutput,
	getField,
	addCleanupAction,
	gitRepo,
	inRepo,
	fromRepo,
	calcRepo,
	calcRepo',
	getGitConfig,
	overrideGitConfig,
	changeGitRepo,
	adjustGitRepo,
	addGitConfigOverride,
	getGitConfigOverrides,
	getRemoteGitConfig,
	withCurrentState,
	changeDirectory,
	getGitRemotes,
	incError,
) where

import Common
import qualified Git
import qualified Git.Config
import qualified Git.Construct
import Annex.Fixup
import Git.HashObject
import Git.CheckAttr
import Git.CheckIgnore
import qualified Git.Hook
import qualified Git.Queue
import Types.Key
import Types.Backend
import Types.GitConfig
import qualified Types.Remote
import Types.Crypto
import Types.BranchState
import Types.TrustLevel
import Types.Group
import Types.Messages
import Types.Concurrency
import Types.UUID
import Types.FileMatcher
import Types.NumCopies
import Types.LockCache
import Types.DesktopNotify
import Types.CleanupActions
import Types.AdjustedBranch
import Types.WorkerPool
import Types.IndexFiles
import Types.CatFileHandles
import Types.RemoteConfig
import Types.TransferrerPool
import Types.VectorClock
import Annex.VectorClock.Utility
import Annex.Debug.Utility
import qualified Database.Keys.Handle as Keys
import Utility.InodeCache
import Utility.Url
import Utility.ResourcePool
import Utility.HumanTime
import Git.Credential (CredentialCache(..))

import "mtl" Control.Monad.Reader
import Control.Concurrent
import Control.Concurrent.STM
import qualified Control.Monad.Fail as Fail
import qualified Data.Map.Strict as M
import qualified Data.Set as S
import Data.Time.Clock.POSIX

{- git-annex's monad is a ReaderT around an AnnexState stored in a MVar,
 - and an AnnexRead. The MVar is not exposed outside this module.
 -
 - The reader environment is a pair: the MVar holding the mutable
 - AnnexState comes first, and the AnnexRead value comes second.
 -
 - Note that when an Annex action fails and the exception is caught,
 - any changes the action has made to the AnnexState are retained,
 - due to the use of the MVar to store the state.
 -}
newtype Annex a = Annex { runAnnex :: ReaderT (MVar AnnexState, AnnexRead) IO a }
	deriving (
		Monad,
		MonadIO,
		MonadReader (MVar AnnexState, AnnexRead),
		MonadCatch,
		MonadThrow,
		MonadMask,
		Fail.MonadFail,
		Functor,
		Applicative,
		Alternative
	)

-- Values that can be read, but not modified by an Annex action.
--
-- This is the second component of the Annex monad's reader environment,
-- and is accessed with getRead. Several fields hold mutable containers
-- (TVar, MVar, TMVar); the AnnexRead value itself stays the same while
-- an action runs, but what those containers hold can still be updated.
--
-- newAnnexRead builds the initial value: the containers start out empty,
-- debugenabled and debugselector are taken from the GitConfig, and the
-- remaining options start at False, Nothing or mempty.
data AnnexRead = AnnexRead
	{ activekeys :: TVar (M.Map Key ThreadId)
	, activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer)
	, keysdbhandle :: Keys.DbHandle
	, sshstalecleaned :: TMVar Bool
	, signalactions :: TVar (M.Map SignalAction (Int -> IO ()))
	, transferrerpool :: TransferrerPool
	, debugenabled :: Bool
	, debugselector :: DebugSelector
	, explainenabled :: Bool
	, ciphers :: TMVar (M.Map StorableCipher Cipher)
	, fast :: Bool
	, force :: Bool
	, forcenumcopies :: Maybe NumCopies
	, forcemincopies :: Maybe MinCopies
	, forcebackend :: Maybe String
	, useragent :: Maybe String
	, desktopnotify :: DesktopNotify
	, gitcredentialcache :: TMVar CredentialCache
	}

{- Builds the initial AnnexRead.
 -
 - Fresh, empty mutable containers are allocated for the fields that
 - live in a TVar, MVar or TMVar. The debug settings are taken from the
 - GitConfig, and every other option starts out unset. -}
newAnnexRead :: GitConfig -> IO AnnexRead
newAnnexRead gc = do
	keysvar <- newTVarIO M.empty
	remotesvar <- newMVar M.empty
	dbh <- Keys.newDbHandle
	stalecleaned <- newTMVarIO False
	sigactions <- newTVarIO M.empty
	pool <- newTransferrerPool
	ciphercache <- newTMVarIO M.empty
	credcache <- newTMVarIO (CredentialCache M.empty)
	pure AnnexRead
		-- Mutable containers and handles allocated above.
		{ activekeys = keysvar
		, activeremotes = remotesvar
		, keysdbhandle = dbh
		, sshstalecleaned = stalecleaned
		, signalactions = sigactions
		, transferrerpool = pool
		, ciphers = ciphercache
		, gitcredentialcache = credcache
		-- Settings derived from the git config.
		, debugenabled = annexDebug gc
		, debugselector = debugSelectorFromGitConfig gc
		-- Everything else starts at its default.
		, explainenabled = False
		, fast = False
		, force = False
		, forcebackend = Nothing
		, forcenumcopies = Nothing
		, forcemincopies = Nothing
		, useragent = Nothing
		, desktopnotify = mempty
		}

-- Values that can change while running an Annex action.
--
-- The state is kept in an MVar: read it with getState, and update it
-- with changeState or withState. newAnnexState provides the initial
-- values. Most of the Maybe fields start out as Nothing there; for
-- example, gitremotes is only filled in the first time getGitRemotes
-- is run.
--
-- repoadjustment and gitconfigadjustment hold the adjustments that
-- changeGitRepo re-applies whenever the Repo is replaced.
data AnnexState = AnnexState
	{ repo :: Git.Repo
	, repoadjustment :: (Git.Repo -> IO Git.Repo)
	, gitconfig :: GitConfig
	, gitconfigadjustment :: (GitConfig -> GitConfig)
	, gitconfigoverride :: [String]
	, gitremotes :: Maybe [Git.Repo]
	, gitconfiginodecache :: Maybe InodeCache
	, backend :: Maybe (BackendA Annex)
	, remotes :: [Types.Remote.RemoteA Annex]
	, output :: MessageState
	, concurrency :: ConcurrencySetting
	, daemon :: Bool
	, branchstate :: BranchState
	, repoqueue :: Maybe (Git.Queue.Queue Annex)
	, catfilehandles :: CatFileHandles
	, hashobjecthandle :: Maybe (ResourcePool HashObjectHandle)
	, checkattrhandle :: Maybe (ResourcePool CheckAttrHandle)
	, checkignorehandle :: Maybe (ResourcePool CheckIgnoreHandle)
	, globalnumcopies :: Maybe (Maybe NumCopies)
	, globalmincopies :: Maybe (Maybe MinCopies)
	, limit :: ExpandableMatcher Annex
	, timelimit :: Maybe (Duration, POSIXTime)
	, sizelimit :: Maybe (TVar Integer)
	, uuiddescmap :: Maybe UUIDDescMap
	, preferredcontentmap :: Maybe (FileMatcherMap Annex)
	, requiredcontentmap :: Maybe (FileMatcherMap Annex)
	, remoteconfigmap :: Maybe (M.Map UUID RemoteConfig)
	, forcetrust :: TrustMap
	, trustmap :: Maybe TrustMap
	, groupmap :: Maybe GroupMap
	, lockcache :: LockCache
	, fields :: M.Map String String
	, cleanupactions :: M.Map CleanupAction (Annex ())
	, sentinalstatus :: Maybe SentinalStatus
	, errcounter :: Integer
	, reachedlimit :: Bool
	, adjustedbranchrefreshcounter :: Integer
	, unusedkeys :: Maybe (S.Set Key)
	, tempurls :: M.Map Key URLString
	, existinghooks :: M.Map Git.Hook.Hook Bool
	, workers :: Maybe (TMVar (WorkerPool (AnnexState, AnnexRead)))
	, cachedcurrentbranch :: (Maybe (Maybe Git.Branch, Maybe Adjustment))
	, cachedgitenv :: Maybe (AltIndexFile, FilePath, [(String, String)])
	, urloptions :: Maybe UrlOptions
	, insmudgecleanfilter :: Bool
	, getvectorclock :: IO CandidateVectorClock
	}

{- Builds the initial AnnexState for a repo and its already extracted
 - GitConfig.
 -
 - Only the message state and the vector clock need IO to set up;
 - every other field starts out empty, unset, or at its non-concurrent
 - default. -}
newAnnexState :: GitConfig -> Git.Repo -> IO AnnexState
newAnnexState cfg gitrepo = do
	msgstate <- newMessageState
	clock <- startVectorClock
	pure (initial msgstate clock)
  where
	initial msgstate clock = AnnexState
		{ repo = gitrepo
		, repoadjustment = return
		, gitconfig = cfg
		, gitconfigadjustment = id
		, gitconfigoverride = []
		, gitremotes = Nothing
		, gitconfiginodecache = Nothing
		, backend = Nothing
		, remotes = []
		, output = msgstate
		, concurrency = ConcurrencyCmdLine NonConcurrent
		, daemon = False
		, branchstate = startBranchState
		, repoqueue = Nothing
		, catfilehandles = catFileHandlesNonConcurrent
		, hashobjecthandle = Nothing
		, checkattrhandle = Nothing
		, checkignorehandle = Nothing
		, globalnumcopies = Nothing
		, globalmincopies = Nothing
		, limit = BuildingMatcher []
		, timelimit = Nothing
		, sizelimit = Nothing
		, uuiddescmap = Nothing
		, preferredcontentmap = Nothing
		, requiredcontentmap = Nothing
		, remoteconfigmap = Nothing
		, forcetrust = M.empty
		, trustmap = Nothing
		, groupmap = Nothing
		, lockcache = M.empty
		, fields = M.empty
		, cleanupactions = M.empty
		, sentinalstatus = Nothing
		, errcounter = 0
		, reachedlimit = False
		, adjustedbranchrefreshcounter = 0
		, unusedkeys = Nothing
		, tempurls = M.empty
		, existinghooks = M.empty
		, workers = Nothing
		, cachedcurrentbranch = Nothing
		, cachedgitenv = Nothing
		, urloptions = Nothing
		, insmudgecleanfilter = False
		, getvectorclock = clock
		}

{- Makes the starting (AnnexState, AnnexRead) pair for the specified
 - git repo.
 -
 - The repo's config is read with Git.Config.read, the GitConfig is
 - extracted from it, and fixupRepo is applied to the repo before it
 - is stored in the state. -}
new :: Git.Repo -> IO (AnnexState, AnnexRead)
new r = do
	configured <- Git.Config.read r
	let cfg = extractGitConfig FromGitConfig configured
	fixed <- fixupRepo configured cfg
	(,) <$> newAnnexState cfg fixed <*> newAnnexRead cfg

{- Runs an Annex action from a starting state. The result is returned
 - along with the state as it was left by the action.
 -
 - A new MVar is allocated to hold the state while the action runs. -}
run :: (AnnexState, AnnexRead) -> Annex a -> IO (a, (AnnexState, AnnexRead))
run (st, rd) a = newMVar st >>= \mv -> run' mv rd a

{- Runs an Annex action against an existing state MVar.
 -
 - Once the action has finished, the state is taken out of the MVar
 - (leaving it empty) and returned alongside the action's result. -}
run' :: MVar AnnexState -> AnnexRead -> Annex a -> IO (a, (AnnexState, AnnexRead))
run' mvar rd a = runReaderT (runAnnex a) (mvar, rd) >>= finish
  where
	finish result = do
		finalstate <- takeMVar mvar
		pure (result, (finalstate, rd))

{- Runs an Annex action from a starting state, returning only its
 - result. Whatever the action changed in the state is discarded. -}
eval :: (AnnexState, AnnexRead) -> Annex a -> IO a
eval v a = do
	(result, _finalstate) <- run v a
	return result

{- Makes a runner action, that allows diving into IO and from inside
 - the IO action, running an Annex action.
 -
 - The runner shares the state MVar of the Annex action that made it,
 - so state changes made by an action it runs remain visible afterwards.
 -
 - The action is run directly against that shared MVar. Emptying the
 - MVar after the action and then refilling it would open a window in
 - which an asynchronous exception could leave the MVar empty for good,
 - blocking every later getState and changeState; other threads reading
 - the state would also block for the duration of that window.
 -}
makeRunner :: Annex (Annex a -> IO a)
makeRunner = do
	env <- ask
	return $ \a -> runReaderT (runAnnex a) env

{- Applies a selector to the AnnexRead. This only consults the reader
 - environment; the state MVar is not touched. -}
getRead :: (AnnexRead -> v) -> Annex v
getRead selector = do
	(_, rd) <- ask
	return (selector rd)

{- Applies a selector to the current AnnexState, which is read from
 - the MVar without being taken out of it. -}
getState :: (AnnexState -> v) -> Annex v
getState selector = do
	(mvar, _) <- ask
	liftIO $ selector <$> readMVar mvar

{- Replaces the AnnexState held in the MVar with the result of applying
 - a pure modifier to it. -}
changeState :: (AnnexState -> AnnexState) -> Annex ()
changeState modifier = do
	(mvar, _) <- ask
	liftIO $ modifyMVar_ mvar (\st -> return (modifier st))

{- Runs an IO action on the AnnexState held in the MVar. The action
 - yields the new state to store, along with a value to return. -}
withState :: (AnnexState -> IO (AnnexState, b)) -> Annex b
withState modifier = ask >>= \(mvar, _) ->
	liftIO (modifyMVar mvar modifier)

{- Stores a value under a field name in the state's fields map,
 - replacing any value already stored under that name. -}
setField :: String -> String -> Annex ()
setField field value = changeState addfield
  where
	addfield st = st { fields = M.insert field value (fields st) }

{- Registers a cleanup action under the given CleanupAction key.
 - An action already registered under the same key is replaced. -}
addCleanupAction :: CleanupAction -> Annex () -> Annex ()
addCleanupAction k a = changeState register
  where
	register st = st { cleanupactions = M.insert k a (cleanupactions st) }

{- Sets the type of output to emit. The new output type is combined
 - with the current one by adjustOutputType. -}
setOutput :: OutputType -> Annex ()
setOutput o = changeState $ \st -> st { output = retype (output st) }
  where
	retype m = m { outputType = adjustOutputType (outputType m) o }

{- Looks up the value stored under a field name, if any. -}
getField :: String -> Annex (Maybe String)
getField field = do
	m <- getState fields
	return (M.lookup field m)

{- Returns the git repository currently held in the Annex state. -}
gitRepo :: Annex Git.Repo
gitRepo = repo <$> getState id

{- Runs an IO action, passing it the annex's git repository. -}
inRepo :: (Git.Repo -> IO a) -> Annex a
inRepo a = do
	r <- gitRepo
	liftIO (a r)

{- Extracts a value from the annex's git repository, using a pure
 - function. -}
fromRepo :: (Git.Repo -> a) -> Annex a
fromRepo a = do
	r <- gitRepo
	return (a r)

{- Runs an IO calculation on the annex's git repository and its
 - GitConfig. Both are taken from the same snapshot of the state. -}
calcRepo :: (Git.Repo -> GitConfig -> IO a) -> Annex a
calcRepo a = getState id >>= \s ->
	liftIO (a (repo s) (gitconfig s))

{- Pure variant of calcRepo: calculates a value from the annex's git
 - repository and its GitConfig, both taken from the same snapshot of
 - the state. -}
calcRepo' :: (Git.Repo -> GitConfig -> a) -> Annex a
calcRepo' f = getState $ \s -> f (repo s) (gitconfig s)

{- Returns the GitConfig currently held in the Annex state. -}
getGitConfig :: Annex GitConfig
getGitConfig = gitconfig <$> getState id

{- Overrides a GitConfig setting. The override is applied to the
 - current GitConfig right away, and is also added to the stored
 - adjustment, so that it persists across reloads of the repo's config.
 -
 - Note that the stored adjustment is composed so that the newest
 - override runs first, followed by the ones that were added earlier.
 -}
overrideGitConfig :: (GitConfig -> GitConfig) -> Annex ()
overrideGitConfig f = changeState apply
  where
	apply st = st
		{ gitconfig = f (gitconfig st)
		, gitconfigadjustment = gitconfigadjustment st . f
		}

{- Adds an adjustment to the Repo data. The adjustment is appended to
 - the ones already stored, so it runs after them, and it persists
 - across reloads of the repo's config.
 -
 - The current Repo is then passed through changeGitRepo, which runs
 - the stored adjustments on it.
 -
 - Note that the action may run more than once, and should avoid eg,
 - appending the same value to a repo's config when run repeatedly.
 -}
adjustGitRepo :: (Git.Repo -> IO Git.Repo) -> Annex ()
adjustGitRepo a = do
	changeState $ \st ->
		let prior = repoadjustment st
		in st { repoadjustment = \r -> a =<< prior r }
	gitRepo >>= changeGitRepo

{- Adds a git config setting, like "foo=bar". It is added to the Repo's
 - global options as a -c parameter and stored into the Repo's config,
 - by way of a repo adjustment, which also leads to the GitConfig being
 - re-extracted. The setting is remembered in the state as well, for
 - getGitConfigOverrides. -}
addGitConfigOverride :: String -> Annex ()
addGitConfigOverride v = do
	adjustGitRepo storeoverride
	changeState $ \st -> st { gitconfigoverride = v : gitconfigoverride st }
  where
	storeoverride r = Git.Config.store (encodeBS v) Git.Config.ConfigList $
		r { Git.gitGlobalOpts = readd (Git.gitGlobalOpts r) }

	-- The adjustment can run repeatedly, so drop every earlier
	-- "-c v" pair, and put a single one at the very end.
	readd opts = case opts of
		[] -> [Param "-c", Param v]
		(Param "-c" : Param v' : rest)
			| v' == v -> readd rest
		(c : rest) -> c : readd rest

{- Values that were passed to addGitConfigOverride, oldest first.
 - (They are stored newest first, so the list is reversed here.) -}
getGitConfigOverrides :: Annex [String]
getGitConfigOverrides = getState (reverse . gitconfigoverride)

{- Replaces the git Repo held in the state.
 -
 - The stored repo adjustment is run on the new Repo first. The
 - GitConfig is then re-extracted from the adjusted Repo, and the
 - stored GitConfig adjustment is applied to it. -}
changeGitRepo :: Git.Repo -> Annex ()
changeGitRepo r = do
	adjustrepo <- getState repoadjustment
	adjustconfig <- getState gitconfigadjustment
	adjusted <- liftIO (adjustrepo r)
	let newconfig = adjustconfig (extractGitConfig FromGitConfig adjusted)
	changeState $ \st -> st { repo = adjusted, gitconfig = newconfig }

{- Gets the RemoteGitConfig for a remote, given the Git.Repo for that
 - remote. The config is extracted from the annex's own git repository,
 - using the remote's repoDescribe value to identify it. -}
getRemoteGitConfig :: Git.Repo -> Annex RemoteGitConfig
getRemoteGitConfig r = gitRepo >>= \g ->
	liftIO $ atomically $ extractRemoteGitConfig g (Git.repoDescribe r)

{- Converts an Annex action into an IO action, that runs with a copy
 - of the Annex state as it is at the time withCurrentState is called.
 -
 - Use with caution; the action should not rely on changing the
 - state, as its changes are made to the copy and thrown away. -}
withCurrentState :: Annex a -> Annex (IO a)
withCurrentState a = ask >>= \(mvar, rd) ->
	liftIO $ (\snapshot -> eval (snapshot, rd) a) <$> readMVar mvar

{- It's not safe to use setCurrentDirectory in the Annex monad,
 - because the git repo paths are stored relative.
 - Instead, use this.
 -
 - The repo's paths are made absolute before the directory is changed,
 - and are made relative again afterwards.
 -}
changeDirectory :: FilePath -> Annex ()
changeDirectory d = do
	absrepo <- inRepo (Git.adjustPath absPath)
	relrepo <- liftIO $ do
		setCurrentDirectory d
		Git.relPath absrepo
	changeState $ \st -> st { repo = relrepo }

{- Increments the error counter in the state.
 -
 - The new count is forced before the updated state is built, so that
 - evaluating the state does not leave a chain of (+1) thunks behind. -}
incError :: Annex ()
incError = changeState $ \st -> bump (errcounter st + 1) st
  where
	bump !n st = st { errcounter = n }

{- Gets the list of git remotes of the repository.
 -
 - The list is constructed from the repo the first time this is run,
 - and is cached in the state for later calls. -}
getGitRemotes :: Annex [Git.Repo]
getGitRemotes = do
	st <- getState id
	maybe (discover (repo st)) return (gitremotes st)
  where
	discover r = do
		rs <- liftIO $ Git.Construct.fromRemotes r
		changeState $ \st -> st { gitremotes = Just rs }
		return rs
-												copyright statements

											
										
										
											2010-10-27 20:53:54 +00:00
+								{- git-annex monad
 								 -
-												fix --time-limit

It got broken in several ways by the streaming seeking optimisations
around version 8.20201007.

Moved time limit checking out of the matcher, which was a hack in the
first place. So everywhere that uses Limit.getMatcher needs to check
time limit. Well, almost everywhere. Command.Info uses it, but it does
not make sense to time limit getting info. And Command.MultiCast uses it
just to build up a list of files that then get passed to a command, so
it would never have hit the timeout in a useful way.

This implementation is a little more expensive when at time limit than
necessary, since it continues seeking only to discard everything after the
time limit. I did try making it close the file handles to force a faster
shutdown, but that didn't work and hung. Could certainly be improved
somehow, but seeking is probably not the expensive bit when a time limit
is hit, so this seems acceptable for now.

											
										
										
											2021-01-04 19:25:28 +00:00
+								 - Copyright 2010-2021 Joey Hess <id@joeyh.name>
-												copyright statements

											
										
										
											2010-10-27 20:53:54 +00:00
+								 -
-												update licenses from GPL to AGPL

This does not change the overall license of the git-annex program, which
was already AGPL due to a number of sources files being AGPL already.

Legally speaking, I'm adding a new license under which these files are
now available; I already released their current contents under the GPL
license. Now they're dual licensed GPL and AGPL. However, I intend
for all my future changes to these files to only be released under the
AGPL license, and I won't be tracking the dual licensing status, so I'm
simply changing the license statement to say it's AGPL.

(In some cases, others wrote parts of the code of a file and released it
under the GPL; but in all cases I have contributed a significant portion
of the code in each file and it's that code that is getting the AGPL
license; the GPL license of other contributors allows combining with
AGPL code.)

											
										
										
											2019-03-13 19:48:14 +00:00
+								 - Licensed under the GNU AGPL version 3 or higher.
-												copyright statements

											
										
										
											2010-10-27 20:53:54 +00:00
+								 -}
-												got annexing working

											
										
										
											2010-10-10 19:04:07 +00:00
-												turn of PackageImports in cabal file

This makes it easier to build eg benchmarks of individual modules.

May be that most of these PackageImports are not really necessary,
dunno.

											
										
										
											2022-02-25 17:16:36 +00:00
+								{-# LANGUAGE GeneralizedNewtypeDeriving, BangPatterns, PackageImports #-}
-												make Annex an opaque data type

Was a type alias; using newtype has the benefit that type errors will
show "Annex foo" rather than two lines of internal type nonsense. Yay!
There should be no other effects to size or runtime.

I've tried to do this at least twice before (each time I read RWH chapter 10);
finally understood how to this time.. sorta.

											
										
										
											2011-08-19 18:28:07 +00:00
-												explicit exports

											
										
										
											2010-10-11 21:52:46 +00:00
+								module Annex (
-												successfully split Annex and AnnexState out of TypeInternals

											
										
										
											2011-01-26 01:49:04 +00:00
+									Annex,
 									AnnexState(..),
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									AnnexRead(..),
-												more reorg, spiffed up state monad

											
										
										
											2010-10-14 07:18:11 +00:00
+									new,
 									run,
-												simplify evals

											
										
										
											2010-11-01 03:24:16 +00:00
+									eval,
-												fix lockKey to run callback in original Annex monad, not local remote's

											
										
										
											2015-10-09 17:35:28 +00:00
+									makeRunner,
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									getRead,
-												rework config storage

Moved away from a map of flags to storing config directly in the AnnexState
structure. Got rid of most accessor functions in Annex.

This allowed supporting multiple --exclude flags.

											
										
										
											2011-01-26 04:17:38 +00:00
+									getState,
 									changeState,
-												When accessing a local remote, shut down git-cat-file processes afterwards, to ensure that remotes on removable media can be unmounted. Closes: #758630

This does mean that eg, copying multiple files to a local remote will
become slightly slower, since it now restarts git-cat-file after each copy.
Should not be significant slowdown.

The reason git-cat-file is run on the remote at all is to update its
location log. In order to add an item to it, it needs to get the current
content of the log. Finding a way to avoid needing to do that would be a
good path to avoiding this slowdown if it does become a problem somehow.

This commit was sponsored by Evan Deaubl.

											
										
										
											2014-08-20 16:01:45 +00:00
+									withState,
-												more command-specific options

Made --from and --to command-specific options.

Added generic storage for values of command-specific options,
which allows removing some of the special case fields in AnnexState.

(Also added generic storage for command-specific flags, although there are
not yet any.)

Note that this storage uses a Map, so repeatedly looking up the same value
is slightly more expensive than looking up an AnnexState field. But, the
value can be looked up once in the seek stage, transformed as necessary,
and passed in a closure to the start stage, and this avoids that overhead.

Still, I'm hesitant to use this for things like force or fast flags.
It's probably best to reserve it for flags that are only used by a few
commands, or options like --from and --to that it's important only be
allowed to be used with commands that implement them, to avoid user
confusion.

											
										
										
											2012-01-06 07:06:25 +00:00
+									setField,
-												fix test suite build

											
										
										
											2012-04-30 17:59:05 +00:00
+									setOutput,
-												more command-specific options

Made --from and --to command-specific options.

Added generic storage for values of command-specific options,
which allows removing some of the special case fields in AnnexState.

(Also added generic storage for command-specific flags, although there are
not yet any.)

Note that this storage uses a Map, so repeatedly looking up the same value
is slightly more expensive than looking up an AnnexState field. But, the
value can be looked up once in the seek stage, transformed as necessary,
and passed in a closure to the start stage, and this avoids that overhead.

Still, I'm hesitant to use this for things like force or fast flags.
It's probably best to reserve it for flags that are only used by a few
commands, or options like --from and --to that it's important only be
allowed to be used with commands that implement them, to avoid user
confusion.

											
										
										
											2012-01-06 07:06:25 +00:00
+									getField,
-												propagate signals to the transferrer process group

Done on unix, could not implement it on windows quite.

The signal library gets part of the way needed for windows.
But I had to open https://github.com/pmlodawski/signal/issues/1 because
it lacks raiseSignal.

Also, I don't know what the equivalent of getProcessGroupIDOf is on
windows. And System.Process does not provide a way to send any signal to
a process group except for SIGINT.

This commit was sponsored by Boyd Stephen Smith Jr. on Patreon.

											
										
										
											2020-12-11 19:28:58 +00:00
+									addCleanupAction,
-												reorder repo parameters last

Many functions took the repo as their first parameter. Changing it
consistently to be the last parameter allows doing some useful things with
currying, that reduce boilerplate.

In particular, g <- gitRepo is almost never needed now, instead
use inRepo to run an IO action in the repo, and fromRepo to get
a value from the repo.

This also provides more opportunities to use monadic and applicative
combinators.

											
										
										
											2011-11-08 19:34:10 +00:00
+									gitRepo,
 									inRepo,
 									fromRepo,
-												Use lower case hash directories for storing files on crippled filesystems, same as is already done for bare repositories.

* since this is a crippled filesystem anyway, git-annex doesn't use
  symlinks on it
* so there's no reason to use the mixed case hash directories that we're
  stuck using to avoid breaking everyone's symlinks to the content
* so we can do what is already done for all bare repos, and make non-bare
  repos on crippled filesystems use the all-lower case hash directories
* which are, happily, all 3 letters long, so they cannot conflict with
  mixed case hash directories
* so I was able to 100% fix this and even resuming `git annex add` in the
  test case will recover and it will all just work.

											
										
										
											2013-04-04 19:46:33 +00:00
+									calcRepo,
-												add annex.dbdir (WIP)

WIP: This is mostly complete, but there is a problem: createDirectoryUnder
throws an error when annex.dbdir is set to outside the git repo.

annex.dbdir is a workaround for filesystems where sqlite does not work,
due to eg, the filesystem not properly supporting locking.

It's intended to be set before initializing the repository. Changing it
in an existing repository can be done, but would be the same as making a
new repository and moving all the annexed objects into it. While the
databases get recreated from the git-annex branch in that situation, any
information that is in the databases but not stored in the branch gets
lost. It may be that no information ever gets stored in the databases
that cannot be reconstructed from the branch, but I have not verified
that.

Sponsored-by: Dartmouth College's Datalad project

											
										
										
											2022-08-11 20:57:44 +00:00
+									calcRepo',
-												type based git config handling for remotes

Still a couple of places that use git config ad-hoc, but this is most of it
done.

											
										
										
											2013-01-01 17:52:47 +00:00
+									getGitConfig,
-												rename changeGitConfig to overrideGitConfig and avoid unncessary calls

It's important that it be clear that it overrides a config, such that
reloading the git config won't change it, and in particular, setConfig
won't change it.

Most of the calls to changeGitConfig were actually after setConfig,
which was redundant and unnecessary. So removed those.

The only remaining one, besides --debug, is in the handling of
repository-global config values. That one's ok, because the
way mergeGitConfig is implemented, it does not override any value that
is set in git config. If a value with a repo-global setting was passed
to setConfig, it would set it in the git config, reload the git config,
re-apply mergeGitConfig, and use the newly set value, which is the right
thing.

											
										
										
											2020-02-27 05:06:35 +00:00
+									overrideGitConfig,
-												type based git config handling

Now there's a Config type, that's extracted from the git config at startup.
Note that laziness means that individual config values are only looked up
and parsed on demand, and so we get implicit memoization for all of them.
So this is not only prettier and more type safe, it optimises several
places that didn't have explicit memoization before. As well as getting rid
of the ugly explicit memoization code.

Not yet done for annex.<remote>.* configuration settings.

											
										
										
											2012-12-30 03:10:18 +00:00
+									changeGitRepo,
-												Bug fix: Git config settings passed to git-annex -c did not always take effect.

When Config.setConfig runs, it throws away the old Repo and loads a new
one. So, add an action to adjust the Repo so that -c settings will persist
across that.

											
										
										
											2016-01-22 17:47:41 +00:00
+									adjustGitRepo,
-												refix bug in a better way

Always run Git.Config.store, so when the git config gets reloaded,
the override gets re-added to it, and changeGitRepo then calls extractGitConfig
on it and sees the annex.* settings from the override.

Remove any prior occurrence of -c v and add it to the end. This way,
-c foo=1 -c foo=2 -c foo=1 will pass -c foo=1 to git, rather than -c foo=2

Note that, if git had some multiline config that got built up by
multiple -c's, this would not work still. But it never worked because
before the bug got fixed in the first place, the -c value was repeated
many times, so the multivalue thing would have been wrong. I don't think
-c can be used with multiline configs anyway, though git-config does
talk about them?

											
										
										
											2020-07-02 17:32:33 +00:00
+									addGitConfigOverride,
-												pass along -c options to child git-annex processes

											
										
										
											2020-12-15 14:44:36 +00:00
+									getGitConfigOverrides,
-												factor out getRemoteGitConfig

											
										
										
											2014-05-16 20:08:20 +00:00
+									getRemoteGitConfig,
-												Bugfix: Fix bug in inode cache sentinal check, which broke copying to local repos if the repo being copied from had moved to a different filesystem or otherwise changed all its inodes'

											
										
										
											2013-03-12 20:41:54 +00:00
+									withCurrentState,
-												handle sync's use of setCurrentDirectory to work with relative paths

I think this is the last problematic setCurrentDirectory. I also audited
for external commands that git-annex might run with cwd = foo, and did not
find any that were passed any FilePath that might be absolute.

											
										
										
											2015-01-07 02:23:04 +00:00
+									changeDirectory,
-												Improve startup time for commands that do not operate on remotes

And for tab completion, by not unnecessarily statting paths to remotes,
which used to cause eg, spin-up of removable drives.

Got rid of the remotes member of Git.Repo. This was a bit painful.

Remote.Git modifies the list of remotes as it reads their configs,
so still need a persistent list of remotes. So, put it in as
Annex.gitremotes. It's only populated by getGitRemotes, so commands
like examinekey that don't care about remotes won't do so.

This commit was sponsored by Jake Vosloo on Patreon.

											
										
										
											2018-01-09 19:36:56 +00:00
+									getGitRemotes,
-												refactor

											
										
										
											2015-04-30 19:04:01 +00:00
+									incError,
-												explicit exports

											
										
										
											2010-10-11 21:52:46 +00:00
+								) where
-												got annexing working

											
										
										
											2010-10-10 19:04:07 +00:00
-												factor out common imports

no code changes

											
										
										
											2011-10-04 02:24:57 +00:00
+								import Common
-												renamed GitRepo to Git

It was always imported qualified as Git anyway

											
										
										
											2011-06-30 17:16:57 +00:00
+								import qualified Git
-												split out three modules from Git

Constructors and configuration make sense in separate modules.
A separate Git.Types is needed to avoid cycles.

											
										
										
											2011-12-13 19:05:07 +00:00
+								import qualified Git.Config
-												Improve startup time for commands that do not operate on remotes

And for tab completion, by not unnecessarily statting paths to remotes,
which used to cause eg, spin-up of removable drives.

Got rid of the remotes member of Git.Repo. This was a bit painful.

Remote.Git modifies the list of remotes as it reads their configs,
so still need a persistent list of remotes. So, put it in as
Annex.gitremotes. It's only populated by getGitRemotes, so commands
like examinekey that don't care about remotes won't do so.

This commit was sponsored by Jake Vosloo on Patreon.

											
										
										
											2018-01-09 19:36:56 +00:00
+								import qualified Git.Construct
-												Submodules are now supported by git-annex!

Seems to work, but still experimental until it's been tested more.

When repositories are on filesystems not supporting symlinks, the .git dir
symlink trick cannot be used. Since we're going to be in direct mode
anyway, the .git dir symlink is not strictly needed.

However, I have not fixed the code that creates new annex symlinks to
handle this case -- the committed symlinks will be wrong.

git annex sync happens to currently fail in a submodule using direct mode,
because there's no HEAD ref. That also needs to be dealt with to get
this fully working in crippled filesystems.

Leaving http://github.com/datalad/datalad/issues/44 open until these issues
are dealt with.

											
										
										
											2015-03-02 20:43:44 +00:00
+								import Annex.Fixup
-												Sped up git-annex add in direct mode and v6 by using git hash-object --batch.

Speeds up hashSymlink and hashPointerFile.

											
										
										
											2016-03-14 19:58:46 +00:00
+								import Git.HashObject
-												rework git check-attr interface

Now gitattributes are looked up, efficiently, in only the places that
really need them, using the same approach used for cat-file.

The old CheckAttr code seemed very fragile, in the way it streamed files
through git check-attr.
I actually found that cad8824852aa0623dc41eac02a9e2bae47d88ec4
was still deadlocking with ghc 7.4, at the end of adding a lot of files.
This should fix that problem, and avoid future ones.

The best part is that this removes withAttrFilesInGit and withNumCopies,
which were complicated Seek methods, as well as simplifying the types
for several other Seek methods that had a Backend tupled in.

											
										
										
											2012-02-14 03:42:44 +00:00
+								import Git.CheckAttr
-												gitignore support for the assistant and watcher

Requires git 1.8.4 or newer. When it's installed, a background
git check-ignore process is run, and used to efficiently check ignores
whenever a new file is added.

Thanks to Adam Spiers, for getting the necessary support into git for this.

A complication is what to do about files that are gitignored but have
been checked into git anyway. git commands assume the ignore has been
overridden in this case, and not need any more overriding to commit a
changed version.

However, for the assistant to do the same, it would have to run git ls-files
to check if the ignored file is in git. This is somewhat expensive. Or it
could use the running git-cat-file process to query the file that way,
but that requires transferring the whole file content over a pipe, so it
can be quite expensive too, for files that are not git-annex
symlinks.

Now imagine if the user knows that a file or directory tree will be getting
frequent changes, and doesn't want the assistant to sync it, so gitignores
it. The assistant could overload the system with repeated ls-files checks!

So, I've decided that the assistant will not automatically commit changes
to files that are gitignored. This is a tradeoff. Hopefully it won't be a
problem to adjust .gitignore settings to not ignore files you want the
assistant to autocommit, or to manually git annex add files that are listed
in .gitignore.

(This could be revisited if git-annex gets access to an interface to check
the content of the index w/o forking a git command. This could be libgit2,
or perhaps a separate git cat-file --batch-check process, so it wouldn't
need to ship over the whole file content.)

This commit was sponsored by Francois Marier. Thanks!

											
										
										
											2013-08-02 23:31:55 +00:00
+								import Git.CheckIgnore
-												pre-commit-annex hook script to automatically extract metadata from lots of types of files

Using the extract(1) program to do the heavy lifting.

Decided to make git-annex run pre-commit-annex when committing. Since
git-annex pre-commit also runs it, it'll be run when git commit is run too,
via the pre-commit hook. This basically gives back the pre-commit hook
that git-annex took away. The implementation avoids repeatedly looking
for the hook script when the assistant is running and committing
repeatedly; only checks if the hook is available once.

To make the script simpler, made git-annex metadata -s field?=value
only set a field when it's not already got a value.

This commit was sponsored by bak.

											
										
										
											2014-03-02 22:01:07 +00:00
+								import qualified Git.Hook
-												use Common in a few more modules

											
										
										
											2011-12-20 18:37:53 +00:00
+								import qualified Git.Queue
-												add "unused" preferred content expression

With a really nice optimisation that keeps it from having any overhead
in normal operation!

This commit was sponsored by Ulises Vitulli.

											
										
										
											2014-01-22 20:35:32 +00:00
+								import Types.Key
-												rename modules for data types into Types/ directory

											
										
										
											2011-06-02 01:56:04 +00:00
+								import Types.Backend
-												type based git config handling for remotes

Still a couple of places that use git config ad-hoc, but this is most of it
done.

											
										
										
											2013-01-01 17:52:47 +00:00
+								import Types.GitConfig
-												rework annex-ignore handling

Only one place needs to filter the list of remotes for ignored remotes:
keyPossibilities. Make the full list available to everything else.

This allows getting rid of the special case handling for --from and --to
to make ignored remotes not be ignored with those options.

											
										
										
											2011-09-19 00:11:39 +00:00
+								import qualified Types.Remote
-												rename modules for data types into Types/ directory

											
										
										
											2011-06-02 01:56:04 +00:00
+								import Types.Crypto
-												Branch handling improvements

Support creating the branch.

Unified branch state into a single data type.

Only commit changes when the index has been changed.

											
										
										
											2011-06-22 19:58:30 +00:00
+								import Types.BranchState
-												cache the trustmap

Doubles the speed of fsck, and speeds up drop as well.

											
										
										
											2011-06-24 01:25:39 +00:00
+								import Types.TrustLevel
-												group, ungroup: New commands to indicate groups of repositories.

											
										
										
											2012-10-01 19:12:04 +00:00
+								import Types.Group
-												display "Recording state in git..." when staging the journal

A bit tricky to avoid printing it twice in a row when there are queued git
commands to run and journal to stage.

Added a generic way to run an action that may output multiple side
messages, with only the first displayed.

											
										
										
											2012-04-27 17:23:52 +00:00
+								import Types.Messages
-												disentangle concurrency and message type

This makes -Jn work with --json and --quiet, where before
setting -Jn disabled those options.

Concurrent json output is currently a mess though since threads output
chunks over top of one-another.

											
										
										
											2016-09-09 16:57:42 +00:00
+								import Types.Concurrency
-												added preferred-content log, and allow editing it with vicfg

This includes a full parser for the boolean expressions in the log,
that compiles them into Matchers. Those matchers are not used yet.

A complication is that matching against an expression should never
crash git-annex with an error. Instead, vicfg checks that the expressions
parse. If a bad expression (or an expression understood by some future
git-annex version) gets into the log, it'll be ignored.

Most of the code in Limit couldn't fail anyway, but I did have to make
limitCopies check its parameter first, and return an error if it's bad,
rather than erroring at runtime.

											
										
										
											2012-10-04 19:48:59 +00:00
+								import Types.UUID
-												refactor

											
										
										
											2013-05-25 03:07:26 +00:00
+								import Types.FileMatcher
-												reorganize numcopies code (no behavior changes)

Move stuff into Logs.NumCopies. Add a NumCopies newtype.

Better names for various serialization classes that are specific to one
thing or another.

											
										
										
											2014-01-21 20:08:19 +00:00
+								import Types.NumCopies
-												use lock pools throughout git-annex

The one exception is in Utility.Daemon. As long as a process only
daemonizes once, which seems reasonable, and as long as it avoids calling
checkDaemon once it's already running as a daemon, the fcntl locking
gotchas won't be a problem there.

Annex.LockFile has its own separate lock pool layer, which has been
renamed to LockCache. This is a persistent cache of locks that persist
until closed.

This is not quite done; lockContent still needs to be converted.

											
										
										
											2015-05-18 20:23:07 +00:00
+								import Types.LockCache
-												add desktop notifications

Motivation: Hook scripts for nautilus or other file managers
need to provide the user with feedback that a file is being downloaded.

This commit was sponsored by THM Schoemaker.

											
										
										
											2014-03-22 14:42:38 +00:00
+								import Types.DesktopNotify
-												clean up cleanup action enumeration

											
										
										
											2014-03-13 23:06:26 +00:00
+								import Types.CleanupActions
-												refactor getCurrentBranch

Both Command.Sync and Annex.Ingest had their own versions of this.

The one in Annex.Ingest used Git.Branch.currentUnsafe, but does not seem
to need it. That is only checking to see if it's in an adjusted unlocked
branch, and when in an adjusted branch, the branch does in fact exist,
so the added check that Git.Branch.current does is fine.

This commit was sponsored by Denis Dzyubenko on Patreon.

											
										
										
											2018-10-19 19:17:48 +00:00
+								import Types.AdjustedBranch
-												improved WorkerPool abstraction

No behavior changes.

											
										
										
											2019-06-05 17:03:05 +00:00
+								import Types.WorkerPool
-												cache annex index filename for 1.5% speedup to queries

											
										
										
											2020-04-10 17:37:04 +00:00
+								import Types.IndexFiles
-												cat-file resource pool

Avoid running a large number of git cat-file child processes when run with
a large -J value.

This implementation takes care to avoid adding any overhead to git-annex
when run without -J. When run with -J, there is a small bit of added
overhead, to manipulate the resource pool. That optimisation added a
fair bit of complexity.

											
										
										
											2020-04-20 17:53:27 +00:00
+								import Types.CatFileHandles
-												cache remote.log

Unlikely to speed up any of the existing uses much, but I want to use it
in a message that might be displayed many times.

											
										
										
											2020-09-22 17:52:26 +00:00
+								import Types.RemoteConfig
-												move TransferrerPool from Assistant state to Annex state

This commit was sponsored by Graham Spencer on Patreon.

											
										
										
											2020-12-07 17:08:59 +00:00
+								import Types.TransferrerPool
-												generate more compact git-annex branch for imports

Especially from borg, where the content identifier logs
all end up being the same identical file!

But also, for other imports, the location tracking logs can,
in some cases, be identical files.

Bonus optimisation: Avoid looking up (and parsing when set)
GIT_ANNEX_VECTOR_CLOCK env var every time a log is written to.
Although the lookup does happen at startup even when no
log will be written now.

											
										
										
											2020-12-23 19:21:33 +00:00
+								import Types.VectorClock
 								import Annex.VectorClock.Utility
-												implement fastDebug

Most of the changes here involve global option parsing: GlobalSetter
changed so it can both run an Annex action to set state, but can also
change the AnnexRead value, which is immutable once the Annex monad is
running.

That allowed a debugselector value to be added to AnnexRead, seeded
from the git config. The --debugfilter option's GlobalSetter then updates
the AnnexRead.

This improved GlobalSetter can later be used to move more stuff to
AnnexRead. Things that don't involve a git config will be easier to
move, and probably a *lot* of things can be moved eventually.

fastDebug, while implemented, is not used anywhere yet. But it should be
fast..

											
										
										
											2021-04-06 19:14:00 +00:00
+								import Annex.Debug.Utility
-												optimise read and write for Keys database (untested)

Writes are optimised by queueing up multiple writes when possible.
The queue is flushed after the Annex monad action finishes. That makes it
happen on program termination, and also whenever a nested Annex monad action
finishes.

Reads are optimised by checking once (per AnnexState) if the database
exists. If the database doesn't exist yet, all reads return mempty.

Reads also cause queued writes to be flushed, so reads will always be
consistent with writes (as long as they're made inside the same Annex monad).
A future optimisation path would be to determine when that's not necessary,
which is probably most of the time, and avoid flushing unnecessarily.

Design notes for this commit:

- separate reads from writes
- reuse a handle which is left open until program
  exit or until the MVar goes out of scope (and autoclosed then)
- writes are queued
  - queue is flushed periodically
  - immediate queue flush before any read
  - auto-flush queue when database handle is garbage collected
  - flush queue on exit from Annex monad
    (Note that this may happen repeatedly for a single database connection;
    or a connection may be reused for multiple Annex monad actions,
    possibly even concurrent ones.)
- if database does not exist (or is empty) the handle
  is not opened by reads; reads instead return empty results
- writes open the handle if it was not open previously

											
										
										
											2015-12-23 22:34:51 +00:00
+								import qualified Database.Keys.Handle as Keys
-												fix for Windows file timestamp timezone madness

On Windows, changing the time zone causes the apparent mtime of files to
change. This confuses git-annex, which naturally thinks this means the files
have actually been modified (since THAT'S WHAT A MTIME IS FOR, BILL <sheesh>).

Work around this stupidity, by using the inode sentinel file to detect if
the timezone has changed, and calculate a TSDelta, which will be applied
when generating InodeCaches.

This should add no overhead at all on unix. Indeed, I sped up a few
things slightly in the refactoring.

Seems to basically work! But it has a big known problem:
If the timezone changes while the assistant (or a long-running command)
runs, it won't notice, since it only checks the inode cache once, and
so will use the old delta for all new inode caches it generates for new
files it's added. Which will result in them seeming changed the next time
it runs.

This commit was sponsored by Vincent Demeester.

											
										
										
											2014-06-11 21:51:12 +00:00
+								import Utility.InodeCache
-												Urls can now be claimed by remotes. This will allow creating, for example, a external special remote that handles magnet: and *.torrent urls.

											
										
										
											2014-12-08 23:14:24 +00:00
+								import Utility.Url
-												check-attr resource pool

Limited to min of -JN or number of CPU cores, because it will often be
CPU bound, once it's read the gitignore file for a directory.

In some situations it's more disk bound, but in any case it's unlikely
to be the main bottleneck that -J is used to avoid. Eg, when dropping,
this is used for numcopies checks, but the main bottleneck will be
accessing the remotes to verify presence. So the user might decide to
-J32 that, but having 32 check-attr processes would just waste however
many filehandles they open, and probably worsen their performance due to
CPU contention.

Note that, I first tried just letting up to the -JN be started. However,
even when it's no bottleneck at all, that still results in all of them
being started. Why? Well, all the worker threads start up nearly
simultaneously, so there's a thundering herd..

											
										
										
											2020-04-21 14:38:44 +00:00
+								import Utility.ResourcePool
-												fix --time-limit

It got broken in several ways by the streaming seeking optimisations
around version 8.20201007.

Moved time limit checking out of the matcher, which was a hack in the
first place. So everywhere that uses Limit.getMatcher needs to check
time limit. Well, almost everywhere. Command.Info uses it, but it does
not make sense to time limit getting info. And Command.MultiCast uses it
just to build up a list of files that then get passed to a command, so
it would never have hit the timeout in a useful way.

This implementation is a little more expensive when at time limit than
necessary, since it continues seeking only to discard everything after the
time limit. I did try making it close the file handles to force a faster
shutdown, but that didn't work and hung. Could certainly be improved
somehow, but seeking is probably not the expensive bit when a time limit
is hit, so this seems acceptable for now.

											
										
										
											2021-01-04 19:25:28 +00:00
+								import Utility.HumanTime
-												cache credentials in memory when doing http basic auth to a git remote

When accessing a git remote over http needs a git credential prompt for a
password, cache it for the lifetime of the git-annex process, rather than
repeatedly prompting.

The git-lfs special remote already caches the credential when discovering
the endpoint. And presumably commands like git pull do as well, since they
may download multiple urls from a remote.

The TMVar CredentialCache is read, so two concurrent calls to
getBasicAuthFromCredential will both prompt for a credential.
There would already be two concurrent password prompts in such a case,
and existing uses of `prompt` probably avoid it. Anyway, it's no worse
than before.

											
										
										
											2022-09-09 17:53:38 +00:00
+								import Git.Credential (CredentialCache(..))
-												fix for Windows file timestamp timezone madness

On Windows, changing the time zone causes the apparent mtime of files to
change. This confuses git-annex, which naturally thinks this means the files
have actually been modified (since THAT'S WHAT A MTIME IS FOR, BILL <sheesh>).

Work around this stupidity, by using the inode sentinel file to detect if
the timezone has changed, and calculate a TSDelta, which will be applied
when generating InodeCaches.

This should add no overhead at all on unix. Indeed, I sped up a few
things slightly in the refactoring.

Seems to basically work! But it has a big known problem:
If the timezone changes while the assistant (or a long-running command)
runs, it won't notice, since it only checks the inode cache once, and
so will use the old delta for all new inode caches it generates for new
files it's added. Which will result in them seeming changed the next time
it runs.

This commit was sponsored by Vincent Demeester.

											
										
										
											2014-06-11 21:51:12 +00:00
 								import "mtl" Control.Monad.Reader
 								import Control.Concurrent
-												fix sshCleanup race using STM

											
										
										
											2017-05-11 22:29:51 +00:00
+								import Control.Concurrent.STM
-												Support being built with ghc 8.0.1 (MonadFail)

Tested on an older ghc by enabling MonadFailDesugaring globally.

In TransferQueue, the lack of a MonadFail for STM exposed what would
normally be a bug in the pattern matching, although in this case an
earlier check that the queue was not empty avoided a pattern match
failure.

											
										
										
											2019-01-05 15:54:06 +00:00
+								import qualified Control.Monad.Fail as Fail
-												avoid insertWith' deprecation warning

Switch to Data.Map.Strict everywhere that used it.

There are still lots of lazy maps in git-annex. I think switching these
is safe. The risk is that there might be a map that is used in a way
that relies on the values not being evaluated to WHNF, and switching to
strict might result in bad performance or memory use. So, I have not
switched everything.

											
										
										
											2018-04-22 17:28:31 +00:00
+								import qualified Data.Map.Strict as M
-												fix for Windows file timestamp timezone madness

On Windows, changing the time zone causes the apparent mtime of files to
change. This confuses git-annex, which naturally thinks this means the files
have actually been modified (since THAT'S WHAT A MTIME IS FOR, BILL <sheesh>).

Work around this stupidity, by using the inode sentinel file to detect if
the timezone has changed, and calculate a TSDelta, which will be applied
when generating InodeCaches.

This should add no overhead at all on unix. Indeed, I sped up a few
things slightly in the refactoring.

Seems to basically work! But it has a big known problem:
If the timezone changes while the assistant (or a long-running command)
runs, it won't notice, since it only checks the inode cache once, and
so will use the old delta for all new inode caches it generates for new
files it's added. Which will result in them seeming changed the next time
it runs.

This commit was sponsored by Vincent Demeester.

											
										
										
											2014-06-11 21:51:12 +00:00
+								import qualified Data.Set as S
-												fix --time-limit

It got broken in several ways by the streaming seeking optimisations
around version 8.20201007.

Moved time limit checking out of the matcher, which was a hack in the
first place. So everywhere that uses Limit.getMatcher needs to check
time limit. Well, almost everywhere. Command.Info uses it, but it does
not make sense to time limit getting info. And Command.MultiCast uses it
just to build up a list of files that then get passed to a command, so
it would never have hit the timeout in a useful way.

This implementation is a little more expensive when at time limit than
necessary, since it continues seeking only to discard everything after the
time limit. I did try making it close the file handles to force a faster
shutdown, but that didn't work and hung. Could certainly be improved
somehow, but seeking is probably not the expensive bit when a time limit
is hit, so this seems acceptable for now.

											
										
										
											2021-01-04 19:25:28 +00:00
+								import Data.Time.Clock.POSIX
-												successfully split Annex and AnnexState out of TypeInternals

											
										
										
											2011-01-26 01:49:04 +00:00
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+								{- git-annex's monad is a ReaderT around an AnnexState stored in a MVar,
 								 - and an AnnexRead. The MVar is not exposed outside this module.
-												unify exception handling into Utility.Exception

Removed old extensible-exceptions, only needed for very old ghc.

Made webdav use Utility.Exception, to work after some changes in DAV's
exception handling.

Removed Annex.Exception. Mostly this was trivial, but note that
tryAnnex is replaced with tryNonAsync and catchAnnex replaced with
catchNonAsync. In theory that could be a behavior change, since the former
caught all exceptions, and the latter don't catch async exceptions.

However, in practice, nothing in the Annex monad uses async exceptions.
Grepping for throwTo and killThread only find stuff in the assistant,
which does not seem related.

Command.Add.undo is changed to accept a SomeException, and things
that use it for rollback now catch non-async exceptions, rather than
only IOExceptions.

											
										
										
											2014-08-08 01:55:44 +00:00
+								 -
 								 - Note that when an Annex action fails and the exception is caught,
-												get -J: Download different files from different remotes when the remotes have the same costs.

Only done in -J mode because only if there's concurrency can downloading
from two remotes be faster. Without concurrency, it's likely the case that
sequential downloads from the same remote are faster than switching back
and forth between two remotes.

There is some hairy MVar code here, but basically it just keeps
the activeremotes MVar full except when deciding which remote to assign
to a thread.

Also affects gets by sync --content -J

This commit was sponsored by Jochen Bartl.

											
										
										
											2016-09-06 16:42:50 +00:00
+								 - any changes the action has made to the AnnexState are retained,
-												unify exception handling into Utility.Exception

Removed old extensible-exceptions, only needed for very old ghc.

Made webdav use Utility.Exception, to work after some changes in DAV's
exception handling.

Removed Annex.Exception. Mostly this was trivial, but note that
tryAnnex is replaced with tryNonAsync and catchAnnex replaced with
catchNonAsync. In theory that could be a behavior change, since the former
caught all exceptions, and the latter don't catch async exceptions.

However, in practice, nothing in the Annex monad uses async exceptions.
Grepping for throwTo and killThread only find stuff in the assistant,
which does not seem related.

Command.Add.undo is changed to accept a SomeException, and things
that use it for rollback now catch non-async exceptions, rather than
only IOExceptions.

											
										
										
											2014-08-08 01:55:44 +00:00
+								 - due to the use of the MVar to store the state.
-												Switch to MonadCatchIO-transformers for better handling of state while catching exceptions.

As seen in this bug report, the lifted exception handling using the StateT
monad throws away state changes when an action throws an exception.
http://git-annex.branchable.com/bugs/git_annex_fork_bombs_on_gpg_file/
  .. Which can result in cached values being redundantly calculated, or other
     possibly worse bugs when the annex state gets out of sync with reality.

This switches from a StateT AnnexState to a ReaderT (MVar AnnexState).
All changes to the state go via the MVar. So when an Annex action is
running inside an exception handler, and it makes some changes, they
immediately go into effect in the MVar. If it then throws an exception
(or even crashes its thread!), the state changes are still in effect.

The MonadCatchIO-transformers change is actually only incidental.
I could have kept on using lifted-base for the exception handling.
However, I'd have needed to write a new instance of MonadBaseControl
for the new monad.. and I didn't write the old instance.. I begged Bas
and he kindly sent it to me. Happily, MonadCatchIO-transformers is
able to derive a MonadCatchIO instance for my monad.

This is a deep level change. It passes the test suite! What could it break?

Well.. The most likely breakage would be to code that runs an Annex action
in an exception handler, and *wants* state changes to be thrown away.
Perhaps the state changes leaves the state inconsistent, or wrong. Since
there are relatively few places in git-annex that catch exceptions in the
Annex monad, and the AnnexState is generally just used to cache calculated
data, this is unlikely to be a problem.

Oh yeah, this change also makes Assistant.Types.ThreadedMonad a bit
redundant. It's now entirely possible to run concurrent Annex actions in
different threads, all sharing access to the same state! The ThreadedMonad
just adds some extra work on top of that, with its own MVar, and avoids
such actions possibly stepping on one-another's toes. I have not gotten
rid of it, but might try that later. Being able to run concurrent Annex
actions would simplify parts of the Assistant code.

											
										
										
											2013-05-19 18:16:36 +00:00
+								 -}
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+								newtype Annex a = Annex { runAnnex :: ReaderT (MVar AnnexState, AnnexRead) IO a }
-												make Annex an opaque data type

Was a type alias; using newtype has the benefit that type errors will
show "Annex foo" rather than two lines of internal type nonsense. Yay!
There should be no other effects to size or runtime.

I've tried to do this at least twice before (each time I read RWH chapter 10);
finally understood how to this time.. sorta.

											
										
										
											2011-08-19 18:28:07 +00:00
+									deriving (
 										Monad,
 										MonadIO,
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+										MonadReader (MVar AnnexState, AnnexRead),
-												Use exceptions in place of deprecated MonadCatchIO-transformers

											
										
										
											2014-05-28 21:01:57 +00:00
+										MonadCatch,
 										MonadThrow,
 										MonadMask,
-												Support being built with ghc 8.0.1 (MonadFail)

Tested on an older ghc by enabling MonadFailDesugaring globally.

In TransferQueue, the lack of a MonadFail for STM exposed what would
normally be a bug in the pattern matching, although in this case an
earlier check that the queue was not empty avoided a pattern match
failure.

											
										
										
											2019-01-05 15:54:06 +00:00
+										Fail.MonadFail,
-												code simplification thanks to applicative functors

											
										
										
											2011-08-25 04:28:55 +00:00
+										Functor,
-												import command is updating tracking branch

											
										
										
											2019-02-26 17:11:25 +00:00
+										Applicative,
 										Alternative
-												make Annex an opaque data type

Was a type alias; using newtype has the benefit that type errors will
show "Annex foo" rather than two lines of internal type nonsense. Yay!
There should be no other effects to size or runtime.

I've tried to do this at least twice before (each time I read RWH chapter 10);
finally understood how to this time.. sorta.

											
										
										
											2011-08-19 18:28:07 +00:00
+									)
-												successfully split Annex and AnnexState out of TypeInternals

											
										
										
											2011-01-26 01:49:04 +00:00
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+								-- Values that can be read, but not modified by an Annex action.
 								data AnnexRead = AnnexRead
 									{ activekeys :: TVar (M.Map Key ThreadId)
 									, activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer)
 									, keysdbhandle :: Keys.DbHandle
 									, sshstalecleaned :: TMVar Bool
 									, signalactions :: TVar (M.Map SignalAction (Int -> IO ()))
 									, transferrerpool :: TransferrerPool
-												fix fastDebug to check if debugging is actually enabled

Had to add to AnnexRead an indication of whether debugging is enabled.

Could have just made setupConsole not install a debug output action that
outputs, and have enableDebug be what installs that, but then in the
common case where there is no debug selector, and so all debug output is
selected, it would run the debug output action every time, which entails
an IORef access. Which would make fastDebug too slow..

											
										
										
											2021-04-06 20:28:37 +00:00
+									, debugenabled :: Bool
-												implement fastDebug

Most of the changes here involve global option parsing: GlobalSetter
changed so it can both run an Annex action to set state, but can also
change the AnnexRead value, which is immutable once the Annex monad is
running.

That allowed a debugselector value to be added to AnnexRead, seeded
from the git config. The --debugfilter option's GlobalSetter then updates
the AnnexRead.

This improved GlobalSetter can later be used to move more stuff to
AnnexRead. Things that don't involve a git config will be easier to
move, and probably a *lot* of things can be moved eventually.

fastDebug, while implemented, is not used anywhere yet. But it should be
fast..

											
										
										
											2021-04-06 19:14:00 +00:00
+									, debugselector :: DebugSelector
-												initial implementation of --explain

Currently it only displays explanations of options like --in and --copies.

In the future, it should explain preferred content expression evaluation
and other decisions.

The explanations of a few things could be better. In particular,
"standard" will just appear as-is (or as "!standard" if it doesn't
match), rather than explaining why the standard preferred content expression
for the group matches or not.

Currently as implemented, it goes to stdout, and so commands like
git-annex find that have custom output will not display --explain
information. Perhaps that should change, dunno.

Sponsored-by: Dartmouth College's DANDI project

											
										
										
											2023-07-25 20:11:06 +00:00
+									, explainenabled :: Bool
-												Avoid more than 1 gpg password prompt at the same time

Which could happen occasionally before when concurrency is enabled.
While not much of a problem when it did happen, better to avoid it. Also,
since it seems likely the gpg-agent sometimes fails in such a situation,
this makes it not happen when running a single git-annex command with
concurrency enabled.

This commit was sponsored by Jake Vosloo on Patreon.

											
										
										
											2021-04-27 20:36:33 +00:00
+									, ciphers :: TMVar (M.Map StorableCipher Cipher)
-												move several readonly values to AnnexRead

This improves performance to a small extent in several places.

Sponsored-by: Tobias Ammann on Patreon

											
										
										
											2022-06-28 19:28:14 +00:00
+									, fast :: Bool
 									, force :: Bool
 									, forcenumcopies :: Maybe NumCopies
 									, forcemincopies :: Maybe MinCopies
 									, forcebackend :: Maybe String
-												final readonly values moves to AnnexRead

At this point I've checked all AnnexState values and these were all that
remained that could move.

Pity that Annex.repo can't move, but it gets modified sometimes..

A couple of AnnexState values are set by options and could be AnnexRead,
but happen to use Annex when being set.

Sponsored-by: Max Thoursie on Patreon

											
										
										
											2022-06-28 20:02:01 +00:00
+									, useragent :: Maybe String
 									, desktopnotify :: DesktopNotify
-												cache credentials in memory when doing http basic auth to a git remote

When accessing a git remote over http needs a git credential prompt for a
password, cache it for the lifetime of the git-annex process, rather than
repeatedly prompting.

The git-lfs special remote already caches the credential when discovering
the endpoint. And presumably commands like git pull do as well, since they
may download multiple urls from a remote.

The TMVar CredentialCache is read, so two concurrent calls to
getBasicAuthFromCredential will both prompt for a credential.
There would already be two concurrent password prompts in such a case,
and existing uses of `prompt` probably avoid it. Anyway, it's no worse
than before.

											
										
										
											2022-09-09 17:53:38 +00:00
+									, gitcredentialcache :: TMVar CredentialCache
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									}
-												implement fastDebug

Most of the changes here involve global option parsing: GlobalSetter
changed so it can both run an Annex action to set state, but can also
change the AnnexRead value, which is immutable once the Annex monad is
running.

That allowed a debugselector value to be added to AnnexRead, seeded
from the git config. The --debugfilter option's GlobalSetter then updates
the AnnexRead.

This improved GlobalSetter can later be used to move more stuff to
AnnexRead. Things that don't involve a git config will be easier to
move, and probably a *lot* of things can be moved eventually.

fastDebug, while implemented, is not used anywhere yet. But it should be
fast..

											
										
										
											2021-04-06 19:14:00 +00:00
+								newAnnexRead :: GitConfig -> IO AnnexRead
 								newAnnexRead c = do
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									emptyactivekeys <- newTVarIO M.empty
 									emptyactiveremotes <- newMVar M.empty
 									kh <- Keys.newDbHandle
 									sc <- newTMVarIO False
 									si <- newTVarIO M.empty
 									tp <- newTransferrerPool
-												Avoid more than 1 gpg password prompt at the same time

Which could happen occasionally before when concurrency is enabled.
While not much of a problem when it did happen, better to avoid it. Also,
since it seems likely the gpg-agent sometimes fails in such a situation,
this makes it not happen when running a single git-annex command with
concurrency enabled.

This commit was sponsored by Jake Vosloo on Patreon.

											
										
										
											2021-04-27 20:36:33 +00:00
+									cm <- newTMVarIO M.empty
-												cache credentials in memory when doing http basic auth to a git remote

When accessing a git remote over http needs a git credential prompt for a
password, cache it for the lifetime of the git-annex process, rather than
repeatedly prompting.

The git-lfs special remote already caches the credential when discovering
the endpoint. And presumably commands like git pull do as well, since they
may download multiple urls from a remote.

The TMVar CredentialCache is read, so two concurrent calls to
getBasicAuthFromCredential will both prompt for a credential.
There would already be two concurrent password prompts in such a case,
and existing uses of `prompt` probably avoid it. Anyway, it's no worse
than before.

											
										
										
											2022-09-09 17:53:38 +00:00
+									cc <- newTMVarIO (CredentialCache M.empty)
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									return $ AnnexRead
 										{ activekeys = emptyactivekeys
 										, activeremotes = emptyactiveremotes
 										, keysdbhandle = kh
 										, sshstalecleaned = sc
 										, signalactions = si
 										, transferrerpool = tp
-												fix fastDebug to check if debugging is actually enabled

Had to add to AnnexRead an indication of whether debugging is enabled.

Could have just made setupConsole not install a debug output action that
outputs, and have enableDebug be what installs that, but then in the
common case where there is no debug selector, and so all debug output is
selected, it would run the debug output action every time, which entails
an IORef access. Which would make fastDebug too slow..

											
										
										
											2021-04-06 20:28:37 +00:00
+										, debugenabled = annexDebug c
-												implement fastDebug

Most of the changes here involve global option parsing: GlobalSetter
changed so it can both run an Annex action to set state, but can also
change the AnnexRead value, which is immutable once the Annex monad is
running.

That allowed a debugselector value to be added to AnnexRead, seeded
from the git config. The --debugfilter option's GlobalSetter then updates
the AnnexRead.

This improved GlobalSetter can later be used to move more stuff to
AnnexRead. Things that don't involve a git config will be easier to
move, and probably a *lot* of things can be moved eventually.

fastDebug, while implemented, is not used anywhere yet. But it should be
fast..

											
										
										
											2021-04-06 19:14:00 +00:00
+										, debugselector = debugSelectorFromGitConfig c
-												initial implementation of --explain

Currently it only displays explanations of options like --in and --copies.

In the future, it should explain preferred content expression evaluation
and other decisions.

The explanations of a few things could be better. In particular,
"standard" will just appear as-is (or as "!standard" if it doesn't
match), rather than explaining why the standard preferred content expression
for the group matches or not.

Currently as implemented, it goes to stdout, and so commands like
git-annex find that have custom output will not display --explain
information. Perhaps that should change, dunno.

Sponsored-by: Dartmouth College's DANDI project

											
										
										
											2023-07-25 20:11:06 +00:00
+										, explainenabled = False
-												Avoid more than 1 gpg password prompt at the same time

Which could happen occasionally before when concurrency is enabled.
While not much of a problem when it did happen, better to avoid it. Also,
since it seems likely the gpg-agent sometimes fails in such a situation,
this makes it not happen when running a single git-annex command with
concurrency enabled.

This commit was sponsored by Jake Vosloo on Patreon.

											
										
										
											2021-04-27 20:36:33 +00:00
+										, ciphers = cm
-												move several readonly values to AnnexRead

This improves performance to a small extent in several places.

Sponsored-by: Tobias Ammann on Patreon

											
										
										
											2022-06-28 19:28:14 +00:00
+										, fast = False
 										, force = False
 										, forcebackend = Nothing
 										, forcenumcopies = Nothing
 										, forcemincopies = Nothing
-												final readonly values moves to AnnexRead

At this point I've checked all AnnexState values and these were all that
remained that could move.

Pity that Annex.repo can't move, but it gets modified sometimes..

A couple of AnnexState values are set by options and could be AnnexRead,
but happen to use Annex when being set.

Sponsored-by: Max Thoursie on Patreon

											
										
										
											2022-06-28 20:02:01 +00:00
+										, useragent = Nothing
 										, desktopnotify = mempty
-												cache credentials in memory when doing http basic auth to a git remote

When accessing a git remote over http needs a git credential prompt for a
password, cache it for the lifetime of the git-annex process, rather than
repeatedly prompting.

The git-lfs special remote already caches the credential when discovering
the endpoint. And presumably commands like git pull do as well, since they
may download multiple urls from a remote.

The TMVar CredentialCache is read, so two concurrent calls to
getBasicAuthFromCredential will both prompt for a credential.
There would already be two concurrent password prompts in such a case,
and existing uses of `prompt` probably avoid it. Anyway, it's no worse
than before.

											
										
										
											2022-09-09 17:53:38 +00:00
+										, gitcredentialcache = cc
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+										}
 								-- Values that can change while running an Annex action.
-												rework config storage

Moved away from a map of flags to storing config directly in the AnnexState
structure. Got rid of most accessor functions in Annex.

This allowed supporting multiple --exclude flags.

											
										
										
											2011-01-26 04:17:38 +00:00
+								data AnnexState = AnnexState
 									{ repo :: Git.Repo
-												Bug fix: Git config settings passed to git-annex -c did not always take effect.

When Config.setConfig runs, it throws away the old Repo and loads a new
one. So, add an action to adjust the Repo so that -c settings will persist
across that.

											
										
										
											2016-01-22 17:47:41 +00:00
+									, repoadjustment :: (Git.Repo -> IO Git.Repo)
-												type based git config handling for remotes

Still a couple of places that use git config ad-hoc, but this is most of it
done.

											
										
										
											2013-01-01 17:52:47 +00:00
+									, gitconfig :: GitConfig
-												Bugfix: Don't ignore --debug when it is followed by -c

											
										
										
											2020-02-27 04:52:37 +00:00
+									, gitconfigadjustment :: (GitConfig -> GitConfig)
-												pass along -c options to child git-annex processes

											
										
										
											2020-12-15 14:44:36 +00:00
+									, gitconfigoverride :: [String]
-												Improve startup time for commands that do not operate on remotes

And for tab completion, by not unnecessarily statting paths to remotes,
which used to cause eg, spin-up of removable drives.

Got rid of the remotes member of Git.Repo. This was a bit painful.

Remote.Git modifies the list of remotes as it reads their configs,
so still need a persistent list of remotes. So, put it in as
Annex.gitremotes. It's only populated by getGitRemotes, so commands
like examinekey that don't care about remotes won't do so.

This commit was sponsored by Jake Vosloo on Patreon.

											
										
										
											2018-01-09 19:36:56 +00:00
+									, gitremotes :: Maybe [Git.Repo]
-												detect v10 upgrade while running

Capstone of the v10 upgrade process.

Tested with a git-annex drop in a v8 repo that had a local v8 remote.
Upgrading the repo to v10 (with --force) immediately caused it to notice
and switch over to v10 locking. Upgrading the remote also caused it to
switch over when operating on the remote.

The InodeCache makes this fairly efficient, just an added stat call per
lock of an object file. After the v10 upgrade, there is no more
overhead.

Sponsored-by: Dartmouth College's Datalad project

											
										
										
											2022-01-21 16:56:07 +00:00
+									, gitconfiginodecache :: Maybe InodeCache
-												annex.backend is the new name for what was annex.backends

It takes a single key-value backend, rather than the unnecessary and confusing list.
The old option still works if set.

Simplified some old old code too.

This commit was sponsored by Thomas Hochstein on Patreon.

											
										
										
											2017-05-09 19:04:07 +00:00
+									, backend :: Maybe (BackendA Annex)
-												type alias cleanup

											
										
										
											2011-12-31 08:11:39 +00:00
+									, remotes :: [Types.Remote.RemoteA Annex]
-												display "Recording state in git..." when staging the journal

A bit tricky to avoid printing it twice in a row when there are queued git
commands to run and journal to stage.

Added a generic way to run an action that may output multiple side
messages, with only the first displayed.

											
										
										
											2012-04-27 17:23:52 +00:00
+									, output :: MessageState
-												differentiate between concurrency enabled at command line and by git config

The latter should not affect --batch mode.

											
										
										
											2020-09-16 15:41:28 +00:00
+									, concurrency :: ConcurrencySetting
-												assistant: Work around git-cat-file's not reloading the index after files are staged.

Argh.

											
										
										
											2013-05-25 04:37:41 +00:00
+									, daemon :: Bool
-												Branch handling improvements

Support creating the branch.

Unified branch state into a single data type.

Only commit changes when the index has been changed.

											
										
										
											2011-06-22 19:58:30 +00:00
+									, branchstate :: BranchState
-												remove one more warningIO

Had to generalize Git.Queue so it can run an Annex action, yipes.

Only remaining warningIO are in the legacy chunk code.

											
										
										
											2019-11-12 14:44:51 +00:00
+									, repoqueue :: Maybe (Git.Queue.Queue Annex)
-												cat-file resource pool

Avoid running a large number of git cat-file child processes when run with
a large -J value.

This implementation takes care to avoid adding any overhead to git-annex
when run without -J. When run with -J, there is a small bit of added
overhead, to manipulate the resource pool. That optimisation added a
fair bit of complexity.

											
										
										
											2020-04-20 17:53:27 +00:00
+									, catfilehandles :: CatFileHandles
-												use ResourcePool for hash-object handles

Avoid starting an unnecessary number of git hash-object processes when
concurrency is enabled.

Sponsored-by: Dartmouth College's DANDI project

											
										
										
											2022-07-25 21:32:39 +00:00
+									, hashobjecthandle :: Maybe (ResourcePool HashObjectHandle)
-												check-attr resource pool

Limited to min of -JN or number of CPU cores, because it will often be
CPU bound, once it's read the gitignore file for a directory.

In some situations it's more disk bound, but in any case it's unlikely
to be the main bottleneck that -J is used to avoid. Eg, when dropping,
this is used for numcopies checks, but the main bottleneck will be
accessing the remotes to verify presence. So the user might decide to
-J32 that, but having 32 check-attr processes would just waste however
many filehandles they open, and probably worsen their performance due to
CPU contention.

Note that, I first tried just letting up to the -JN be started. However,
even when it's no bottleneck at all, that still results in all of them
being started. Why? Well, all the worker threads start up nearly
simultaneously, so there's a thundering herd..

											
										
										
											2020-04-21 14:38:44 +00:00
+									, checkattrhandle :: Maybe (ResourcePool CheckAttrHandle)
-												check-ignore resource pool

Much like check-attr before.

											
										
										
											2020-04-21 15:20:10 +00:00
+									, checkignorehandle :: Maybe (ResourcePool CheckIgnoreHandle)
-												cache negative lookups of global numcopies and mincopies

Speeds up eg git-annex sync --content by up to 50%. When it does not need
to transfer or drop anything, it now noops a lot more quickly.

I didn't see anything else in sync --content noop loop that could really
be sped up. It has to cat git objects to keys, stat object files, etc.

Sponsored-by: unqueued on Patreon

											
										
										
											2023-06-06 18:15:47 +00:00
+									, globalnumcopies :: Maybe (Maybe NumCopies)
 									, globalmincopies :: Maybe (Maybe MinCopies)
-												reorg matcher types; no non-type code changes

											
										
										
											2014-03-29 18:43:34 +00:00
+									, limit :: ExpandableMatcher Annex
-												fix --time-limit

It got broken in several ways by the streaming seeking optimisations
around version 8.20201007.

Moved time limit checking out of the matcher, which was a hack in the
first place. So everywhere that uses Limit.getMatcher needs to check
time limit. Well, almost everywhere. Command.Info uses it, but it does
not make sense to time limit getting info. And Command.MultiCast uses it
just to build up a list of files that then get passed to a command, so
it would never have hit the timeout in a useful way.

This implementation is a little more expensive when at time limit than
necessary, since it continues seeking only to discard everything after the
time limit. I did try making it close the file handles to force a faster
shutdown, but that didn't work and hung. Could certainly be improved
somehow, but seeking is probably not the expensive bit when a time limit
is hit, so this seems acceptable for now.

											
										
										
											2021-01-04 19:25:28 +00:00
+									, timelimit :: Maybe (Duration, POSIXTime)
-												add --size-limit option

When this option is not used, there should be effectively no added
overhead, thanks to the optimisation in
b3cd0cc6ba4e5b9e2ae0abd9c8b2ec32475e09d2.

When an action fails on a file, the size of the file still counts toward
the size limit. This was necessary to support concurrency, but also
generally seems like the right choice.

Most commands that operate on annexed files support the option.
export and import do not, and I don't know if it would make sense for
export to.. Why would you want an incomplete export? sync doesn't, and
while it would be easy to make it support it for transferring files,
it's not clear if dropping files should also take the size limit into
account. Commands like add that don't operate on annexed files don't
support the option either.

Exiting 101 not yet implemented.

Sponsored-by: Denis Dzyubenko on Patreon

											
										
										
											2021-06-04 20:08:42 +00:00
+									, sizelimit :: Maybe (TVar Integer)
-												add a UUIDDesc type containing a ByteString

Groundwork for handling uuid.log using ByteString

											
										
										
											2019-01-01 19:39:45 +00:00
+									, uuiddescmap :: Maybe UUIDDescMap
-												reorg matcher types; no non-type code changes

											
										
										
											2014-03-29 18:43:34 +00:00
+									, preferredcontentmap :: Maybe (FileMatcherMap Annex)
 									, requiredcontentmap :: Maybe (FileMatcherMap Annex)
-												cache remote.log

Unlikely to speed up any of the existing uses much, but I want to use it
in a message that might be displayed many times.

											
										
										
											2020-09-22 17:52:26 +00:00
+									, remoteconfigmap :: Maybe (M.Map UUID RemoteConfig)
-												Add annex-trustlevel configuration settings, which can be used to  override the trust level of a remote.

This overrides the trust.log, and is overridden by the command-line trust
parameters.

It would have been nicer to have Logs.Trust.trustMap just look up the
configuration for all remotes, but a dependency loop prevented that
(Remotes depends on Logs.Trust in several ways). So instead, look up
the configuration when building remotes, storing it in the same forcetrust
field used for the command-line trust parameters.

											
										
										
											2012-01-10 03:31:44 +00:00
+									, forcetrust :: TrustMap
-												cache the trustmap

Doubles the speed of fsck, and speeds up drop as well.

											
										
										
											2011-06-24 01:25:39 +00:00
+									, trustmap :: Maybe TrustMap
-												group, ungroup: New commands to indicate groups of repositories.

											
										
										
											2012-10-01 19:12:04 +00:00
+									, groupmap :: Maybe GroupMap
-												use lock pools throughout git-annex

The one exception is in Utility.Daemon. As long as a process only
daemonizes once, which seems reasonable, and as long as it avoids calling
checkDaemon once it's already running as a daemon, the fcntl locking
gotchas won't be a problem there.

Annex.LockFile has its own separate lock pool layer, which has been
renamed to LockCache. This is a persistent cache of locks that persist
until closed.

This is not quite done; lockContent still needs to be converted.

											
										
										
											2015-05-18 20:23:07 +00:00
+									, lockcache :: LockCache
-												more command-specific options

Made --from and --to command-specific options.

Added generic storage for values of command-specific options,
which allows removing some of the special case fields in AnnexState.

(Also added generic storage for command-specific flags, although there are
not yet any.)

Note that this storage uses a Map, so repeatedly looking up the same value
is slightly more expensive than looking up an AnnexState field. But, the
value can be looked up once in the seek stage, transformed as necessary,
and passed in a closure to the start stage, and this avoids that overhead.

Still, I'm hesitant to use this for things like force or fast flags.
It's probably best to reserve it for flags that are only used by a few
commands, or options like --from and --to that it's important only be
allowed to be used with commands that implement them, to avoid user
confusion.

											
										
										
											2012-01-06 07:06:25 +00:00
+									, fields :: M.Map String String
-												propagate signals to the transferrer process group

Done on unix, could not implement it on windows quite.

The signal library gets part of the way needed for windows.
But I had to open https://github.com/pmlodawski/signal/issues/1 because
it lacks raiseSignal.

Also, I don't know what the equivalent of getProcessGroupIDOf is on
windows. And System.Process does not provide a way to send any signal to
a process group except for SIGINT.

This commit was sponsored by Boyd Stephen Smith Jr. on Patreon.

											
										
										
											2020-12-11 19:28:58 +00:00
+									, cleanupactions :: M.Map CleanupAction (Annex ())
-												fix for Windows file timestamp timezone madness

On Windows, changing the time zone causes the apparent mtime of files to
change. This confuses git-annex, which naturally thinks this means the files
have actually been modified (since THAT'S WHAT A MTIME IS FOR, BILL <sheesh>).

Work around this stupidity, by using the inode sentinal file to detect if
the timezone has changed, and calculate a TSDelta, which will be applied
when generating InodeCaches.

This should add no overhead at all on unix. Indeed, I sped up a few
things slightly in the refactoring.

Seems to basically work! But it has a big known problem:
If the timezone changes while the assistant (or a long-running command)
runs, it won't notice, since it only checks the inode cache once, and
so will use the old delta for all new inode caches it generates for new
files it's added. Which will result in them seeming changed the next time
it runs.

This commit was sponsored by Vincent Demeester.

											
										
										
											2014-06-11 21:51:12 +00:00
+									, sentinalstatus :: Maybe SentinalStatus
-												fix inversion of control in CommandSeek (no behavior changes)

I've been disliking how the command seek actions were written for some
time, with their inversion of control and ugly workarounds.

The last straw to fix it was sync --content, which didn't fit the
Annex [CommandStart] interface well at all. I have not yet made it take
advantage of the changed interface though.

The crucial change, and probably why I didn't do it this way from the
beginning, is to make each CommandStart action be run with exceptions
caught, and if it fails, increment a failure counter in annex state.
So I finally remove the very first code I wrote for git-annex, which
was before I had exception handling in the Annex monad, and so ran outside
that monad, passing state explicitly as it ran each CommandStart action.

This was a real slog from 1 to 5 am.

Test suite passes.

Memory usage is lower than before, sometimes by a couple of megabytes, and
remains constant, even when running in a large repo, and even when
repeatedly failing and incrementing the error counter. So no accidental
laziness space leaks.

Wall clock speed is identical, even in large repos.

This commit was sponsored by an anonymous bitcoiner.

											
										
										
											2014-01-20 08:11:42 +00:00
+									, errcounter :: Integer
-												Improved handling of --time-limit when combined with -J

When concurrency is enabled, there can be worker threads still running
when the time limit is checked. Exiting right there does not
give those threads time to finish what they're doing. Instead, the seeking
is wrapped up, and git-annex then shuts down cleanly.

The whole point of --time-limit existing, rather than using timeout(1)
when running git-annex is to let git-annex finish the action(s) it is
working on when the time limit is reached, and shut down cleanly.

I noticed this problem when investigating why restagePointerFile might
not have run after get/drop of an unlocked file. With --time-limit -J,
a worker thread may have finished updating a work tree file, and be killed
by the time limit check before it can run restagePointerFile. So despite
--time-limit running the shutdown actions, the work tree file didn't get
restaged.

Sponsored-by: Dartmouth College's DANDI project

											
										
										
											2022-09-22 16:47:40 +00:00
+									, reachedlimit :: Bool
-												annex.adjustedbranchrefresh

Added annex.adjustedbranchrefresh git config to update adjusted branches
set up by git-annex adjust --unlock-present/--hide-missing.

Note, in a few cases, I was not able to make the adjusted branch
be updated in calls to moveAnnex, because information about what
file corresponds to a key is not available. They are:

* If two files point to one file, then eg, `git annex get foo` will
  update the branch to unlock foo, but will not unlock bar, because it
  does not know about it. Might be fixable by making `git annex get
  bar` do something besides skipping bar?
* git-annex-shell recvkey likewise (so sends over ssh from old versions
  of git-annex)
* git-annex setkey
* git-annex transferkey if the user does not use --file
* git-annex multicast sends keys with no associated file info

Doing a single full refresh at the end, after any incremental refresh,
will deal with those edge cases.

											
										
										
											2020-11-16 18:09:55 +00:00
+									, adjustedbranchrefreshcounter :: Integer
-												add "unused" preferred content expression

With a really nice optimisation that keeps it from having any overhead
in normal operation!

This commit was sponsored by Ulises Vitulli.

											
										
										
											2014-01-22 20:35:32 +00:00
+									, unusedkeys :: Maybe (S.Set Key)
-												Urls can now be claimed by remotes. This will allow creating, for example, a external special remote that handles magnet: and *.torrent urls.

											
										
										
											2014-12-08 23:14:24 +00:00
+									, tempurls :: M.Map Key URLString
-												pre-commit-annex hook script to automatically extract metadata from lots of types of files

Using the extract(1) program to do the heavy lifting.

Decided to make git-annex run pre-commit-annex when committing. Since
git-annex pre-commit also runs it, it'll be run when git commit is run too,
via the pre-commit hook. This basically gives back the pre-commit hook
that git-annex took away. The implementation avoids repeatedly looking
for the hook script when the assistant is running and committing
repeatedly; only checks if the hook is available once.

To make the script simpler, made git-annex metadata -s field?=value
only set a field when it's not already got a value.

This commit was sponsored by bak.

											
										
										
											2014-03-02 22:01:07 +00:00
+									, existinghooks :: M.Map Git.Hook.Hook Bool
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									, workers :: Maybe (TMVar (WorkerPool (AnnexState, AnnexRead)))
-												refactor getCurrentBranch

Both Command.Sync and Annex.Ingest had their own versions of this.

The one in Annex.Ingest used Git.Branch.currentUnsafe, but does not seem
to need it. That is only checking to see if it's in an adjusted unlocked
branch, and when in an adjusted branch, the branch does in fact exist,
so the added check that Git.Branch.current does is fine.

This commit was sponsored by Denis Dzyubenko on Patreon.

											
										
										
											2018-10-19 19:17:48 +00:00
+									, cachedcurrentbranch :: (Maybe (Maybe Git.Branch, Maybe Adjustment))
-												cache annex index filename for 1.5% speedup to queries

											
										
										
											2020-04-10 17:37:04 +00:00
+									, cachedgitenv :: Maybe (AltIndexFile, FilePath, [(String, String)])
-												Avoid running annex.http-headers-command more than once.

											
										
										
											2018-04-04 19:00:51 +00:00
+									, urloptions :: Maybe UrlOptions
-												fix deadlock

Fix a deadlock that could occur after git-annex got an unlocked file,
causing the command to hang indefinitely.

Known to happen on vfat filesystems, possibly others.

Note that a deadlock is still theoretically possible, if anything
smudge --clean does causes it to run the git queue for some other
reason.

Apparently that doesn't happen, but will need to keep an eye on it.

											
										
										
											2020-06-18 16:56:29 +00:00
+									, insmudgecleanfilter :: Bool
-												deal better with clock skew situations, using vector clocks

* Deal with clock skew, both forwards and backwards, when logging
  information to the git-annex branch.
* GIT_ANNEX_VECTOR_CLOCK can now be set to a fixed value (eg 1)
  rather than needing to be advanced each time a new change is made.
* Misuse of GIT_ANNEX_VECTOR_CLOCK will no longer confuse git-annex.

When changing a file in the git-annex branch, the vector clock to use is now
determined by first looking at the current time (or GIT_ANNEX_VECTOR_CLOCK
when set), and comparing it to the newest vector clock already in use in
that file. If a newer time stamp was already in use, advance it forward by
a second instead.

When the clock is set to a time in the past, this avoids logging with
an old timestamp, which would risk that log line later being ignored in favor
of a "newer" line that is really not newer.

When a log entry has been made with a clock that was set far ahead in the
future, this avoids newer information being logged with an older timestamp
and so being ignored in favor of that future-timestamped information.
Once all clocks get fixed, this will result in the vector clocks being
incremented, until finally enough time has passed that time gets back ahead
of the vector clock value, and then it will return to usual operation.

(This latter situation is not ideal, but it seems the best that can be done.
The issue with it is, since all writers will be incrementing the last
vector clock they saw, there's no way to tell when one writer made a write
significantly later in time than another, so the earlier write might
arbitrarily be picked when merging. This problem is why git-annex uses
timestamps in the first place, rather than pure vector clocks.)

Advancing forward by 1 second is somewhat arbitrary. setDead
advances a timestamp by just 1 picosecond, and the vector clock could
too. But then it would interfere with setDead, which wants to be
overruled by any change. So it could use 2 picoseconds or something,
but that seems weird. It could just as well advance it forward by a
minute or whatever, but then it would be harder for real time to catch
up with the vector clock when forward clock slew had happened.

A complication is that many log files contain several different pieces of
information, and it may be best to only use vector clocks for the same piece
of information. For example, a key's location log file contains
InfoPresent/InfoMissing for each UUID, and it only looks at the vector
clocks for the UUID that is being changed, and not other UUIDs.

Although exactly where the dividing line is can be hard to determine.
Consider metadata logs, where a field "tag" can have multiple values set
at different times. Should it advance forward past the last tag?
Probably. What about when a different field is set, should it look at
the clocks of other fields? Perhaps not, but currently it does, and
this does not seem like it will cause any problems.

Another one I'm not entirely sure about is the export log, which is
keyed by (fromuuid, touuid). So if multiple repos are exporting to the
same remote, different vector clocks can be used for that remote.
It looks like that's probably ok, because it does not try to determine
what order things occurred when there was an export conflict.

Sponsored-by: Jochen Bartl on Patreon

											
										
										
											2021-08-03 20:45:20 +00:00
+									, getvectorclock :: IO CandidateVectorClock
-												add cipher field to AnnexState

											
										
										
											2011-04-16 20:41:46 +00:00
+									}
-												rework config storage

Moved away from a map of flags to storing config directly in the AnnexState
structure. Got rid of most accessor functions in Annex.

This allowed supporting multiple --exclude flags.

											
										
										
											2011-01-26 04:17:38 +00:00
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+								newAnnexState :: GitConfig -> Git.Repo -> IO AnnexState
 								newAnnexState c r = do
-												Ssh password prompting improved when using -J

When ssh connection caching is enabled (and when GIT_ANNEX_USE_GIT_SSH is
not set), only one ssh password prompt will be made per host, and only one
ssh password prompt will be made at a time.

This also fixes a race in prepSocket's stale ssh connection stopping
when run with -J. It was possible for one thread to start a cached ssh
connection, and another thread to immediately stop it, resulting in excess
connections being made.

This commit was supported by the NSF-funded DataLad project.

											
										
										
											2017-05-11 21:33:18 +00:00
+									o <- newMessageState
-												generate more compact git-annex branch for imports

Especially from borg, where the content identifier logs
all end up being the same identical file!

But also, for other imports, the location tracking logs can,
in some cases, be identical files.

Bonus optimisation: Avoid looking up (and parsing when set)
GIT_ANNEX_VECTOR_CLOCK env var every time a log is written to.
Although the lookup does happen at startup even when no
log will be written now.

											
										
										
											2020-12-23 19:21:33 +00:00
+									vc <- startVectorClock
-												get -J: Download different files from different remotes when the remotes have the same costs.

Only done in -J mode because only if there's concurrency can downloading
from two remotes be faster. Without concurrency, it's likely the case that
sequential downloads from the same remote are faster than switching back
and forth between two remotes.

There is some hairy MVar code here, but basically it just keeps
the activeremotes MVar full except when deciding which remote to assign
to a thread.

Also affects gets by sync --content -J

This commit was sponsored by Jochen Bartl.

											
										
										
											2016-09-06 16:42:50 +00:00
+									return $ AnnexState
 										{ repo = r
 										, repoadjustment = return
 										, gitconfig = c
-												Bugfix: Don't ignore --debug when it is followed by -c

											
										
										
											2020-02-27 04:52:37 +00:00
+										, gitconfigadjustment = id
-												pass along -c options to child git-annex processes

											
										
										
											2020-12-15 14:44:36 +00:00
+										, gitconfigoverride = []
-												Improve startup time for commands that do not operate on remotes

And for tab completion, by not unnecessarily statting paths to remotes,
which used to cause eg, spin-up of removable drives.

Got rid of the remotes member of Git.Repo. This was a bit painful.

Remote.Git modifies the list of remotes as it reads their configs,
so still need a persistent list of remotes. So, put it in as
Annex.gitremotes. It's only populated by getGitRemotes, so commands
like examinekey that don't care about remotes won't do so.

This commit was sponsored by Jake Vosloo on Patreon.

											
										
										
											2018-01-09 19:36:56 +00:00
+										, gitremotes = Nothing
-												detect v10 upgrade while running

Capstone of the v10 upgrade process.

Tested with a git-annex drop in a v8 repo that had a local v8 remote.
Upgrading the repo to v10 (with --force) immediately caused it to notice
and switch over to v10 locking. Upgrading the remote also caused it to
switch over when operating on the remote.

The InodeCache makes this fairly efficient, just an added stat call per
lock of an object file. After the v10 upgrade, there is no more
overhead.

Sponsored-by: Dartmouth College's Datalad project

											
										
										
											2022-01-21 16:56:07 +00:00
+										, gitconfiginodecache = Nothing
-												annex.backend is the new name for what was annex.backends

It takes a single key-value backend, rather than the unnecessary and confusing list.
The old option still works if set.

Simplified some old old code too.

This commit was sponsored by Thomas Hochstein on Patreon.

											
										
										
											2017-05-09 19:04:07 +00:00
+										, backend = Nothing
-												get -J: Download different files from different remotes when the remotes have the same costs.

Only done in -J mode because only if there's concurrency can downloading
from two remotes be faster. Without concurrency, it's likely the case that
sequential downloads from the same remote are faster than switching back
and forth between two remotes.

There is some hairy MVar code here, but basically it just keeps
the activeremotes MVar full except when deciding which remote to assign
to a thread.

Also affects gets by sync --content -J

This commit was sponsored by Jochen Bartl.

											
										
										
											2016-09-06 16:42:50 +00:00
+										, remotes = []
-												Ssh password prompting improved when using -J

When ssh connection caching is enabled (and when GIT_ANNEX_USE_GIT_SSH is
not set), only one ssh password prompt will be made per host, and only one
ssh password prompt will be made at a time.

This also fixes a race in prepSocket's stale ssh connection stopping
when run with -J. It was possible for one thread to start a cached ssh
connection, and another thread to immediately stop it, resulting in excess
connections being made.

This commit was supported by the NSF-funded DataLad project.

											
										
										
											2017-05-11 21:33:18 +00:00
+										, output = o
-												differentiate between concurrency enabled at command line and by git config

The latter should not affect --batch mode.

											
										
										
											2020-09-16 15:41:28 +00:00
+										, concurrency = ConcurrencyCmdLine NonConcurrent
-												get -J: Download different files from different remotes when the remotes have the same costs.

Only done in -J mode because only if there's concurrency can downloading
from two remotes be faster. Without concurrency, it's likely the case that
sequential downloads from the same remote are faster than switching back
and forth between two remotes.

There is some hairy MVar code here, but basically it just keeps
the activeremotes MVar full except when deciding which remote to assign
to a thread.

Also affects gets by sync --content -J

This commit was sponsored by Jochen Bartl.

											
										
										
											2016-09-06 16:42:50 +00:00
+										, daemon = False
 										, branchstate = startBranchState
 										, repoqueue = Nothing
-												cat-file resource pool

Avoid running a large number of git cat-file child processes when run with
a large -J value.

This implementation takes care to avoid adding any overhead to git-annex
when run without -J. When run with -J, there is a small bit of added
overhead, to manipulate the resource pool. That optimisation added a
fair bit of complexity.

											
										
										
											2020-04-20 17:53:27 +00:00
+										, catfilehandles = catFileHandlesNonConcurrent
-												get -J: Download different files from different remotes when the remotes have the same costs.

Only done in -J mode because only if there's concurrency can downloading
from two remotes be faster. Without concurrency, it's likely the case that
sequential downloads from the same remote are faster than switching back
and forth between two remotes.

There is some hairy MVar code here, but basically it just keeps
the activeremotes MVar full except when deciding which remote to assign
to a thread.

Also affects gets by sync --content -J

This commit was sponsored by Jochen Bartl.

											
										
										
											2016-09-06 16:42:50 +00:00
+										, hashobjecthandle = Nothing
 										, checkattrhandle = Nothing
 										, checkignorehandle = Nothing
 										, globalnumcopies = Nothing
-												mincopies

This is conceptually very simple, just making a 1 that was hard coded be
exposed as a config option. The hard part was plumbing all that, and
dealing with complexities like reading it from git attributes at the
same time that numcopies is read.

Behavior change: When numcopies is set to 0, git-annex used to drop
content without requiring any copies. Now to get that (highly unsafe)
behavior, mincopies also needs to be set to 0. It seemed better to
remove that edge case, than complicate mincopies by ignoring it when
numcopies is 0.

This commit was sponsored by Denis Dzyubenko on Patreon.

											
										
										
											2021-01-06 18:11:08 +00:00
+										, globalmincopies = Nothing
-												get -J: Download different files from different remotes when the remotes have the same costs.

Only done in -J mode because only if there's concurrency can downloading
from two remotes be faster. Without concurrency, it's likely the case that
sequential downloads from the same remote are faster than switching back
and forth between two remotes.

There is some hairy MVar code here, but basically it just keeps
the activeremotes MVar full except when deciding which remote to assign
to a thread.

Also affects gets by sync --content -J

This commit was sponsored by Jochen Bartl.

											
										
										
											2016-09-06 16:42:50 +00:00
+										, limit = BuildingMatcher []
-												fix --time-limit

It got broken in several ways by the streaming seeking optimisations
around version 8.20201007.

Moved time limit checking out of the matcher, which was a hack in the
first place. So everywhere that uses Limit.getMatcher needs to check
time limit. Well, almost everywhere. Command.Info uses it, but it does
not make sense to time limit getting info. And Command.MultiCast uses it
just to build up a list of files that then get passed to a command, so
it would never have hit the timeout in a useful way.

This implementation is a little more expensive when at time limit than
necessary, since it continues seeking only to discard everything after the
time limit. I did try making it close the file handles to force a faster
shutdown, but that didn't work and hung. Could certainly be improved
somehow, but seeking is probably not the expensive bit when a time limit
is hit, so this seems acceptable for now.

											
										
										
											2021-01-04 19:25:28 +00:00
+										, timelimit = Nothing
-												add --size-limit option

When this option is not used, there should be effectively no added
overhead, thanks to the optimisation in
b3cd0cc6ba4e5b9e2ae0abd9c8b2ec32475e09d2.

When an action fails on a file, the size of the file still counts toward
the size limit. This was necessary to support concurrency, but also
generally seems like the right choice.

Most commands that operate on annexed files support the option.
export and import do not, and I don't know if it would make sense for
export to.. Why would you want an incomplete export? sync doesn't, and
while it would be easy to make it support it for transferring files,
it's not clear if dropping files should also take the size limit into
account. Commands like add that don't operate on annexed files don't
support the option either.

Exiting 101 not yet implemented.

Sponsored-by: Denis Dzyubenko on Patreon

											
										
										
											2021-06-04 20:08:42 +00:00
+										, sizelimit = Nothing
-												add a UUIDDesc type containing a ByteString

Groundwork for handling uuid.log using ByteString

											
										
										
											2019-01-01 19:39:45 +00:00
+										, uuiddescmap = Nothing
-												get -J: Download different files from different remotes when the remotes have the same costs.

Only done in -J mode because only if there's concurrency can downloading
from two remotes be faster. Without concurrency, it's likely the case that
sequential downloads from the same remote are faster than switching back
and forth between two remotes.

There is some hairy MVar code here, but basically it just keeps
the activeremotes MVar full except when deciding which remote to assign
to a thread.

Also affects gets by sync --content -J

This commit was sponsored by Jochen Bartl.

											
										
										
											2016-09-06 16:42:50 +00:00
+										, preferredcontentmap = Nothing
 										, requiredcontentmap = Nothing
-												cache remote.log

Unlikely to speed up any of the existing uses much, but I want to use it
in a message that might be displayed many times.

											
										
										
											2020-09-22 17:52:26 +00:00
+										, remoteconfigmap = Nothing
-												get -J: Download different files from different remotes when the remotes have the same costs.

Only done in -J mode because only if there's concurrency can downloading
from two remotes be faster. Without concurrency, it's likely the case that
sequential downloads from the same remote are faster than switching back
and forth between two remotes.

There is some hairy MVar code here, but basically it just keeps
the activeremotes MVar full except when deciding which remote to assign
to a thread.

Also affects gets by sync --content -J

This commit was sponsored by Jochen Bartl.

											
										
										
											2016-09-06 16:42:50 +00:00
+										, forcetrust = M.empty
 										, trustmap = Nothing
 										, groupmap = Nothing
 										, lockcache = M.empty
 										, fields = M.empty
-												propagate signals to the transferrer process group

Done on unix, could not implement it on windows quite.

The signal library gets part of the way needed for windows.
But I had to open https://github.com/pmlodawski/signal/issues/1 because
it lacks raiseSignal.

Also, I don't know what the equivalent of getProcessGroupIDOf is on
windows. And System.Process does not provide a way to send any signal to
a process group except for SIGINT.

This commit was sponsored by Boyd Stephen Smith Jr. on Patreon.

											
										
										
											2020-12-11 19:28:58 +00:00
+										, cleanupactions = M.empty
-												get -J: Download different files from different remotes when the remotes have the same costs.

Only done in -J mode because only if there's concurrency can downloading
from two remotes be faster. Without concurrency, it's likely the case that
sequential downloads from the same remote are faster than switching back
and forth between two remotes.

There is some hairy MVar code here, but basically it just keeps
the activeremotes MVar full except when deciding which remote to assign
to a thread.

Also affects gets by sync --content -J

This commit was sponsored by Jochen Bartl.

											
										
										
											2016-09-06 16:42:50 +00:00
+										, sentinalstatus = Nothing
 										, errcounter = 0
-												Improved handling of --time-limit when combined with -J

When concurrency is enabled, there can be worker threads still running
when the time limit is checked. Exiting right there does not
give those threads time to finish what they're doing. Instead, the seeking
is wrapped up, and git-annex then shuts down cleanly.

The whole point of --time-limit existing, rather than using timeout(1)
when running git-annex is to let git-annex finish the action(s) it is
working on when the time limit is reached, and shut down cleanly.

I noticed this problem when investigating why restagePointerFile might
not have run after get/drop of an unlocked file. With --time-limit -J,
a worker thread may have finished updating a work tree file, and be killed
by the time limit check before it can run restagePointerFile. So despite
--time-limit running the shutdown actions, the work tree file didn't get
restaged.

Sponsored-by: Dartmouth College's DANDI project

											
										
										
											2022-09-22 16:47:40 +00:00
+										, reachedlimit = False
-												annex.adjustedbranchrefresh

Added annex.adjustedbranchrefresh git config to update adjusted branches
set up by git-annex adjust --unlock-present/--hide-missing.

Note, in a few cases, I was not able to make the adjusted branch
be updated in calls to moveAnnex, because information about what
file corresponds to a key is not available. They are:

* If two files point to one file, then eg, `git annex get foo` will
  update the branch to unlock foo, but will not unlock bar, because it
  does not know about it. Might be fixable by making `git annex get
  bar` do something besides skipping bar?
* git-annex-shell recvkey likewise (so sends over ssh from old versions
  of git-annex)
* git-annex setkey
* git-annex transferkey if the user does not use --file
* git-annex multicast sends keys with no associated file info

Doing a single full refresh at the end, after any incremental refresh,
will deal with those edge cases.

											
										
										
											2020-11-16 18:09:55 +00:00
+										, adjustedbranchrefreshcounter = 0
-												get -J: Download different files from different remotes when the remotes have the same costs.

Only done in -J mode because only if there's concurrency can downloading
from two remotes be faster. Without concurrency, it's likely the case that
sequential downloads from the same remote are faster than switching back
and forth between two remotes.

There is some hairy MVar code here, but basically it just keeps
the activeremotes MVar full except when deciding which remote to assign
to a thread.

Also affects gets by sync --content -J

This commit was sponsored by Jochen Bartl.

											
										
										
											2016-09-06 16:42:50 +00:00
+										, unusedkeys = Nothing
 										, tempurls = M.empty
 										, existinghooks = M.empty
-												speed up enteringStage in non-concurrent mode

Avoid a STM transaction.

Also got rid of UnallocatedWorkerPool.

											
										
										
											2019-06-19 19:47:54 +00:00
+										, workers = Nothing
-												get -J: Download different files from different remotes when the remotes have the same costs.

Only done in -J mode because only if there's concurrency can downloading
from two remotes be faster. Without concurrency, it's likely the case that
sequential downloads from the same remote are faster than switching back
and forth between two remotes.

There is some hairy MVar code here, but basically it just keeps
the activeremotes MVar full except when deciding which remote to assign
to a thread.

Also affects gets by sync --content -J

This commit was sponsored by Jochen Bartl.

											
										
										
											2016-09-06 16:42:50 +00:00
+										, cachedcurrentbranch = Nothing
-												Optimisations to git-annex branch query and setting, avoiding repeated copies of the environment.

Speeds up commands like  "git-annex find --in remote" by over 50%.

Profiling showed that adjustGitEnv was 21% of the time and 37% of the
allocations of that command. It copied the environment each time with
getEnvironment.

The only repeated use of adjustGitEnv is in withIndexFile, which tends to
be run at least once per file. So, it was optimised by keeping a cache of
the environment, which can be reused.

There could be other better ways to optimise this. Maybe get the whole
environment once at startup. But, then it would have to be serialized back
out each time running a child process, so I doubt that would be a net win.

It might be better to cache a version of the environment that is
pre-modified to use .git-annex/index. But, profiling doesn't show that
modifying the environment is taking any significant time.

											
										
										
											2016-09-29 17:36:48 +00:00
+										, cachedgitenv = Nothing
-												Avoid running annex.http-headers-command more than once.

											
										
										
											2018-04-04 19:00:51 +00:00
+										, urloptions = Nothing
-												fix deadlock

Fix a deadlock that could occur after git-annex got an unlocked file,
causing the command to hang indefinitely.

Known to happen on vfat filesystems, possibly others.

Note that a deadlock is still theoretically possible, if anything
smudge --clean does causes it to run the git queue for some other
reason.

Apparently that doesn't happen, but will need to keep an eye on it.

											
										
										
											2020-06-18 16:56:29 +00:00
+										, insmudgecleanfilter = False
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+										, getvectorclock = vc
-												get -J: Download different files from different remotes when the remotes have the same costs.

Only done in -J mode because only if there's concurrency can downloading
from two remotes be faster. Without concurrency, it's likely the case that
sequential downloads from the same remote are faster than switching back
and forth between two remotes.

There is some hairy MVar code here, but basically it just keeps
the activeremotes MVar full except when deciding which remote to assign
to a thread.

Also affects gets by sync --content -J

This commit was sponsored by Jochen Bartl.

											
										
										
											2016-09-06 16:42:50 +00:00
+										}
-												more reorg, spiffed up state monad

											
										
										
											2010-10-14 07:18:11 +00:00
-												Add support for core.worktree, and fix support for GIT_WORK_TREE and GIT_DIR.

The environment needs to override git-config. Changed when git config is
read, and avoid rereading it once it's been read.

chdir for both worktree settings.

											
										
										
											2012-05-18 22:20:53 +00:00
+								{- Makes an Annex state object for the specified git repo.
-												Submodules are now supported by git-annex!

Seems to work, but still experimental until it's been tested more.

When repositories are on filesystems not supporting symlinks, the .git dir
symlink trick cannot be used. Since we're going to be in direct mode
anyway, the .git dir symlink is not strictly needed.

However, I have not fixed the code that creates new annex symlinks to
handle this case -- the committed symlinks will be wrong.

git annex sync happens to currently fail in a submodule using direct mode,
because there's no HEAD ref. That also needs to be dealt with to get
this fully working in crippled filesystems.

Leaving http://github.com/datalad/datalad/issues/44 open until these issues
are dealt with.

											
										
										
											2015-03-02 20:43:44 +00:00
+								 - Ensures the config is read, if it was not already, and performs
 								 - any necessary git repo fixups. -}
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+								new :: Git.Repo -> IO (AnnexState, AnnexRead)
-												fix reversion in relative paths to local remotes of direct mode repos

0980f3dae62b8a9e2d6a6b40767299559e4675a8 broke support for local remotes
from direct mode repos, because the relative path was taken to be from the
gitdir, rather than from the work tree.

											
										
										
											2013-11-26 22:11:37 +00:00
+								new r = do
-												avoid making absolute git remote path relative

When a git remote is configured with an absolute path, use that path,
rather than making it relative. If it's configured with a relative path,
use that.

Git.Construct.fromPath changed to preserve the path as-is,
rather than making it absolute. And Annex.new changed to not
convert the path to relative. Instead, Git.CurrentRepo.get
generates a relative path.

A few things that used fromAbsPath unnecessarily were changed in passing to
use fromPath instead. I'm seeing fromAbsPath as a security check,
while before it was being used in some cases when the path was
known absolute already. It may be that fromAbsPath is not really needed,
but only git-annex-shell uses it now, and I'm not 100% sure that there's
not some input that would cause a relative path to be used, opening a
security hole, without the security check. So left it as-is.

Test suite passes and strace shows the configured remote url is used
unchanged in the path into it. I can't be 100% sure there's not some code
somewhere that takes an absolute path to the repo and converts it to
relative and uses it, but it seems pretty unlikely that the code paths used
for a git remote would call such code. One place I know of is gitAnnexLink,
but I'm pretty sure that git remotes never deal with annex symlinks. If
that did get called, it generates a path relative to cwd, which would have
been wrong before this change as well, when operating on a remote.

											
										
										
											2021-02-08 17:18:01 +00:00
+									r' <- Git.Config.read r
-												git-annex config annex.largefiles

annex.largefiles can be configured by git-annex config, to more easily set
a default that will also be used by clones, without needing to shoehorn the
expression into the gitattributes file. The git config and gitattributes
override that.

Whenever something is added to git-annex config, we have to consider what
happens if a user puts a purposefully bad value in there. Or, if a new
git-annex adds some new value that an old git-annex can't parse.
In this case, a global annex.largefiles that can't be parsed currently
makes an error be thrown. That might not be ideal, but the gitattribute
behaves the same, and is almost equally repo-global.

Performance notes:

git-annex add and addurl construct a matcher once
and use it for every file, so the added time penalty for reading the global
config log is minor. If the gitattributes annex.largefiles were deprecated,
git-annex add would get around 2% faster (excluding hashing), because
looking that up for each file is not fast. So this new way of setting
it is progress toward speeding up add.

git-annex smudge does need to load the log every time. As well as checking
the git attribute. Not ideal. Setting annex.gitaddtoannex=false avoids
both overheads.

											
										
										
											2019-12-20 16:12:31 +00:00
+									let c = extractGitConfig FromGitConfig r'
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									st <- newAnnexState c =<< fixupRepo r' c
-												implement fastDebug

Most of the changes here involve global option parsing: GlobalSetter
changed so it can both run an Annex action to set state, but can also
change the AnnexRead value, which is immutable once the Annex monad is
running.

That allowed a debugselector value to be added to AnnexRead, seeded
from the git config. The --debugfilter option's GlobalSetter then updates
the AnnexRead.

This improved GlobalSetter can later be used to move more stuff to
AnnexRead. Things that don't involve a git config will be easier to
move, and probably a *lot* of things can be moved eventually.

fastDebug, while implemented, is not used anywhere yet. But it should be
fast..

											
										
										
											2021-04-06 19:14:00 +00:00
+									rd <- newAnnexRead c
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									return (st, rd)
-												more reorg, spiffed up state monad

											
										
										
											2010-10-14 07:18:11 +00:00
-												Switch to MonadCatchIO-transformers for better handling of state while catching exceptions.

As seen in this bug report, the lifted exception handling using the StateT
monad throws away state changes when an action throws an exception.
http://git-annex.branchable.com/bugs/git_annex_fork_bombs_on_gpg_file/
  .. Which can result in cached values being redundantly calculated, or other
     possibly worse bugs when the annex state gets out of sync with reality.

This switches from a StateT AnnexState to a ReaderT (MVar AnnexState).
All changes to the state go via the MVar. So when an Annex action is
running inside an exception handler, and it makes some changes, they
immediately go into effect in the MVar. If it then throws an exception
(or even crashes its thread!), the state changes are still in effect.

The MonadCatchIO-transformers change is actually only incidental.
I could have kept on using lifted-base for the exception handling.
However, I'd have needed to write a new instance of MonadBaseControl
for the new monad.. and I didn't write the old instance.. I begged Bas
and he kindly sent it to me. Happily, MonadCatchIO-transformers is
able to derive a MonadCatchIO instance for my monad.

This is a deep level change. It passes the test suite! What could it break?

Well.. The most likely breakage would be to code that runs an Annex action
in an exception handler, and *wants* state changes to be thrown away.
Perhaps the state changes leaves the state inconsistent, or wrong. Since
there are relatively few places in git-annex that catch exceptions in the
Annex monad, and the AnnexState is generally just used to cache calculated
data, this is unlikely to be a problem.

Oh yeah, this change also makes Assistant.Types.ThreadedMonad a bit
redundant. It's now entirely possible to run concurrent Annex actions in
different threads, all sharing access to the same state! The ThreadedMonad
just adds some extra work on top of that, with its own MVar, and avoids
such actions possibly stepping on one-another's toes. I have not gotten
rid of it, but might try that later. Being able to run concurrent Annex
actions would simplify parts of the Assistant code.

											
										
										
											2013-05-19 18:16:36 +00:00
+								{- Performs an action in the Annex monad from a starting state,
 								 - returning a new state. -}
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+								run :: (AnnexState, AnnexRead) -> Annex a -> IO (a, (AnnexState, AnnexRead))
 								run (st, rd) a = do
 									mv <- newMVar st
 									run' mv rd a
 								{- Like run, but runs the action against an existing MVar rather
 								 - than a fresh one. The final state is taken out of the MVar, so
 								 - the MVar is left empty when this returns; a caller that keeps
 								 - using the MVar has to refill it. -}
 								run' :: MVar AnnexState -> AnnexRead -> Annex a -> IO (a, (AnnexState, AnnexRead))
 								run' mvar rd a = do
 									r <- runReaderT (runAnnex a) (mvar, rd)
 									st <- takeMVar mvar
 									return (r, (st, rd))
-												Switch to MonadCatchIO-transformers for better handling of state while catching exceptions.

As seen in this bug report, the lifted exception handling using the StateT
monad throws away state changes when an action throws an exception.
http://git-annex.branchable.com/bugs/git_annex_fork_bombs_on_gpg_file/
  .. Which can result in cached values being redundantly calculated, or other
     possibly worse bugs when the annex state gets out of sync with reality.

This switches from a StateT AnnexState to a ReaderT (MVar AnnexState).
All changes to the state go via the MVar. So when an Annex action is
running inside an exception handler, and it makes some changes, they
immediately go into effect in the MVar. If it then throws an exception
(or even crashes its thread!), the state changes are still in effect.

The MonadCatchIO-transformers change is actually only incidental.
I could have kept on using lifted-base for the exception handling.
However, I'd have needed to write a new instance of MonadBaseControl
for the new monad.. and I didn't write the old instance.. I begged Bas
and he kindly sent it to me. Happily, MonadCatchIO-transformers is
able to derive a MonadCatchIO instance for my monad.

This is a deep level change. It passes the test suite! What could it break?

Well.. The most likely breakage would be to code that runs an Annex action
in an exception handler, and *wants* state changes to be thrown away.
Perhaps the state changes leaves the state inconsistent, or wrong. Since
there are relatively few places in git-annex that catch exceptions in the
Annex monad, and the AnnexState is generally just used to cache calculated
data, this is unlikely to be a problem.

Oh yeah, this change also makes Assistant.Types.ThreadedMonad a bit
redundant. It's now entirely possible to run concurrent Annex actions in
different threads, all sharing access to the same state! The ThreadedMonad
just adds some extra work on top of that, with its own MVar, and avoids
such actions possibly stepping on one-another's toes. I have not gotten
rid of it, but might try that later. Being able to run concurrent Annex
actions would simplify parts of the Assistant code.

											
										
										
											2013-05-19 18:16:36 +00:00
 								{- Performs an action in the Annex monad from a starting state,
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+								 - and throws away the changed state. -}
 								eval :: (AnnexState, AnnexRead) -> Annex a -> IO a
 								{- run yields the result paired with the final state; fst keeps
 								 - only the result. -}
 								eval v a = fst <$> run v a
-												Switch to MonadCatchIO-transformers for better handling of state while catching exceptions.

As seen in this bug report, the lifted exception handling using the StateT
monad throws away state changes when an action throws an exception.
http://git-annex.branchable.com/bugs/git_annex_fork_bombs_on_gpg_file/
  .. Which can result in cached values being redundantly calculated, or other
     possibly worse bugs when the annex state gets out of sync with reality.

This switches from a StateT AnnexState to a ReaderT (MVar AnnexState).
All changes to the state go via the MVar. So when an Annex action is
running inside an exception handler, and it makes some changes, they
immediately go into effect in the MVar. If it then throws an exception
(or even crashes its thread!), the state changes are still in effect.

The MonadCatchIO-transformers change is actually only incidental.
I could have kept on using lifted-base for the exception handling.
However, I'd have needed to write a new instance of MonadBaseControl
for the new monad.. and I didn't write the old instance.. I begged Bas
and he kindly sent it to me. Happily, MonadCatchIO-transformers is
able to derive a MonadCatchIO instance for my monad.

This is a deep level change. It passes the test suite! What could it break?

Well.. The most likely breakage would be to code that runs an Annex action
in an exception handler, and *wants* state changes to be thrown away.
Perhaps the state changes leaves the state inconsistent, or wrong. Since
there are relatively few places in git-annex that catch exceptions in the
Annex monad, and the AnnexState is generally just used to cache calculated
data, this is unlikely to be a problem.

Oh yeah, this change also makes Assistant.Types.ThreadedMonad a bit
redundant. It's now entirely possible to run concurrent Annex actions in
different threads, all sharing access to the same state! The ThreadedMonad
just adds some extra work on top of that, with its own MVar, and avoids
such actions possibly stepping on one-another's toes. I have not gotten
rid of it, but might try that later. Being able to run concurrent Annex
actions would simplify parts of the Assistant code.

											
										
										
											2013-05-19 18:16:36 +00:00
-												fix lockKey to run callback in original Annex monad, not local remote's

											
										
										
											2015-10-09 17:35:28 +00:00
+								{- Makes a runner action, that allows diving into IO and from inside
 								 - the IO action, running an Annex action. -}
 								makeRunner :: Annex (Annex a -> IO a)
 								makeRunner = do
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									(mvar, rd) <- ask
-												flush keys db queue even on exception

Also fixed a bug in makeRunner; run' leaves the mvar empty so have to
refill it.

											
										
										
											2015-12-23 23:38:18 +00:00
+									return $ \a -> do
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+										(r, (s, _rd)) <- run' mvar rd a
-												flush keys db queue even on exception

Also fixed a bug in makeRunner; run' leaves the mvar empty so have to
refill it.

											
										
										
											2015-12-23 23:38:18 +00:00
+										putMVar mvar s
 										return r
-												fix lockKey to run callback in original Annex monad, not local remote's

											
										
										
											2015-10-09 17:35:28 +00:00
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+								getRead :: (AnnexRead -> v) -> Annex v
 								{- Applies the selector to the AnnexRead half of the reader
 								 - environment. The MVar holding the AnnexState is not accessed. -}
 								getRead selector = selector . snd <$> ask
-												Switch to MonadCatchIO-transformers for better handling of state while catching exceptions.

As seen in this bug report, the lifted exception handling using the StateT
monad throws away state changes when an action throws an exception.
http://git-annex.branchable.com/bugs/git_annex_fork_bombs_on_gpg_file/
  .. Which can result in cached values being redundantly calculated, or other
     possibly worse bugs when the annex state gets out of sync with reality.

This switches from a StateT AnnexState to a ReaderT (MVar AnnexState).
All changes to the state go via the MVar. So when an Annex action is
running inside an exception handler, and it makes some changes, they
immediately go into effect in the MVar. If it then throws an exception
(or even crashes its thread!), the state changes are still in effect.

The MonadCatchIO-transformers change is actually only incidental.
I could have kept on using lifted-base for the exception handling.
However, I'd have needed to write a new instance of MonadBaseControl
for the new monad.. and I didn't write the old instance.. I begged Bas
and he kindly sent it to me. Happily, MonadCatchIO-transformers is
able to derive a MonadCatchIO instance for my monad.

This is a deep level change. It passes the test suite! What could it break?

Well.. The most likely breakage would be to code that runs an Annex action
in an exception handler, and *wants* state changes to be thrown away.
Perhaps the state changes leaves the state inconsistent, or wrong. Since
there are relatively few places in git-annex that catch exceptions in the
Annex monad, and the AnnexState is generally just used to cache calculated
data, this is unlikely to be a problem.

Oh yeah, this change also makes Assistant.Types.ThreadedMonad a bit
redundant. It's now entirely possible to run concurrent Annex actions in
different threads, all sharing access to the same state! The ThreadedMonad
just adds some extra work on top of that, with its own MVar, and avoids
such actions possibly stepping on one-another's toes. I have not gotten
rid of it, but might try that later. Being able to run concurrent Annex
actions would simplify parts of the Assistant code.

											
										
										
											2013-05-19 18:16:36 +00:00
+								getState :: (AnnexState -> v) -> Annex v
 								getState selector = do
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									mvar <- fst <$> ask
 									st <- liftIO $ readMVar mvar
 									return $ selector st
-												Switch to MonadCatchIO-transformers for better handling of state while catching exceptions.

As seen in this bug report, the lifted exception handling using the StateT
monad throws away state changes when an action throws an exception.
http://git-annex.branchable.com/bugs/git_annex_fork_bombs_on_gpg_file/
  .. Which can result in cached values being redundantly calculated, or other
     possibly worse bugs when the annex state gets out of sync with reality.

This switches from a StateT AnnexState to a ReaderT (MVar AnnexState).
All changes to the state go via the MVar. So when an Annex action is
running inside an exception handler, and it makes some changes, they
immediately go into effect in the MVar. If it then throws an exception
(or even crashes its thread!), the state changes are still in effect.

The MonadCatchIO-transformers change is actually only incidental.
I could have kept on using lifted-base for the exception handling.
However, I'd have needed to write a new instance of MonadBaseControl
for the new monad.. and I didn't write the old instance.. I begged Bas
and he kindly sent it to me. Happily, MonadCatchIO-transformers is
able to derive a MonadCatchIO instance for my monad.

This is a deep level change. It passes the test suite! What could it break?

Well.. The most likely breakage would be to code that runs an Annex action
in an exception handler, and *wants* state changes to be thrown away.
Perhaps the state changes leaves the state inconsistent, or wrong. Since
there are relatively few places in git-annex that catch exceptions in the
Annex monad, and the AnnexState is generally just used to cache calculated
data, this is unlikely to be a problem.

Oh yeah, this change also makes Assistant.Types.ThreadedMonad a bit
redundant. It's now entirely possible to run concurrent Annex actions in
different threads, all sharing access to the same state! The ThreadedMonad
just adds some extra work on top of that, with its own MVar, and avoids
such actions possibly stepping on one-another's toes. I have not gotten
rid of it, but might try that later. Being able to run concurrent Annex
actions would simplify parts of the Assistant code.

											
										
										
											2013-05-19 18:16:36 +00:00
 								changeState :: (AnnexState -> AnnexState) -> Annex ()
 								changeState modifier = do
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									mvar <- fst <$> ask
-												Switch to MonadCatchIO-transformers for better handling of state while catching exceptions.

As seen in this bug report, the lifted exception handling using the StateT
monad throws away state changes when an action throws an exception.
http://git-annex.branchable.com/bugs/git_annex_fork_bombs_on_gpg_file/
  .. Which can result in cached values being redundantly calculated, or other
     possibly worse bugs when the annex state gets out of sync with reality.

This switches from a StateT AnnexState to a ReaderT (MVar AnnexState).
All changes to the state go via the MVar. So when an Annex action is
running inside an exception handler, and it makes some changes, they
immediately go into effect in the MVar. If it then throws an exception
(or even crashes its thread!), the state changes are still in effect.

The MonadCatchIO-transformers change is actually only incidental.
I could have kept on using lifted-base for the exception handling.
However, I'd have needed to write a new instance of MonadBaseControl
for the new monad.. and I didn't write the old instance.. I begged Bas
and he kindly sent it to me. Happily, MonadCatchIO-transformers is
able to derive a MonadCatchIO instance for my monad.

This is a deep level change. It passes the test suite! What could it break?

Well.. The most likely breakage would be to code that runs an Annex action
in an exception handler, and *wants* state changes to be thrown away.
Perhaps the state changes leaves the state inconsistent, or wrong. Since
there are relatively few places in git-annex that catch exceptions in the
Annex monad, and the AnnexState is generally just used to cache calculated
data, this is unlikely to be a problem.

Oh yeah, this change also makes Assistant.Types.ThreadedMonad a bit
redundant. It's now entirely possible to run concurrent Annex actions in
different threads, all sharing access to the same state! The ThreadedMonad
just adds some extra work on top of that, with its own MVar, and avoids
such actions possibly stepping on one-another's toes. I have not gotten
rid of it, but might try that later. Being able to run concurrent Annex
actions would simplify parts of the Assistant code.

											
										
										
											2013-05-19 18:16:36 +00:00
+									liftIO $ modifyMVar_ mvar $ return . modifier
-												more reorg, spiffed up state monad

											
										
										
											2010-10-14 07:18:11 +00:00
-												Optimisations to git-annex branch query and setting, avoiding repeated copies of the environment.

Speeds up commands like  "git-annex find --in remote" by over 50%.

Profiling showed that adjustGitEnv was 21% of the time and 37% of the
allocations of that command. It copied the environment each time with
getEnvironment.

The only repeated use of adjustGitEnv is in withIndexFile, which tends to
be run at least once per file. So, it was optimised by keeping a cache of
the environment, which can be reused.

There could be other better ways to optimise this. Maybe get the whole
environment once at startup. But, then it would have to be serialized back
out each time running a child process, so I doubt that would be a net win.

It might be better to cache a version of the environment that is
pre-modified to use .git-annex/index. But, profiling doesn't show that
modifying the environment is taking any significant time.

											
										
										
											2016-09-29 17:36:48 +00:00
+								withState :: (AnnexState -> IO (AnnexState, b)) -> Annex b
-												When accessing a local remote, shut down git-cat-file processes afterwards, to ensure that remotes on removable media can be unmounted. Closes: #758630

This does mean that eg, copying multiple files to a local remote will
become slightly slower, since it now restarts git-cat-file after each copy.
Should not be significant slowdown.

The reason git-cat-file is run on the remote at all is to update its
location log. In order to add an item to it, it needs to get the current
content of the log. Finding a way to avoid needing to do that would be a
good path to avoiding this slowdown if it does become a problem somehow.

This commit was sponsored by Evan Deaubl.

											
										
										
											2014-08-20 16:01:45 +00:00
+								withState modifier = do
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									mvar <- fst <$> ask
-												Optimisations to git-annex branch query and setting, avoiding repeated copies of the environment.

Speeds up commands like  "git-annex find --in remote" by over 50%.

Profiling showed that adjustGitEnv was 21% of the time and 37% of the
allocations of that command. It copied the environment each time with
getEnvironment.

The only repeated use of adjustGitEnv is in withIndexFile, which tends to
be run at least once per file. So, it was optimised by keeping a cache of
the environment, which can be reused.

There could be other better ways to optimise this. Maybe get the whole
environment once at startup. But, then it would have to be serialized back
out each time running a child process, so I doubt that would be a net win.

It might be better to cache a version of the environment that is
pre-modified to use .git-annex/index. But, profiling doesn't show that
modifying the environment is taking any significant time.

											
										
										
											2016-09-29 17:36:48 +00:00
+									liftIO $ modifyMVar mvar modifier
-												When accessing a local remote, shut down git-cat-file processes afterwards, to ensure that remotes on removable media can be unmounted. Closes: #758630

This does mean that eg, copying multiple files to a local remote will
become slightly slower, since it now restarts git-cat-file after each copy.
Should not be significant slowdown.

The reason git-cat-file is run on the remote at all is to update its
location log. In order to add an item to it, it needs to get the current
content of the log. Finding a way to avoid needing to do that would be a
good path to avoiding this slowdown if it does become a problem somehow.

This commit was sponsored by Evan Deaubl.

											
										
										
											2014-08-20 16:01:45 +00:00
-												more command-specific options

Made --from and --to command-specific options.

Added generic storage for values of command-specific options,
which allows removing some of the special case fields in AnnexState.

(Also added generic storage for command-specific flags, although there are
not yet any.)

Note that this storage uses a Map, so repeatedly looking up the same value
is slightly more expensive than looking up an AnnexState field. But, the
value can be looked up once in the seek stage, transformed as necessary,
and passed in a closure to the start stage, and this avoids that overhead.

Still, I'm hesitant to use this for things like force or fast flags.
It's probably best to reserve it for flags that are only used by a few
commands, or options like --from and --to that it's important only be
allowed to be used with commands that implement them, to avoid user
confusion.

											
										
										
											2012-01-06 07:06:25 +00:00
+								{- Sets a field to a value -}
 								setField :: String -> String -> Annex ()
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+								setField field value = changeState $ \st ->
 									st { fields = M.insert field value $ fields st }
-												do a cleanup commit after moving data from or to a git remote

Added Annex.cleanup, which is a general purpose interface for adding
actions to run at the end.

Remotes with the old git-annex-shell will commit every time, and have no
commit command, so hide stderr when running the commit command.

											
										
										
											2012-02-25 22:02:49 +00:00
 								{- Adds a cleanup action to perform. -}
-												propagate signals to the transferrer process group

Done on unix, could not implement it on windows quite.

The signal library gets part of the way needed for windows.
But I had to open https://github.com/pmlodawski/signal/issues/1 because
it lacks raiseSignal.

Also, I don't know what the equivalent of getProcessGroupIDOf is on
windows. And System.Process does not provide a way to send any signal to
a process group except for SIGINT.

This commit was sponsored by Boyd Stephen Smith Jr. on Patreon.

											
										
										
											2020-12-11 19:28:58 +00:00
+								addCleanupAction :: CleanupAction -> Annex () -> Annex ()
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+								addCleanupAction k a = changeState register
 								  where
 									-- Store the action under its key in the cleanupactions map.
 									register st = st { cleanupactions = M.insert k a (cleanupactions st) }
-												more command-specific options

Made --from and --to command-specific options.

Added generic storage for values of command-specific options,
which allows removing some of the special case fields in AnnexState.

(Also added generic storage for command-specific flags, although there are
not yet any.)

Note that this storage uses a Map, so repeatedly looking up the same value
is slightly more expensive than looking up an AnnexState field. But, the
value can be looked up once in the seek stage, transformed as necessary,
and passed in a closure to the start stage, and this avoids that overhead.

Still, I'm hesitant to use this for things like force or fast flags.
It's probably best to reserve it for flags that are only used by a few
commands, or options like --from and --to that it's important only be
allowed to be used with commands that implement them, to avoid user
confusion.

											
										
										
											2012-01-06 07:06:25 +00:00
-												fix test suite build

											
										
										
											2012-04-30 17:59:05 +00:00
+								{- Sets the type of output to emit. -}
 								setOutput :: OutputType -> Annex ()
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+								setOutput o = changeState retarget
 								  where
 									-- Only the outputType inside the output record changes; the new
 									-- type is computed by adjustOutputType from the current type and
 									-- the requested one.
 									retarget st = st { output = adjusted (output st) }
 									adjusted m = m { outputType = adjustOutputType (outputType m) o }
-												fix test suite build

											
										
										
											2012-04-30 17:59:05 +00:00
-												more command-specific options

Made --from and --to command-specific options.

Added generic storage for values of command-specific options,
which allows removing some of the special case fields in AnnexState.

(Also added generic storage for command-specific flags, although there are
not yet any.)

Note that this storage uses a Map, so repeatedly looking up the same value
is slightly more expensive than looking up an AnnexState field. But, the
value can be looked up once in the seek stage, transformed as necessary,
and passed in a closure to the start stage, and this avoids that overhead.

Still, I'm hesitant to use this for things like force or fast flags.
It's probably best to reserve it for flags that are only used by a few
commands, or options like --from and --to that it's important only be
allowed to be used with commands that implement them, to avoid user
confusion.

											
										
										
											2012-01-06 07:06:25 +00:00
+								{- Looks up the value stored for a field, if one has been set. -}
 								getField :: String -> Annex (Maybe String)
 								getField field = do
 									m <- getState fields
 									return (M.lookup field m)
-												reorder repo parameters last

Many functions took the repo as their first parameter. Changing it
consistently to be the last parameter allows doing some useful things with
currying, that reduce boilerplate.

In particular, g <- gitRepo is almost never needed now, instead
use inRepo to run an IO action in the repo, and fromRepo to get
a value from the repo.

This also provides more opportunities to use monadic and applicative
combinators.

											
										
										
											2011-11-08 19:34:10 +00:00
+								{- Returns the annex's git repository. -}
-												more reorg, spiffed up state monad

											
										
										
											2010-10-14 07:18:11 +00:00
+								gitRepo :: Annex Git.Repo
-												successfully split Annex and AnnexState out of TypeInternals

											
										
										
											2011-01-26 01:49:04 +00:00
+								gitRepo = repo <$> getState id
-												reorder repo parameters last

Many functions took the repo as their first parameter. Changing it
consistently to be the last parameter allows doing some useful things with
currying, that reduce boilerplate.

In particular, g <- gitRepo is almost never needed now, instead
use inRepo to run an IO action in the repo, and fromRepo to get
a value from the repo.

This also provides more opportunities to use monadic and applicative
combinators.

											
										
										
											2011-11-08 19:34:10 +00:00
 								{- Runs an IO action in the annex's git repository. -}
 								inRepo :: (Git.Repo -> IO a) -> Annex a
-												golf

											
										
										
											2011-11-12 18:24:07 +00:00
+								inRepo a = do
 									r <- gitRepo
 									liftIO (a r)
-												reorder repo parameters last

Many functions took the repo as their first parameter. Changing it
consistently to be the last parameter allows doing some useful things with
currying, that reduce boilerplate.

In particular, g <- gitRepo is almost never needed now, instead
use inRepo to run an IO action in the repo, and fromRepo to get
a value from the repo.

This also provides more opportunities to use monadic and applicative
combinators.

											
										
										
											2011-11-08 19:34:10 +00:00
 								{- Applies a pure function to the annex's git repository and
 								 - returns the result. -}
 								fromRepo :: (Git.Repo -> a) -> Annex a
 								fromRepo a = do
 									r <- gitRepo
 									return (a r)
-												type based git config handling

Now there's a Config type, that's extracted from the git config at startup.
Note that laziness means that individual config values are only looked up
and parsed on demand, and so we get implicit memoization for all of them.
So this is not only prettier and more type safe, it optimises several
places that didn't have explicit memoization before. As well as getting rid
of the ugly explicit memoization code.

Not yet done for annex.<remote>.* configuration settings.

											
										
										
											2012-12-30 03:10:18 +00:00
-												Switch to MonadCatchIO-transformers for better handling of state while catching exceptions.

As seen in this bug report, the lifted exception handling using the StateT
monad throws away state changes when an action throws an exception.
http://git-annex.branchable.com/bugs/git_annex_fork_bombs_on_gpg_file/
  .. Which can result in cached values being redundantly calculated, or other
     possibly worse bugs when the annex state gets out of sync with reality.

This switches from a StateT AnnexState to a ReaderT (MVar AnnexState).
All changes to the state go via the MVar. So when an Annex action is
running inside an exception handler, and it makes some changes, they
immediately go into effect in the MVar. If it then throws an exception
(or even crashes its thread!), the state changes are still in effect.

The MonadCatchIO-transformers change is actually only incidental.
I could have kept on using lifted-base for the exception handling.
However, I'd have needed to write a new instance of MonadBaseControl
for the new monad.. and I didn't write the old instance.. I begged Bas
and he kindly sent it to me. Happily, MonadCatchIO-transformers is
able to derive a MonadCatchIO instance for my monad.

This is a deep level change. It passes the test suite! What could it break?

Well.. The most likely breakage would be to code that runs an Annex action
in an exception handler, and *wants* state changes to be thrown away.
Perhaps the state changes leaves the state inconsistent, or wrong. Since
there are relatively few places in git-annex that catch exceptions in the
Annex monad, and the AnnexState is generally just used to cache calculated
data, this is unlikely to be a problem.

Oh yeah, this change also makes Assistant.Types.ThreadedMonad a bit
redundant. It's now entirely possible to run concurrent Annex actions in
different threads, all sharing access to the same state! The ThreadedMonad
just adds some extra work on top of that, with its own MVar, and avoids
such actions possibly stepping on one-another's toes. I have not gotten
rid of it, but might try that later. Being able to run concurrent Annex
actions would simplify parts of the Assistant code.

											
										
										
											2013-05-19 18:16:36 +00:00
+								{- Calculates a value from an annex's git repository and its GitConfig. -}
-												Use lower case hash directories for storing files on crippled filesystems, same as is already done for bare repositories.

* since this is a crippled filesystem anyway, git-annex doesn't use
  symlinks on it
* so there's no reason to use the mixed case hash directories that we're
  stuck using to avoid breaking everyone's symlinks to the content
* so we can do what is already done for all bare repos, and make non-bare
  repos on crippled filesystems use the all-lower case hash directories
* which are, happily, all 3 letters long, so they cannot conflict with
  mixed case hash directories
* so I was able to 100% fix this and even resuming `git annex add` in the
  test case will recover and it will all just work.

											
										
										
											2013-04-04 19:46:33 +00:00
+								calcRepo :: (Git.Repo -> GitConfig -> IO a) -> Annex a
 								calcRepo a = getState id >>= runWith
 								  where
 									-- The repo and its GitConfig both come from a single read of
 									-- the state.
 									runWith s = liftIO $ a (repo s) (gitconfig s)
-												add annex.dbdir (WIP)

WIP: This is mostly complete, but there is a problem: createDirectoryUnder
throws an error when annex.dbdir is set to outside the git repo.

annex.dbdir is a workaround for filesystems where sqlite does not work,
due to eg, the filesystem not properly supporting locking.

It's intended to be set before initializing the repository. Changing it
in an existing repository can be done, but would be the same as making a
new repository and moving all the annexed objects into it. While the
databases get recreated from the git-annex branch in that situation, any
information that is in the databases but not stored in the branch gets
lost. It may be that no information ever gets stored in the databases
that cannot be reconstructed from the branch, but I have not verified
that.

Sponsored-by: Dartmouth College's Datalad project

											
										
										
											2022-08-11 20:57:44 +00:00
+								calcRepo' :: (Git.Repo -> GitConfig -> a) -> Annex a
 								calcRepo' f = apply <$> getState id
 								  where
 									-- Pure counterpart of calcRepo: no IO is run on the result.
 									apply s = f (repo s) (gitconfig s)
-												type based git config handling for remotes

Still a couple of places that use git config ad-hoc, but this is most of it
done.

											
										
										
											2013-01-01 17:52:47 +00:00
+								{- Returns the GitConfig currently held in the annex state. -}
 								getGitConfig :: Annex GitConfig
 								getGitConfig = gitconfig <$> getState id
-												type based git config handling

Now there's a Config type, that's extracted from the git config at startup.
Note that laziness means that individual config values are only looked up
and parsed on demand, and so we get implicit memoization for all of them.
So this is not only prettier and more type safe, it optimises several
places that didn't have explicit memoization before. As well as getting rid
of the ugly explicit memoization code.

Not yet done for annex.<remote>.* configuration settings.

											
										
										
											2012-12-30 03:10:18 +00:00
-												rename changeGitConfig to overrideGitConfig and avoid unnecessary calls

It's important that it be clear that it overrides a config, such that
reloading the git config won't change it, and in particular, setConfig
won't change it.

Most of the calls to changeGitConfig were actually after setConfig,
which was redundant and unnecessary. So removed those.

The only remaining one, besides --debug, is in the handling of
repository-global config values. That one's ok, because the
way mergeGitConfig is implemented, it does not override any value that
is set in git config. If a value with a repo-global setting was passed
to setConfig, it would set it in the git config, reload the git config,
re-apply mergeGitConfig, and use the newly set value, which is the right
thing.

											
										
										
											2020-02-27 05:06:35 +00:00
+								{- Overrides a GitConfig setting. The modification persists across
-												Bugfix: Don't ignore --debug when it is followed by -c

											
										
										
											2020-02-27 04:52:37 +00:00
+								 - reloads of the repo's config. -}
-												rename changeGitConfig to overrideGitConfig and avoid unnecessary calls

It's important that it be clear that it overrides a config, such that
reloading the git config won't change it, and in particular, setConfig
won't change it.

Most of the calls to changeGitConfig were actually after setConfig,
which was redundant and unnecessary. So removed those.

The only remaining one, besides --debug, is in the handling of
repository-global config values. That one's ok, because the
way mergeGitConfig is implemented, it does not override any value that
is set in git config. If a value with a repo-global setting was passed
to setConfig, it would set it in the git config, reload the git config,
re-apply mergeGitConfig, and use the newly set value, which is the right
thing.

											
										
										
											2020-02-27 05:06:35 +00:00
+								overrideGitConfig :: (GitConfig -> GitConfig) -> Annex ()
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+								overrideGitConfig f = changeState $ \st -> st
 									{ gitconfigadjustment = gitconfigadjustment st . f
 									, gitconfig = f (gitconfig st)
-												Bugfix: Don't ignore --debug when it is followed by -c

											
										
										
											2020-02-27 04:52:37 +00:00
+									}
-												Bug fix: Git config settings passed to git-annex -c did not always take effect.

When Config.setConfig runs, it throws away the old Repo and loads a new
one. So, add an action to adjust the Repo so that -c settings will persist
across that.

											
										
										
											2016-01-22 17:47:41 +00:00
 								{- Adds an adjustment to the Repo data. Adjustments persist across reloads
-												Fix a minor bug that caused options provided with -c to be passed multiple times to git.

											
										
										
											2020-03-16 17:06:44 +00:00
+								 - of the repo's config.
 								 -
 								 - Note that the action may run more than once, and should avoid eg,
 								 - appending the same value to a repo's config when run repeatedly.
 								 -}
-												Bug fix: Git config settings passed to git-annex -c did not always take effect.

When Config.setConfig runs, it throws away the old Repo and loads a new
one. So, add an action to adjust the Repo so that -c settings will persist
across that.

											
										
										
											2016-01-22 17:47:41 +00:00
+								adjustGitRepo :: (Git.Repo -> IO Git.Repo) -> Annex ()
 								adjustGitRepo a = do
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									changeState $ \st -> st { repoadjustment = \r -> repoadjustment st r >>= a }
-												Bug fix: Git config settings passed to git-annex -c did not always take effect.

When Config.setConfig runs, it throws away the old Repo and loads a new
one. So, add an action to adjust the Repo so that -c settings will persist
across that.

											
										
										
											2016-01-22 17:47:41 +00:00
+									changeGitRepo =<< gitRepo
-												Bugfix: Fix bug in inode cache sentinel check, which broke copying to local repos if the repo being copied from had moved to a different filesystem or otherwise changed all its inodes

											
										
										
											2013-03-12 20:41:54 +00:00
-												refix bug in a better way

Always run Git.Config.store, so when the git config gets reloaded,
the override gets re-added to it, and changeGitRepo then calls extractGitConfig
on it and sees the annex.* settings from the override.

Remove any prior occurrence of -c v and add it to the end. This way,
-c foo=1 -c foo=2 -c foo=1 will pass -c foo=1 to git, rather than -c foo=2

Note that, if git had some multiline config that got built up by
multiple -c's, this would not work still. But it never worked because
before the bug got fixed in the first place, the -c value was repeated
many times, so the multivalue thing would have been wrong. I don't think
-c can be used with multiline configs anyway, though git-config does
talk about them?

											
										
										
											2020-07-02 17:32:33 +00:00
+								{- Adds git config setting, like "foo=bar". It will be passed with -c
-												pass along -c options to child git-annex processes

											
										
										
											2020-12-15 14:44:36 +00:00
+								 - to git processes. The config setting is also recorded in the Repo,
-												refix bug in a better way

Always run Git.Config.store, so when the git config gets reloaded,
the override gets re-added to it, and changeGitRepo then calls extractGitConfig
on it and sees the annex.* settings from the override.

Remove any prior occurrence of -c v and add it to the end. This way,
-c foo=1 -c foo=2 -c foo=1 will pass -c foo=1 to git, rather than -c foo=2

Note that, if git had some multiline config that got built up by
multiple -c's, this would not work still. But it never worked because
before the bug got fixed in the first place, the -c value was repeated
many times, so the multivalue thing would have been wrong. I don't think
-c can be used with multiline configs anyway, though git-config does
talk about them?

											
										
										
											2020-07-02 17:32:33 +00:00
+								 - and the GitConfig is updated. -}
 								addGitConfigOverride :: String -> Annex ()
-												pass along -c options to child git-annex processes

											
										
										
											2020-12-15 14:44:36 +00:00
+								addGitConfigOverride v = do
 									adjustGitRepo $ \r ->
-												simplify and speed up Utility.FileSystemEncoding

This eliminates the distinction between decodeBS and decodeBS', encodeBS
and encodeBS', etc. The old implementation truncated at NUL, and the
primed versions had to do extra work to avoid that problem. The new
implementation does not truncate at NUL, and is also a lot faster.
(Benchmarked at 2x faster for decodeBS and 3x for encodeBS; more for the
primed versions.)

Note that filepath-bytestring 1.4.2.1.8 contains the same optimisation,
and upgrading to it will speed up to/fromRawFilePath.

AFAIK, nothing relied on the old behavior of truncating at NUL. Some
code used the faster versions in places where I was sure there would not
be a NUL. So this change is unlikely to break anything.

Also, moved s2w8 and w82s out of the module, as they do not involve
filesystem encoding really.

Sponsored-by: Shae Erisson on Patreon

											
										
										
											2021-08-11 00:45:02 +00:00
+										Git.Config.store (encodeBS v) Git.Config.ConfigList $
-												pass along -c options to child git-annex processes

											
										
										
											2020-12-15 14:44:36 +00:00
+											r { Git.gitGlobalOpts = go (Git.gitGlobalOpts r) }
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									changeState $ \st -> st { gitconfigoverride = v : gitconfigoverride st }
-												refix bug in a better way

Always run Git.Config.store, so when the git config gets reloaded,
the override gets re-added to it, and changeGitRepo then calls extractGitConfig
on it and sees the annex.* settings from the override.

Remove any prior occurrence of -c v and add it to the end. This way,
-c foo=1 -c foo=2 -c foo=1 will pass -c foo=1 to git, rather than -c foo=2

Note that, if git had some multiline config that got built up by
multiple -c's, this would not work still. But it never worked because
before the bug got fixed in the first place, the -c value was repeated
many times, so the multivalue thing would have been wrong. I don't think
-c can be used with multiline configs anyway, though git-config does
talk about them?

											
										
										
											2020-07-02 17:32:33 +00:00
+								  where
-												Apply codespell -w throughout

											
										
										
											2023-03-14 02:39:16 +00:00
+									-- Remove any prior occurrence of the setting to avoid
-												refix bug in a better way

Always run Git.Config.store, so when the git config gets reloaded,
the override gets re-added to it, and changeGitRepo then calls extractGitConfig
on it and sees the annex.* settings from the override.

Remove any prior occurrence of -c v and add it to the end. This way,
-c foo=1 -c foo=2 -c foo=1 will pass -c foo=1 to git, rather than -c foo=2

Note that, if git had some multiline config that got built up by
multiple -c's, this would not work still. But it never worked because
before the bug got fixed in the first place, the -c value was repeated
many times, so the multivalue thing would have been wrong. I don't think
-c can be used with multiline configs anyway, though git-config does
talk about them?

											
										
										
											2020-07-02 17:32:33 +00:00
+									-- building up many of them when the adjustment is run repeatedly,
 									-- and add the setting to the end.
 									go [] = [Param "-c", Param v]
 									go (Param "-c": Param v':rest) | v' == v = go rest
 									go (c:rest) = c : go rest
-												pass along -c options to child git-annex processes

											
										
										
											2020-12-15 14:44:36 +00:00
+								{- The settings that were passed to addGitConfigOverride, in the
 								 - order they were added. -}
 								getGitConfigOverrides :: Annex [String]
 								getGitConfigOverrides = do
 									-- addGitConfigOverride prepends each setting, so the stored
 									-- list is newest-first; flip it back.
 									newestfirst <- getState gitconfigoverride
 									return (reverse newestfirst)
-												Bugfix: Don't ignore --debug when it is followed by -c

											
										
										
											2020-02-27 04:52:37 +00:00
+								{- Changing the git Repo data also involves re-extracting its GitConfig. -}
 								changeGitRepo :: Git.Repo -> Annex ()
 								changeGitRepo r = do
 									repoadjuster <- getState repoadjustment
 									gitconfigadjuster <- getState gitconfigadjustment
 									r' <- liftIO $ repoadjuster r
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									changeState $ \st -> st
-												Bugfix: Don't ignore --debug when it is followed by -c

											
										
										
											2020-02-27 04:52:37 +00:00
+										{ repo = r'
 										, gitconfig = gitconfigadjuster $
 											extractGitConfig FromGitConfig r'
 										}
-												factor out getRemoteGitConfig

											
										
										
											2014-05-16 20:08:20 +00:00
+								{- Gets the RemoteGitConfig from a remote, given the Git.Repo for that
 								 - remote. -}
 								getRemoteGitConfig :: Git.Repo -> Annex RemoteGitConfig
 								getRemoteGitConfig r = do
 									g <- gitRepo
-												add annex-ignore-command and annex-sync-command configs

Added remote configuration settings annex-ignore-command and
annex-sync-command, which are dynamic equivalents of the annex-ignore
and annex-sync configurations.

For this I needed a new DynamicConfig infrastructure. Its implementation
should be as fast as before when there is no dynamic config, and it caches
so shell commands are only run once.

Note that annex-ignore-command exits nonzero when the remote should be ignored.
While that may seem backwards, it allows using the same command for it as
for annex-sync-command when you want to disable both.

This commit was sponsored by Trenton Cronholm on Patreon.

											
										
										
											2017-08-17 16:26:14 +00:00
+									liftIO $ atomically $ extractRemoteGitConfig g (Git.repoDescribe r)
-												factor out getRemoteGitConfig

											
										
										
											2014-05-16 20:08:20 +00:00
-												Bugfix: Fix bug in inode cache sentinel check, which broke copying to local repos if the repo being copied from had moved to a different filesystem or otherwise changed all its inodes

											
										
										
											2013-03-12 20:41:54 +00:00
+								{- Converts an Annex action into an IO action, that runs with a copy
 								 - of the current Annex state.
 								 -
 								 - Use with caution; the action should not rely on changing the
 								 - state, as it will be thrown away. -}
 								withCurrentState :: Annex a -> Annex (IO a)
 								withCurrentState a = do
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									(mvar, rd) <- ask
 									st <- liftIO $ readMVar mvar
 									return $ eval (st, rd) a
-												handle sync's use of setCurrentDirectory to work with relative paths

I think this is the last problematic setCurrentDirectory. I also audited
for external commands that git-annex might run with cwd = foo, and did not
find any that were passed any FilePath that might be absolute.

											
										
										
											2015-01-07 02:23:04 +00:00
 								{- It's not safe to use setCurrentDirectory in the Annex monad,
 								 - because the git repo paths are stored relative.
 								 - Instead, use this.
 								 -}
 								changeDirectory :: FilePath -> Annex ()
 								changeDirectory d = do
 									r <- liftIO . Git.adjustPath absPath =<< gitRepo
 									liftIO $ setCurrentDirectory d
 									r' <- liftIO $ Git.relPath r
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									changeState $ \st -> st { repo = r' }
-												refactor

											
										
										
											2015-04-30 19:04:01 +00:00
 								incError :: Annex ()
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+								incError = changeState $ \st ->
 									-- Adds one to the errcounter field of the state. The bang
 									-- patterns force the new count and the updated record when
 									-- the result is demanded; presumably this is to keep
 									-- repeated calls from piling up unevaluated (+ 1) thunks
 									-- in the counter.
 									let !c = errcounter st + 1
 									    !st' = st { errcounter = c }
 									in st'
-												Improve startup time for commands that do not operate on remotes

And for tab completion, by not unnecessarily statting paths to remotes,
which used to cause eg, spin-up of removable drives.

Got rid of the remotes member of Git.Repo. This was a bit painful.

Remote.Git modifies the list of remotes as it reads their configs,
so still need a persistent list of remotes. So, put it in as
Annex.gitremotes. It's only populated by getGitRemotes, so commands
like examinekey that don't care about remotes won't do so.

This commit was sponsored by Jake Vosloo on Patreon.

											
										
										
											2018-01-09 19:36:56 +00:00
 								getGitRemotes :: Annex [Git.Repo]
 								getGitRemotes = do
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+									st <- getState id
 									case gitremotes st of
-												Improve startup time for commands that do not operate on remotes

And for tab completion, by not unnecessarily statting paths to remotes,
which used to cause, e.g., spin-up of removable drives.

Got rid of the remotes member of Git.Repo. This was a bit painful.

Remote.Git modifies the list of remotes as it reads their configs,
so still need a persistent list of remotes. So, put it in as
Annex.gitremotes. It's only populated by getGitRemotes, so commands
like examinekey that don't care about remotes won't do so.

This commit was sponsored by Jake Vosloo on Patreon.

											
										
										
											2018-01-09 19:36:56 +00:00
+										Just rs -> return rs
 										Nothing -> do
-												start splitting out readonly values from AnnexState

Values in AnnexRead can be read more efficiently, without MVar overhead.
Only a few things have been moved into there, and the performance
increase so far is not likely to be noticeable.

This is groundwork for putting more stuff in there, particularly a value
that indicates if debugging is enabled.

The obvious next step is to change option parsing to not run in the
Annex monad to set values in AnnexState, and instead return a pure value
that gets stored in AnnexRead.

											
										
										
											2021-04-02 19:26:21 +00:00
+											rs <- liftIO $ Git.Construct.fromRemotes (repo st)
 											changeState $ \st' -> st' { gitremotes = Just rs }
-												Improve startup time for commands that do not operate on remotes

And for tab completion, by not unnecessarily statting paths to remotes,
which used to cause, e.g., spin-up of removable drives.

Got rid of the remotes member of Git.Repo. This was a bit painful.

Remote.Git modifies the list of remotes as it reads their configs,
so still need a persistent list of remotes. So, put it in as
Annex.gitremotes. It's only populated by getGitRemotes, so commands
like examinekey that don't care about remotes won't do so.

This commit was sponsored by Jake Vosloo on Patreon.

											
										
										
											2018-01-09 19:36:56 +00:00
+											return rs