git-annex/Annex.hs

{- git-annex monad
 -
 - Copyright 2010-2013 Joey Hess <joey@kitenet.net>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

{-# LANGUAGE GeneralizedNewtypeDeriving, PackageImports #-}

module Annex (
	Annex,
	AnnexState(..),
	PreferredContentMap,
	new,
	run,
	eval,
	getState,
	changeState,
	setFlag,
	setField,
	setOutput,
	getFlag,
	getField,
	addCleanup,
	gitRepo,
	inRepo,
	fromRepo,
	calcRepo,
	getGitConfig,
	changeGitConfig,
	changeGitRepo,
	withCurrentState,
) where

import "mtl" Control.Monad.Reader
import "MonadCatchIO-transformers" Control.Monad.CatchIO
import Control.Concurrent

import Common
import qualified Git
import qualified Git.Config
import Annex.Direct.Fixup
import Git.CatFile
import Git.CheckAttr
import Git.CheckIgnore
import Git.SharedRepository
import qualified Git.Queue
import Types.Key
import Types.Backend
import Types.GitConfig
import qualified Types.Remote
import Types.Crypto
import Types.BranchState
import Types.TrustLevel
import Types.Group
import Types.Messages
import Types.UUID
import Types.FileMatcher
import Types.NumCopies
import Types.LockPool
import qualified Utility.Matcher
import qualified Data.Map as M
import qualified Data.Set as S

{- git-annex's monad is a ReaderT around an AnnexState stored in a MVar.
 -
 - State changes are written to the MVar (see changeState), rather than
 - being threaded through the computation, so a change that has been made
 - stays in effect even if the action goes on to throw an exception.
 -
 - Neither the Annex constructor nor runAnnex is in the module's export
 - list; other modules are expected to go through getState and
 - changeState instead of touching the MVar.
 - NOTE(review): the derived MonadReader (MVar AnnexState) instance is
 - visible to importers, so `ask` can still reach the MVar -- confirm
 - that nothing outside this module relies on it.
 -}
newtype Annex a = Annex { runAnnex :: ReaderT (MVar AnnexState) IO a }
	deriving (
		Monad,
		MonadIO,
		MonadReader (MVar AnnexState),
		MonadCatchIO,
		Functor,
		Applicative
	)

{- Either a list of Utility.Matcher tokens (Left), or a
 - Utility.Matcher.Matcher (Right).
 - NOTE(review): presumably Left is the not-yet-built form that gets
 - turned into the Right form before use -- confirm in the users of
 - the limit field. -}
type Matcher a = Either [Utility.Matcher.Token a] (Utility.Matcher.Matcher a)
{- Per-UUID matchers. Each matcher works on functions that take a set
 - of UUIDs and a MatchInfo, and yield a Bool in the Annex monad. -}
type PreferredContentMap = M.Map UUID (Utility.Matcher.Matcher (S.Set UUID -> MatchInfo -> Annex Bool))

{- Internal state storage: the record that every Annex action reads
 - and modifies through the MVar.
 -
 - newState supplies the initial values: the Bool options start out
 - False, every Maybe field starts out as Nothing, every Map starts out
 - empty, and errcounter starts at 0.
 - NOTE(review): the Maybe fields look like caches that other modules
 - fill in on first use -- confirm against the code that sets them.
 -
 - flags, fields and cleanup are String-keyed maps, written by setFlag,
 - setField and addCleanup respectively. -}
data AnnexState = AnnexState
	{ repo :: Git.Repo
	, gitconfig :: GitConfig
	, backends :: [BackendA Annex]
	, remotes :: [Types.Remote.RemoteA Annex]
	, output :: MessageState
	, force :: Bool
	, fast :: Bool
	, auto :: Bool
	, daemon :: Bool
	, branchstate :: BranchState
	, repoqueue :: Maybe Git.Queue.Queue
	, catfilehandles :: M.Map FilePath CatFileHandle
	, checkattrhandle :: Maybe CheckAttrHandle
	, checkignorehandle :: Maybe (Maybe CheckIgnoreHandle)
	, forcebackend :: Maybe String
	, globalnumcopies :: Maybe NumCopies
	, forcenumcopies :: Maybe NumCopies
	, limit :: Matcher (MatchInfo -> Annex Bool)
	, uuidmap :: Maybe UUIDMap
	, preferredcontentmap :: Maybe PreferredContentMap
	, shared :: Maybe SharedRepository
	, forcetrust :: TrustMap
	, trustmap :: Maybe TrustMap
	, groupmap :: Maybe GroupMap
	, ciphers :: M.Map StorableCipher Cipher
	, lockpool :: LockPool
	, flags :: M.Map String Bool
	, fields :: M.Map String String
	, cleanup :: M.Map String (Annex ())
	, inodeschanged :: Maybe Bool
	, useragent :: Maybe String
	, errcounter :: Integer
	, unusedkeys :: Maybe (S.Set Key)
	}

{- Builds the initial state for a repository and its already-extracted
 - GitConfig.
 -
 - Only repo and gitconfig come from the parameters. Everything else
 - gets a fixed starting value: Bool options False, Maybe fields
 - Nothing, maps and lists empty, no limit tokens, and the default
 - message and branch states. -}
newState :: GitConfig -> Git.Repo -> AnnexState
newState c r = AnnexState
	{ repo = r
	, gitconfig = c
	, backends = []
	, remotes = []
	, output = defaultMessageState
	, force = False
	, fast = False
	, auto = False
	, daemon = False
	, branchstate = startBranchState
	, repoqueue = Nothing
	, catfilehandles = M.empty
	, checkattrhandle = Nothing
	, checkignorehandle = Nothing
	, forcebackend = Nothing
	, globalnumcopies = Nothing
	, forcenumcopies = Nothing
	, limit = Left []
	, uuidmap = Nothing
	, preferredcontentmap = Nothing
	, shared = Nothing
	, forcetrust = M.empty
	, trustmap = Nothing
	, groupmap = Nothing
	, ciphers = M.empty
	, lockpool = M.empty
	, flags = M.empty
	, fields = M.empty
	, cleanup = M.empty
	, inodeschanged = Nothing
	, useragent = Nothing
	, errcounter = 0
	, unusedkeys = Nothing
	}

{- Makes an Annex state object for the specified git repo.
 -
 - The repo is first passed through Git.Config.read, and the GitConfig
 - is extracted from the result. When that config has annexDirect set,
 - the repo stored in the state is the one returned by fixupDirect;
 - otherwise it is the repo as read. -}
new :: Git.Repo -> IO AnnexState
new r = do
	configured <- Git.Config.read r
	let c = extractGitConfig configured
	staterepo <- if annexDirect c
		then fixupDirect configured
		else return configured
	return $ newState c staterepo

{- Runs an Annex action against a fresh MVar holding the starting state.
 - Returns the action's result, paired with the state taken back out of
 - the MVar once the action has finished. -}
run :: AnnexState -> Annex a -> IO (a, AnnexState)
run s a = newMVar s >>= \mvar -> do
	result <- runReaderT (runAnnex a) mvar
	final <- takeMVar mvar
	return (result, final)

{- Runs an Annex action against a fresh MVar holding the starting state,
 - returning only the action's result. Whatever state is left in the
 - MVar is discarded. -}
eval :: AnnexState -> Annex a -> IO a
eval s a = runReaderT (runAnnex a) =<< newMVar s

{- Reads the current state out of the MVar (without taking it), and
 - applies the selector to it. -}
getState :: (AnnexState -> v) -> Annex v
getState selector = fmap selector $ ask >>= liftIO . readMVar

{- Applies a pure modification to the state held in the MVar.
 -
 - The new state is forced to WHNF before it is stored. Otherwise, a run
 - of changeState calls with no read in between would pile up a chain of
 - unevaluated modifier applications inside the MVar. Only the record
 - constructor is forced; the individual fields stay lazy.
 -
 - If evaluating the modifier throws, modifyMVar_ puts the previous
 - state back, rather than leaving a broken thunk in the MVar. -}
changeState :: (AnnexState -> AnnexState) -> Annex ()
changeState modifier = do
	mvar <- ask
	liftIO $ modifyMVar_ mvar $ \s -> return $! modifier s

{- Sets a flag to True. Any existing entry for the flag is replaced.
 -
 - Uses M.insert rather than M.insertWith' const, which did the same
 - thing but is deprecated and no longer present in newer versions of
 - containers. -}
setFlag :: String -> Annex ()
setFlag flag = changeState $ \s ->
	s { flags = M.insert flag True $ flags s }

{- Sets a field to a value. Any existing value of the field is replaced.
 -
 - Uses M.insert rather than M.insertWith' const, which is deprecated
 - and no longer present in newer versions of containers. insertWith'
 - forced the inserted value to WHNF; the $! keeps doing that. -}
setField :: String -> String -> Annex ()
setField field value = changeState $ \s ->
	s { fields = (M.insert field $! value) (fields s) }

{- Adds a cleanup action to perform, stored under the given id.
 - Adding another action with the same id replaces the earlier one.
 -
 - Uses M.insert rather than M.insertWith' const, which is deprecated
 - and no longer present in newer versions of containers. insertWith'
 - forced the inserted value to WHNF; the $! keeps doing that. -}
addCleanup :: String -> Annex () -> Annex ()
addCleanup uid a = changeState $ \s ->
	s { cleanup = (M.insert uid $! a) (cleanup s) }

{- Changes the outputType of the state's MessageState, leaving the rest
 - of the MessageState as it was. -}
setOutput :: OutputType -> Annex ()
setOutput o = changeState settype
  where
	settype s = s { output = (output s) { outputType = o } }

{- Looks up a flag. A flag that has no entry counts as False. -}
getFlag :: String -> Annex Bool
getFlag flag = M.findWithDefault False flag <$> getState flags

{- Looks up the value of a field; Nothing when the field is not set. -}
getField :: String -> Annex (Maybe String)
getField field = getState $ M.lookup field . fields

{- The Git.Repo currently held in the Annex state. -}
gitRepo :: Annex Git.Repo
gitRepo = repo <$> getState id

{- Passes the annex's git repository to an IO action, and runs it. -}
inRepo :: (Git.Repo -> IO a) -> Annex a
inRepo a = do
	r <- gitRepo
	liftIO $ a r

{- Applies a pure function to the annex's git repository, returning
 - the value it extracts. -}
fromRepo :: (Git.Repo -> a) -> Annex a
fromRepo a = do
	r <- gitRepo
	return $ a r

{- Runs an IO action that needs both the annex's git repository and its
 - GitConfig. Both are taken from a single read of the state. -}
calcRepo :: (Git.Repo -> GitConfig -> IO a) -> Annex a
calcRepo a = getState id >>= \s -> liftIO $ a (repo s) (gitconfig s)

{- The GitConfig currently held in the Annex state. -}
getGitConfig :: Annex GitConfig
getGitConfig = gitconfig <$> getState id

{- Replaces the state's GitConfig with the result of applying the
 - given function to it. The Git.Repo is not touched. -}
changeGitConfig :: (GitConfig -> GitConfig) -> Annex ()
changeGitConfig a = changeState update
  where
	update s = s { gitconfig = a (gitconfig s) }

{- Stores a new Git.Repo in the state. The GitConfig is re-extracted
 - from the new repo at the same time, so the two stay in step. -}
changeGitRepo :: Git.Repo -> Annex ()
changeGitRepo r = changeState $ \s -> s { repo = r, gitconfig = c }
  where
	c = extractGitConfig r

{- Turns an Annex action into an IO action. The IO action evaluates it
 - starting from a snapshot of the Annex state, taken at the point
 - withCurrentState is run.
 -
 - Use with caution: the IO action works on its own copy of the state,
 - so any state changes it makes are thrown away, and it should not
 - rely on them being seen elsewhere. -}
withCurrentState :: Annex a -> Annex (IO a)
withCurrentState a = fmap (\s -> eval s a) (getState id)
-												copyright statements

											
										
										
											2010-10-27 20:53:54 +00:00
+								{- git-annex monad
 								 -
-												Switch to MonadCatchIO-transformers for better handling of state while catching exceptions.

As seen in this bug report, the lifted exception handling using the StateT
monad throws away state changes when an action throws an exception.
http://git-annex.branchable.com/bugs/git_annex_fork_bombs_on_gpg_file/
  .. Which can result in cached values being redundantly calculated, or other
     possibly worse bugs when the annex state gets out of sync with reality.

This switches from a StateT AnnexState to a ReaderT (MVar AnnexState).
All changes to the state go via the MVar. So when an Annex action is
running inside an exception handler, and it makes some changes, they
immediately go into effect in the MVar. If it then throws an exception
(or even crashes its thread!), the state changes are still in effect.

The MonadCatchIO-transformers change is actually only incidental.
I could have kept on using lifted-base for the exception handling.
However, I'd have needed to write a new instance of MonadBaseControl
for the new monad.. and I didn't write the old instance.. I begged Bas
and he kindly sent it to me. Happily, MonadCatchIO-transformers is
able to derive a MonadCatchIO instance for my monad.

This is a deep level change. It passes the test suite! What could it break?

Well.. The most likely breakage would be to code that runs an Annex action
in an exception handler, and *wants* state changes to be thrown away.
Perhaps the state changes leaves the state inconsistent, or wrong. Since
there are relatively few places in git-annex that catch exceptions in the
Annex monad, and the AnnexState is generally just used to cache calculated
data, this is unlikely to be a problem.

Oh yeah, this change also makes Assistant.Types.ThreadedMonad a bit
redundant. It's now entirely possible to run concurrent Annex actions in
different threads, all sharing access to the same state! The ThreadedMonad
just adds some extra work on top of that, with its own MVar, and avoids
such actions possibly stepping on one-another's toes. I have not gotten
rid of it, but might try that later. Being able to run concurrent Annex
actions would simplify parts of the Assistant code.

											
										
										
											2013-05-19 18:16:36 +00:00
+								 - Copyright 2010-2013 Joey Hess <joey@kitenet.net>
-												copyright statements

											
										
										
											2010-10-27 20:53:54 +00:00
+								 -
 								 - Licensed under the GNU GPL version 3 or higher.
 								 -}
-												got annexing working

											
										
										
											2010-10-10 19:04:07 +00:00
-												fix standalone build of this module

											
										
										
											2013-11-22 16:21:37 +00:00
+								{-# LANGUAGE GeneralizedNewtypeDeriving, PackageImports #-}
-												make Annex an opaque data type

Was a type alias; using newtype has the benefit that type errors will
show "Annex foo" rather than two lines of internal type nonsense. Yay!
There should be no other effects to size or runtime.

I've tried to do this at least twice before (each time I read RWH chapter 10);
finally understood how to do it this time.. sorta.

											
										
										
											2011-08-19 18:28:07 +00:00
-												explicit exports

											
										
										
											2010-10-11 21:52:46 +00:00
+								module Annex (
-												successfully split Annex and AnnexState out of TypeInternals

											
										
										
											2011-01-26 01:49:04 +00:00
+									Annex,
 									AnnexState(..),
-												added preferred-content log, and allow editing it with vicfg

This includes a full parser for the boolean expressions in the log,
that compiles them into Matchers. Those matchers are not used yet.

A complication is that matching against an expression should never
crash git-annex with an error. Instead, vicfg checks that the expressions
parse. If a bad expression (or an expression understood by some future
git-annex version) gets into the log, it'll be ignored.

Most of the code in Limit couldn't fail anyway, but I did have to make
limitCopies check its parameter first, and return an error if it's bad,
rather than erroring at runtime.

											
										
										
											2012-10-04 19:48:59 +00:00
+									PreferredContentMap,
-												more reorg, spiffed up state monad

											
										
										
											2010-10-14 07:18:11 +00:00
+									new,
 									run,
-												simplify evals

											
										
										
											2010-11-01 03:24:16 +00:00
+									eval,
-												rework config storage

Moved away from a map of flags to storing config directly in the AnnexState
structure. Got rid of most accessor functions in Annex.

This allowed supporting multiple --exclude flags.

											
										
										
											2011-01-26 04:17:38 +00:00
+									getState,
 									changeState,
-												more command-specific options

Made --from and --to command-specific options.

Added generic storage for values of command-specific options,
which allows removing some of the special case fields in AnnexState.

(Also added generic storage for command-specific flags, although there are
not yet any.)

Note that this storage uses a Map, so repeatedly looking up the same value
is slightly more expensive than looking up an AnnexState field. But, the
value can be looked up once in the seek stage, transformed as necessary,
and passed in a closure to the start stage, and this avoids that overhead.

Still, I'm hesitant to use this for things like force or fast flags.
It's probably best to reserve it for flags that are only used by a few
commands, or options like --from and --to that it's important only be
allowed to be used with commands that implement them, to avoid user
confusion.

											
										
										
											2012-01-06 07:06:25 +00:00
+									setFlag,
 									setField,
-												fix test suite build

											
										
										
											2012-04-30 17:59:05 +00:00
+									setOutput,
-												more command-specific options

Made --from and --to command-specific options.

Added generic storage for values of command-specific options,
which allows removing some of the special case fields in AnnexState.

(Also added generic storage for command-specific flags, although there are
not yet any.)

Note that this storage uses a Map, so repeatedly looking up the same value
is slightly more expensive than looking up an AnnexState field. But, the
value can be looked up once in the seek stage, transformed as necessary,
and passed in a closure to the start stage, and this avoids that overhead.

Still, I'm hesitant to use this for things like force or fast flags.
It's probably best to reserve it for flags that are only used by a few
commands, or options like --from and --to that it's important only be
allowed to be used with commands that implement them, to avoid user
confusion.

											
										
										
											2012-01-06 07:06:25 +00:00
+									getFlag,
 									getField,
-												do a cleanup commit after moving data from or to a git remote

Added Annex.cleanup, which is a general purpose interface for adding
actions to run at the end.

Remotes with the old git-annex-shell will commit every time, and have no
commit command, so hide stderr when running the commit command.

											
										
										
											2012-02-25 22:02:49 +00:00
+									addCleanup,
-												reorder repo parameters last

Many functions took the repo as their first parameter. Changing it
consistently to be the last parameter allows doing some useful things with
currying, that reduce boilerplate.

In particular, g <- gitRepo is almost never needed now, instead
use inRepo to run an IO action in the repo, and fromRepo to get
a value from the repo.

This also provides more opportunities to use monadic and applicative
combinators.

											
										
										
											2011-11-08 19:34:10 +00:00
+									gitRepo,
 									inRepo,
 									fromRepo,
-												Use lower case hash directories for storing files on crippled filesystems, same as is already done for bare repositories.

* since this is a crippled filesystem anyway, git-annex doesn't use
  symlinks on it
* so there's no reason to use the mixed case hash directories that we're
  stuck using to avoid breaking everyone's symlinks to the content
* so we can do what is already done for all bare repos, and make non-bare
  repos on crippled filesystems use the all-lower case hash directories
* which are, happily, all 3 letters long, so they cannot conflict with
  mixed case hash directories
* so I was able to 100% fix this and even resuming `git annex add` in the
  test case will recover and it will all just work.

											
										
										
											2013-04-04 19:46:33 +00:00
+									calcRepo,
-												type based git config handling for remotes

Still a couple of places that use git config ad-hoc, but this is most of it
done.

											
										
										
											2013-01-01 17:52:47 +00:00
+									getGitConfig,
 									changeGitConfig,
-												type based git config handling

Now there's a Config type, that's extracted from the git config at startup.
Note that laziness means that individual config values are only looked up
and parsed on demand, and so we get implicit memoization for all of them.
So this is not only prettier and more type safe, it optimises several
places that didn't have explicit memoization before. As well as getting rid
of the ugly explicit memoization code.

Not yet done for annex.<remote>.* configuration settings.

											
										
										
											2012-12-30 03:10:18 +00:00
+									changeGitRepo,
-												Bugfix: Fix bug in inode cache sentinal check, which broke copying to local repos if the repo being copied from had moved to a different filesystem or otherwise changed all its inodes'

											
										
										
											2013-03-12 20:41:54 +00:00
+									withCurrentState,
-												explicit exports

											
										
										
											2010-10-11 21:52:46 +00:00
+								) where
-												got annexing working

											
										
										
											2010-10-10 19:04:07 +00:00
-												Switch to MonadCatchIO-transformers for better handling of state while catching exceptions.

As seen in this bug report, the lifted exception handling using the StateT
monad throws away state changes when an action throws an exception.
http://git-annex.branchable.com/bugs/git_annex_fork_bombs_on_gpg_file/
  .. Which can result in cached values being redundantly calculated, or other
     possibly worse bugs when the annex state gets out of sync with reality.

This switches from a StateT AnnexState to a ReaderT (MVar AnnexState).
All changes to the state go via the MVar. So when an Annex action is
running inside an exception handler, and it makes some changes, they
immediately go into effect in the MVar. If it then throws an exception
(or even crashes its thread!), the state changes are still in effect.

The MonadCatchIO-transformers change is actually only incidental.
I could have kept on using lifted-base for the exception handling.
However, I'd have needed to write a new instance of MonadBaseControl
for the new monad.. and I didn't write the old instance.. I begged Bas
and he kindly sent it to me. Happily, MonadCatchIO-transformers is
able to derive a MonadCatchIO instance for my monad.

This is a deep level change. It passes the test suite! What could it break?

Well.. The most likely breakage would be to code that runs an Annex action
in an exception handler, and *wants* state changes to be thrown away.
Perhaps the state changes leaves the state inconsistent, or wrong. Since
there are relatively few places in git-annex that catch exceptions in the
Annex monad, and the AnnexState is generally just used to cache calculated
data, this is unlikely to be a problem.

Oh yeah, this change also makes Assistant.Types.ThreadedMonad a bit
redundant. It's now entirely possible to run concurrent Annex actions in
different threads, all sharing access to the same state! The ThreadedMonad
just adds some extra work on top of that, with its own MVar, and avoids
such actions possibly stepping on one-another's toes. I have not gotten
rid of it, but might try that later. Being able to run concurrent Annex
actions would simplify parts of the Assistant code.

											
										
										
											2013-05-19 18:16:36 +00:00
+								import "mtl" Control.Monad.Reader
 								import "MonadCatchIO-transformers" Control.Monad.CatchIO
 								import Control.Concurrent
-												tweaks

											
										
										
											2010-10-16 20:20:49 +00:00
-												factor out common imports

no code changes

											
										
										
											2011-10-04 02:24:57 +00:00
+								import Common
-												renamed GitRepo to Git

It was always imported qualified as Git anyway

											
										
										
											2011-06-30 17:16:57 +00:00
+								import qualified Git
-												split out three modules from Git

Constructors and configuration make sense in separate modules.
A separate Git.Types is needed to avoid cycles.

											
										
										
											2011-12-13 19:05:07 +00:00
+								import qualified Git.Config
-												fix reversion in relative paths to local remotes of direct mode repos

0980f3dae62b8a9e2d6a6b40767299559e4675a8 broke support for local remotes
from direct mode repos, because the relative path was taken to be from the
gitdir, rather than from the work tree.

											
										
										
											2013-11-26 22:11:37 +00:00
+								import Annex.Direct.Fixup
-												refactor catfile code

split into generic IO code, and a thin Annex wrapper

											
										
										
											2011-09-28 19:15:42 +00:00
+								import Git.CatFile
-												rework git check-attr interface

Now gitattributes are looked up, efficiently, in only the places that
really need them, using the same approach used for cat-file.

The old CheckAttr code seemed very fragile, in the way it streamed files
through git check-attr.
I actually found that cad8824852aa0623dc41eac02a9e2bae47d88ec4
was still deadlocking with ghc 7.4, at the end of adding a lot of files.
This should fix that problem, and avoid future ones.

The best part is that this removes withAttrFilesInGit and withNumCopies,
which were complicated Seek methods, as well as simplifying the types
for several other Seek methods that had a Backend tupled in.

											
										
										
											2012-02-14 03:42:44 +00:00
+								import Git.CheckAttr
-												gitignore support for the assistant and watcher

Requires git 1.8.4 or newer. When it's installed, a background
git check-ignore process is run, and used to efficiently check ignores
whenever a new file is added.

Thanks to Adam Spiers, for getting the necessary support into git for this.

A complication is what to do about files that are gitignored but have
been checked into git anyway. git commands assume the ignore has been
overridden in this case, and do not need any more overriding to commit a
changed version.

However, for the assistant to do the same, it would have to run git ls-files
to check if the ignored file is in git. This is somewhat expensive. Or it
could use the running git-cat-file process to query the file that way,
but that requires transferring the whole file content over a pipe, so it
can be quite expensive too, for files that are not git-annex
symlinks.

Now imagine if the user knows that a file or directory tree will be getting
frequent changes, and doesn't want the assistant to sync it, so gitignores
it. The assistant could overload the system with repeated ls-files checks!

So, I've decided that the assistant will not automatically commit changes
to files that are gitignored. This is a tradeoff. Hopefully it won't be a
problem to adjust .gitignore settings to not ignore files you want the
assistant to autocommit, or to manually git annex add files that are listed
in .gitignore.

(This could be revisited if git-annex gets access to an interface to check
the content of the index w/o forking a git command. This could be libgit2,
or perhaps a separate git cat-file --batch-check process, so it wouldn't
need to ship over the whole file content.)

This commit was sponsored by Francois Marier. Thanks!

											
										
										
											2013-08-02 23:31:55 +00:00
+								import Git.CheckIgnore
-												cache parsed core.sharedrepository

											
										
										
											2012-04-21 23:42:49 +00:00
+								import Git.SharedRepository
-												use Common in a few more modules

											
										
										
											2011-12-20 18:37:53 +00:00
+								import qualified Git.Queue
-												add "unused" preferred content expression

With a really nice optimisation that keeps it from having any overhead
in normal operation!

This commit was sponsored by Ulises Vitulli.

											
										
										
											2014-01-22 20:35:32 +00:00
+								import Types.Key
-												rename modules for data types into Types/ directory

											
										
										
											2011-06-02 01:56:04 +00:00
+								import Types.Backend
-												type based git config handling for remotes

Still a couple of places that use git config ad-hoc, but this is most of it
done.

											
										
										
											2013-01-01 17:52:47 +00:00
+								import Types.GitConfig
-												rework annex-ignore handling

Only one place needs to filter the list of remotes for ignored remotes:
keyPossibilities. Make the full list available to everything else.

This allows getting rid of the special case handling for --from and --to
to make ignored remotes not be ignored with those options.

											
										
										
											2011-09-19 00:11:39 +00:00
+								import qualified Types.Remote
-												rename modules for data types into Types/ directory

											
										
										
											2011-06-02 01:56:04 +00:00
+								import Types.Crypto
-												Branch handling improvements

Support creating the branch.

Unified branch state into a single data type.

Only commit changes when the index has been changed.

											
										
										
											2011-06-22 19:58:30 +00:00
+								import Types.BranchState
-												cache the trustmap

Doubles the speed of fsck, and speeds up drop as well.

											
										
										
											2011-06-24 01:25:39 +00:00
+								import Types.TrustLevel
-												group, ungroup: New commands to indicate groups of repositories.

											
										
										
											2012-10-01 19:12:04 +00:00
+								import Types.Group
-												display "Recording state in git..." when staging the journal

A bit tricky to avoid printing it twice in a row when there are queued git
commands to run and journal to stage.

Added a generic way to run an action that may output multiple side
messages, with only the first displayed.

											
										
										
											2012-04-27 17:23:52 +00:00
+								import Types.Messages
-												added preferred-content log, and allow editing it with vicfg

This includes a full parser for the boolean expressions in the log,
that compiles them into Matchers. Those matchers are not used yet.

A complication is that matching against an expression should never
crash git-annex with an error. Instead, vicfg checks that the expressions
parse. If a bad expression (or an expression understood by some future
git-annex version) gets into the log, it'll be ignored.

Most of the code in Limit couldn't fail anyway, but I did have to make
limitCopies check its parameter first, and return an error if it's bad,
rather than erroring at runtime.

											
										
										
											2012-10-04 19:48:59 +00:00
+								import Types.UUID
-												refactor

											
										
										
											2013-05-25 03:07:26 +00:00
+								import Types.FileMatcher
-												reorganize numcopies code (no behavior changes)

Move stuff into Logs.NumCopies. Add a NumCopies newtype.

Better names for various serialization classes that are specific to one
thing or another.

											
										
										
											2014-01-21 20:08:19 +00:00
+								import Types.NumCopies
-												use locking on Windows

This is all the easy cases, where there was already a separate lock file.

											
										
										
											2014-01-28 18:17:14 +00:00
+								import Types.LockPool
-												refactor --exclude to use Utility.Matcher

This should change no behavior, but opens the possibility to use the
matcher for other sorts of limits on which files git-annex processes.

											
										
										
											2011-09-18 21:47:49 +00:00
+								import qualified Utility.Matcher
-												Fix caching of decrypted ciphers, which failed when drop had to check multiple different encrypted special remotes.

											
										
										
											2011-12-08 20:01:46 +00:00
+								import qualified Data.Map as M
-												add AssumeNotPresent parameter to limits

Solves the issue with preferred content expressions and dropping that
I mentioned yesterday. My solution was to add a parameter to specify a set
of repositories where content should be assumed not to be present. When
deciding whether to drop, it can put the current repository in, and then
if the expression fails to match, the content can be dropped.

Using yesterday's example "(not copies=trusted:2) and (not in=usbdrive)",
when the local repo is one of the 2 trusted copies, the drop check will
see only 1 trusted copy, so the expression matches, and so the content will
not be dropped.

											
										
										
											2012-10-05 20:52:44 +00:00
+								import qualified Data.Set as S
-												successfully split Annex and AnnexState out of TypeInternals

											
										
										
											2011-01-26 01:49:04 +00:00
-												Switch to MonadCatchIO-transformers for better handling of state while catching exceptions.

As seen in this bug report, the lifted exception handling using the StateT
monad throws away state changes when an action throws an exception.
http://git-annex.branchable.com/bugs/git_annex_fork_bombs_on_gpg_file/
  .. Which can result in cached values being redundantly calculated, or other
     possibly worse bugs when the annex state gets out of sync with reality.

This switches from a StateT AnnexState to a ReaderT (MVar AnnexState).
All changes to the state go via the MVar. So when an Annex action is
running inside an exception handler, and it makes some changes, they
immediately go into effect in the MVar. If it then throws an exception
(or even crashes its thread!), the state changes are still in effect.

The MonadCatchIO-transformers change is actually only incidental.
I could have kept on using lifted-base for the exception handling.
However, I'd have needed to write a new instance of MonadBaseControl
for the new monad.. and I didn't write the old instance.. I begged Bas
and he kindly sent it to me. Happily, MonadCatchIO-transformers is
able to derive a MonadCatchIO instance for my monad.

This is a deep level change. It passes the test suite! What could it break?

Well.. The most likely breakage would be to code that runs an Annex action
in an exception handler, and *wants* state changes to be thrown away.
Perhaps the state changes leaves the state inconsistent, or wrong. Since
there are relatively few places in git-annex that catch exceptions in the
Annex monad, and the AnnexState is generally just used to cache calculated
data, this is unlikely to be a problem.

Oh yeah, this change also makes Assistant.Types.ThreadedMonad a bit
redundant. It's now entirely possible to run concurrent Annex actions in
different threads, all sharing access to the same state! The ThreadedMonad
just adds some extra work on top of that, with its own MVar, and avoids
such actions possibly stepping on one-another's toes. I have not gotten
rid of it, but might try that later. Being able to run concurrent Annex
actions would simplify parts of the Assistant code.

											
										
										
											2013-05-19 18:16:36 +00:00
+								{- git-annex's monad is a ReaderT around an AnnexState stored in a MVar.
 								 - This allows modifying the state in an exception-safe fashion.
 								 - The MVar is not exposed outside this module.
 								 -}
 								newtype Annex a = Annex { runAnnex :: ReaderT (MVar AnnexState) IO a }
-												make Annex an opaque data type

Was a type alias; using newtype has the benefit that type errors will
show "Annex foo" rather than two lines of internal type nonsense. Yay!
There should be no other effects to size or runtime.

I've tried to do this at least twice before (each time I read RWH chapter 10);
finally understood how to do it this time.. sorta.

											
										
										
											2011-08-19 18:28:07 +00:00
+									deriving (
 										Monad,
 										MonadIO,
-												Switch to MonadCatchIO-transformers for better handling of state while catching exceptions.

As seen in this bug report, the lifted exception handling using the StateT
monad throws away state changes when an action throws an exception.
http://git-annex.branchable.com/bugs/git_annex_fork_bombs_on_gpg_file/
  .. Which can result in cached values being redundantly calculated, or other
     possibly worse bugs when the annex state gets out of sync with reality.

This switches from a StateT AnnexState to a ReaderT (MVar AnnexState).
All changes to the state go via the MVar. So when an Annex action is
running inside an exception handler, and it makes some changes, they
immediately go into effect in the MVar. If it then throws an exception
(or even crashes its thread!), the state changes are still in effect.

The MonadCatchIO-transformers change is actually only incidental.
I could have kept on using lifted-base for the exception handling.
However, I'd have needed to write a new instance of MonadBaseControl
for the new monad.. and I didn't write the old instance.. I begged Bas
and he kindly sent it to me. Happily, MonadCatchIO-transformers is
able to derive a MonadCatchIO instance for my monad.

This is a deep level change. It passes the test suite! What could it break?

Well.. The most likely breakage would be to code that runs an Annex action
in an exception handler, and *wants* state changes to be thrown away.
Perhaps the state changes leave the state inconsistent, or wrong. Since
there are relatively few places in git-annex that catch exceptions in the
Annex monad, and the AnnexState is generally just used to cache calculated
data, this is unlikely to be a problem.

Oh yeah, this change also makes Assistant.Types.ThreadedMonad a bit
redundant. It's now entirely possible to run concurrent Annex actions in
different threads, all sharing access to the same state! The ThreadedMonad
just adds some extra work on top of that, with its own MVar, and avoids
such actions possibly stepping on one-another's toes. I have not gotten
rid of it, but might try that later. Being able to run concurrent Annex
actions would simplify parts of the Assistant code.

											
										
										
											2013-05-19 18:16:36 +00:00
+										MonadReader (MVar AnnexState),
 										MonadCatchIO,
-												code simplification thanks to applicative functors

											
										
										
											2011-08-25 04:28:55 +00:00
+										Functor,
 										Applicative
-												make Annex an opaque data type

Was a type alias; using newtype has the benefit that type errors will
show "Annex foo" rather than two lines of internal type nonsense. Yay!
There should be no other effects to size or runtime.

I've tried to do this at least twice before (each time I read RWH chapter 10);
finally understood how to do it this time.. sorta.

											
										
										
											2011-08-19 18:28:07 +00:00
+									)
-												successfully split Annex and AnnexState out of TypeInternals

											
										
										
											2011-01-26 01:49:04 +00:00
-												type alias

											
										
										
											2011-12-31 08:19:10 +00:00
+								type Matcher a = Either [Utility.Matcher.Token a] (Utility.Matcher.Matcher a)
-												improve matcher data type to allow matching Keys, instead of just files (no behavior changes)

											
										
										
											2014-01-18 18:51:55 +00:00
+								type PreferredContentMap = M.Map UUID (Utility.Matcher.Matcher (S.Set UUID -> MatchInfo -> Annex Bool))
-												added preferred-content log, and allow editing it with vicfg

This includes a full parser for the boolean expressions in the log,
that compiles them into Matchers. Those matchers are not used yet.

A complication is that matching against an expression should never
crash git-annex with an error. Instead, vicfg checks that the expressions
parse. If a bad expression (or an expression understood by some future
git-annex version) gets into the log, it'll be ignored.

Most of the code in Limit couldn't fail anyway, but I did have to make
limitCopies check its parameter first, and return an error if it's bad,
rather than erroring at runtime.

											
										
										
											2012-10-04 19:48:59 +00:00
-												successfully split Annex and AnnexState out of TypeInternals

											
										
										
											2011-01-26 01:49:04 +00:00
+								-- internal state storage
-												rework config storage

Moved away from a map of flags to storing config directly in the AnnexState
structure. Got rid of most accessor functions in Annex.

This allowed supporting multiple --exclude flags.

											
										
										
											2011-01-26 04:17:38 +00:00
+								data AnnexState = AnnexState
 									{ repo :: Git.Repo
-												type based git config handling for remotes

Still a couple of places that use git config ad-hoc, but this is most of it
done.

											
										
										
											2013-01-01 17:52:47 +00:00
+									, gitconfig :: GitConfig
-												type alias cleanup

											
										
										
											2011-12-31 08:11:39 +00:00
+									, backends :: [BackendA Annex]
 									, remotes :: [Types.Remote.RemoteA Annex]
-												display "Recording state in git..." when staging the journal

A bit tricky to avoid printing it twice in a row when there are queued git
commands to run and journal to stage.

Added a generic way to run an action that may output multiple side
messages, with only the first displayed.

											
										
										
											2012-04-27 17:23:52 +00:00
+									, output :: MessageState
-												rework config storage

Moved away from a map of flags to storing config directly in the AnnexState
structure. Got rid of most accessor functions in Annex.

This allowed supporting multiple --exclude flags.

											
										
										
											2011-01-26 04:17:38 +00:00
+									, force :: Bool
-												fast mode

Add --fast flag, that can enable less expensive, but also less thorough versions of some commands.

* Add --fast flag, that can enable less expensive, but also less thorough
  versions of some commands.
* fsck: In fast mode, avoid checking checksums.
* unused: In fast mode, just show all existing temp files as unused,
  and avoid expensive scan for other unused content.

											
										
										
											2011-03-22 21:41:06 +00:00
+									, fast :: Bool
-												remove optimize subcommand; use --auto instead

get, drop: Added --auto option, which decides whether to get/drop content
as needed to work toward the configured numcopies.

The problem with bundling it up in optimize was that I then found I wanted
to run an optimize that did not drop files, only got them. Considered adding
a --only-get switch to it, but that seemed wrong. Instead, let's make
existing subcommands optionally smarter.

Note that the only actual difference between drop and drop --auto is that
the latter does not even try to drop a file if it knows of not enough
copies, and does not print any error messages about files it was unable to
drop.

It might be nice to make get avoid asking git for attributes when not in
auto mode. For now it always asks for attributes.

											
										
										
											2011-09-15 17:30:04 +00:00
+									, auto :: Bool
-												assistant: Work around git-cat-file's not reloading the index after files are staged.

Argh.

											
										
										
											2013-05-25 04:37:41 +00:00
+									, daemon :: Bool
-												Branch handling improvements

Support creating the branch.

Unified branch state into a single data type.

Only commit changes when the index has been changed.

											
										
										
											2011-06-22 19:58:30 +00:00
+									, branchstate :: BranchState
-												Added a annex.queuesize setting

useful when adding hundreds of thousands of files on a system with plenty
of memory.

git add gets quite slow in such a large repository, so if the system has
more than the ~32 mb of memory the queue can use by default, it's a useful
optimisation to increase the queue size, in order to decrease the number
of times git add is run.

											
										
										
											2012-02-15 15:13:13 +00:00
+									, repoqueue :: Maybe Git.Queue.Queue
-												start one git-cat-file per index file

This reverts 1c83b6c43936802b436f64748e6ede026946eefe and properly fixes
the issue discussed there.

This makes git-annex behave much nicer in direct mode.

											
										
										
											2013-05-15 22:46:38 +00:00
+									, catfilehandles :: M.Map FilePath CatFileHandle
-												rework git check-attr interface

Now gitattributes are looked up, efficiently, in only the places that
really need them, using the same approach used for cat-file.

The old CheckAttr code seemed very fragile, in the way it streamed files
through git check-attr.
I actually found that cad8824852aa0623dc41eac02a9e2bae47d88ec4
was still deadlocking with ghc 7.4, at the end of adding a lot of files.
This should fix that problem, and avoid future ones.

The best part is that this removes withAttrFilesInGit and withNumCopies,
which were complicated Seek methods, as well as simplifying the types
for several other Seek methods that had a Backend tupled in.

											
										
										
											2012-02-14 03:42:44 +00:00
+									, checkattrhandle :: Maybe CheckAttrHandle
-												gitignore support for the assistant and watcher

Requires git 1.8.4 or newer. When it's installed, a background
git check-ignore process is run, and used to efficiently check ignores
whenever a new file is added.

Thanks to Adam Spiers, for getting the necessary support into git for this.

A complication is what to do about files that are gitignored but have
been checked into git anyway. git commands assume the ignore has been
overridden in this case, and do not need any more overriding to commit a
changed version.

However, for the assistant to do the same, it would have to run git ls-files
to check if the ignored file is in git. This is somewhat expensive. Or it
could use the running git-cat-file process to query the file that way,
but that requires transferring the whole file content over a pipe, so it
can be quite expensive too, for files that are not git-annex
symlinks.

Now imagine if the user knows that a file or directory tree will be getting
frequent changes, and doesn't want the assistant to sync it, so gitignores
it. The assistant could overload the system with repeated ls-files checks!

So, I've decided that the assistant will not automatically commit changes
to files that are gitignored. This is a tradeoff. Hopefully it won't be a
problem to adjust .gitignore settings to not ignore files you want the
assistant to autocommit, or to manually git annex add files that are listed
in .gitignore.

(This could be revisited if git-annex gets access to an interface to check
the content of the index w/o forking a git command. This could be libgit2,
or perhaps a separate git cat-file --batch-check process, so it wouldn't
need to ship over the whole file content.)

This commit was sponsored by Francois Marier. Thanks!

											
										
										
											2013-08-02 23:31:55 +00:00
+									, checkignorehandle :: Maybe (Maybe CheckIgnoreHandle)
-												--backend now overrides any backend configured in .gitattributes files.

											
										
										
											2011-05-18 23:34:46 +00:00
+									, forcebackend :: Maybe String
-												reorganize numcopies code (no behavior changes)

Move stuff into Logs.NumCopies. Add a NumCopies newtype.

Better names for various serialization classes that are specific to one
thing or another.

											
										
										
											2014-01-21 20:08:19 +00:00
+									, globalnumcopies :: Maybe NumCopies
-												numcopies cleanup, part 2

This includes several bug fixes.

											
										
										
											2014-01-21 21:08:49 +00:00
+									, forcenumcopies :: Maybe NumCopies
-												improve matcher data type to allow matching Keys, instead of just files (no behavior changes)

											
										
										
											2014-01-18 18:51:55 +00:00
+									, limit :: Matcher (MatchInfo -> Annex Bool)
-												add ConfigMonitor thread

Monitors git-annex branch for changes, which are noticed by the Merger
thread whenever the branch ref is changed (either due to an incoming push,
or a local change), and refreshes cached config values for modified config
files.

Rate limited to run no more often than once per minute. This is important
because frequent git-annex branch changes happen when files are being
added, or transferred, etc.

A primary use case is that, when preferred content changes are made,
and get pushed to remotes, the remotes start honoring those settings.
Other use cases include propagating repository description and trust
changes to remotes, and learning when a remote has added a new special
remote, so the webapp can present the GUI to enable that special remote
locally.

Also added a uuid.log cache. All other config files already had caches.

											
										
										
											2012-10-20 20:37:06 +00:00
+									, uuidmap :: Maybe UUIDMap
-												added preferred-content log, and allow editing it with vicfg

This includes a full parser for the boolean expressions in the log,
that compiles them into Matchers. Those matchers are not used yet.

A complication is that matching against an expression should never
crash git-annex with an error. Instead, vicfg checks that the expressions
parse. If a bad expression (or an expression understood by some future
git-annex version) gets into the log, it'll be ignored.

Most of the code in Limit couldn't fail anyway, but I did have to make
limitCopies check its parameter first, and return an error if it's bad,
rather than erroring at runtime.

											
										
										
											2012-10-04 19:48:59 +00:00
+									, preferredcontentmap :: Maybe PreferredContentMap
-												cache parsed core.sharedrepository

											
										
										
											2012-04-21 23:42:49 +00:00
+									, shared :: Maybe SharedRepository
-												Add annex-trustlevel configuration settings, which can be used to  override the trust level of a remote.

This overrides the trust.log, and is overridden by the command-line trust
parameters.

It would have been nicer to have Logs.Trust.trustMap just look up the
configuration for all remotes, but a dependency loop prevented that
(Remotes depends on Logs.Trust in several ways). So instead, look up
the configuration when building remotes, storing it in the same forcetrust
field used for the command-line trust parameters.

											
										
										
											2012-01-10 03:31:44 +00:00
+									, forcetrust :: TrustMap
-												cache the trustmap

Doubles the speed of fsck, and speeds up drop as well.

											
										
										
											2011-06-24 01:25:39 +00:00
+									, trustmap :: Maybe TrustMap
-												group, ungroup: New commands to indicate groups of repositories.

											
										
										
											2012-10-01 19:12:04 +00:00
+									, groupmap :: Maybe GroupMap
-												Added shared cipher mode to encryptable special remotes.

This option avoids gpg key distribution, at the expense of flexibility, and
with the requirement that all clones of the git repository be equally
trusted.

											
										
										
											2012-04-29 18:02:18 +00:00
+									, ciphers :: M.Map StorableCipher Cipher
-												use locking on Windows

This is all the easy cases, where there was already a separate lock file.

											
										
										
											2014-01-28 18:17:14 +00:00
+									, lockpool :: LockPool
-												more command-specific options

Made --from and --to command-specific options.

Added generic storage for values of command-specific options,
which allows removing some of the special case fields in AnnexState.

(Also added generic storage for command-specific flags, although there are
not yet any.)

Note that this storage uses a Map, so repeatedly looking up the same value
is slightly more expensive than looking up an AnnexState field. But, the
value can be looked up once in the seek stage, transformed as necessary,
and passed in a closure to the start stage, and this avoids that overhead.

Still, I'm hesitant to use this for things like force or fast flags.
It's probably best to reserve it for flags that are only used by a few
commands, or options like --from and --to that it's important only be
allowed to be used with commands that implement them, to avoid user
confusion.

											
										
										
											2012-01-06 07:06:25 +00:00
+									, flags :: M.Map String Bool
 									, fields :: M.Map String String
-												do a cleanup commit after moving data from or to a git remote

Added Annex.cleanup, which is a general purpose interface for adding
actions to run at the end.

Remotes with the old git-annex-shell will commit every time, and have no
commit command, so hide stderr when running the commit command.

											
										
										
											2012-02-25 22:02:49 +00:00
+									, cleanup :: M.Map String (Annex ())
-												Direct mode: Support filesystems like FAT which can change their inodes each time they are mounted.

											
										
										
											2013-02-19 20:26:07 +00:00
+									, inodeschanged :: Maybe Bool
-												Send a git-annex user-agent when downloading urls.

Overridable with --user-agent option.

Not yet done for S3 or WebDAV due to limitations of libraries used --
neither allows a user-agent header to be specified.

This commit sponsored by Michael Zehrer.

											
										
										
											2013-09-28 18:35:21 +00:00
+									, useragent :: Maybe String
-												fix inversion of control in CommandSeek (no behavior changes)

I've been disliking how the command seek actions were written for some
time, with their inversion of control and ugly workarounds.

The last straw to fix it was sync --content, which didn't fit the
Annex [CommandStart] interface well at all. I have not yet made it take
advantage of the changed interface though.

The crucial change, and probably why I didn't do it this way from the
beginning, is to make each CommandStart action be run with exceptions
caught, and if it fails, increment a failure counter in annex state.
So I finally remove the very first code I wrote for git-annex, which
was before I had exception handling in the Annex monad, and so ran outside
that monad, passing state explicitly as it ran each CommandStart action.

This was a real slog from 1 to 5 am.

Test suite passes.

Memory usage is lower than before, sometimes by a couple of megabytes, and
remains constant, even when running in a large repo, and even when
repeatedly failing and incrementing the error counter. So no accidental
laziness space leaks.

Wall clock speed is identical, even in large repos.

This commit was sponsored by an anonymous bitcoiner.

											
										
										
											2014-01-20 08:11:42 +00:00
+									, errcounter :: Integer
-												add "unused" preferred content expression

With a really nice optimisation that keeps it from having any overhead
in normal operation!

This commit was sponsored by Ulises Vitulli.

											
										
										
											2014-01-22 20:35:32 +00:00
+									, unusedkeys :: Maybe (S.Set Key)
-												add cipher field to AnnexState

											
										
										
											2011-04-16 20:41:46 +00:00
+									}
-												rework config storage

Moved away from a map of flags to storing config directly in the AnnexState
structure. Got rid of most accessor functions in Annex.

This allowed supporting multiple --exclude flags.

											
										
										
											2011-01-26 04:17:38 +00:00
-												fix reversion in relative paths to local remotes of direct mode repos

0980f3dae62b8a9e2d6a6b40767299559e4675a8 broke support for local remotes
from direct mode repos, because the relative path was taken to be from the
gitdir, rather than from the work tree.

											
										
										
											2013-11-26 22:11:37 +00:00
+								newState :: GitConfig -> Git.Repo -> AnnexState
 								newState c r = AnnexState
 									{ repo = r
-												support direct mode repositories with core.bare=true (not yet default)

Direct mode repositories can now have core.bare=true set, to prevent
accidentally running git commands that try to operate on the work tree,
and so do the wrong thing.

This is not yet the default, and it causes known problems for git-annex sync
due to receive.denyCurrentBranch not working in bare repositories.

This commit was sponsored by Richard Hartmann.

											
										
										
											2013-11-05 18:24:28 +00:00
+									, gitconfig = c
-												rework config storage

Moved away from a map of flags to storing config directly in the AnnexState
structure. Got rid of most accessor functions in Annex.

This allowed supporting multiple --exclude flags.

											
										
										
											2011-01-26 04:17:38 +00:00
+									, backends = []
-												add remotes slot to Annex

This required parameterizing the type for Remote, to avoid a cycle.

											
										
										
											2011-03-27 20:17:56 +00:00
+									, remotes = []
-												display "Recording state in git..." when staging the journal

A bit tricky to avoid printing it twice in a row when there are queued git
commands to run and journal to stage.

Added a generic way to run an action that may output multiple side
messages, with only the first displayed.

											
										
										
											2012-04-27 17:23:52 +00:00
+									, output = defaultMessageState
-												rework config storage

Moved away from a map of flags to storing config directly in the AnnexState
structure. Got rid of most accessor functions in Annex.

This allowed supporting multiple --exclude flags.

											
										
										
											2011-01-26 04:17:38 +00:00
+									, force = False
-												fast mode

Add --fast flag, that can enable less expensive, but also less thorough versions of some commands.

* Add --fast flag, that can enable less expensive, but also less thorough
  versions of some commands.
* fsck: In fast mode, avoid checking checksums.
* unused: In fast mode, just show all existing temp files as unused,
  and avoid expensive scan for other unused content.

											
										
										
											2011-03-22 21:41:06 +00:00
+									, fast = False
-												remove optimize subcommand; use --auto instead

get, drop: Added --auto option, which decides whether to get/drop content
as needed to work toward the configured numcopies.

The problem with bundling it up in optimize was that I then found I wanted
to run an optimize that did not drop files, only got them. Considered adding
a --only-get switch to it, but that seemed wrong. Instead, let's make
existing subcommands optionally smarter.

Note that the only actual difference between drop and drop --auto is that
the latter does not even try to drop a file if it knows of not enough
copies, and does not print any error messages about files it was unable to
drop.

It might be nice to make get avoid asking git for attributes when not in
auto mode. For now it always asks for attributes.

											
										
										
											2011-09-15 17:30:04 +00:00
+									, auto = False
-												assistant: Work around git-cat-file's not reloading the index after files are staged.

Argh.

											
										
										
											2013-05-25 04:37:41 +00:00
+									, daemon = False
-												Branch handling improvements

Support creating the branch.

Unified branch state into a single data type.

Only commit changes when the index has been changed.

											
										
										
											2011-06-22 19:58:30 +00:00
+									, branchstate = startBranchState
-												Added a annex.queuesize setting

useful when adding hundreds of thousands of files on a system with plenty
of memory.

git add gets quite slow in such a large repository, so if the system has
more than the ~32 mb of memory the queue can use by default, it's a useful
optimisation to increase the queue size, in order to decrease the number
of times git add is run.

											
										
										
											2012-02-15 15:13:13 +00:00
+									, repoqueue = Nothing
-												start one git-cat-file per index file

This reverts 1c83b6c43936802b436f64748e6ede026946eefe and properly fixes
the issue discussed there.

This makes git-annex behave much nicer in direct mode.

											
										
										
											2013-05-15 22:46:38 +00:00
+									, catfilehandles = M.empty
-												rework git check-attr interface

Now gitattributes are looked up, efficiently, in only the places that
really need them, using the same approach used for cat-file.

The old CheckAttr code seemed very fragile, in the way it streamed files
through git check-attr.
I actually found that cad8824852aa0623dc41eac02a9e2bae47d88ec4
was still deadlocking with ghc 7.4, at the end of adding a lot of files.
This should fix that problem, and avoid future ones.

The best part is that this removes withAttrFilesInGit and withNumCopies,
which were complicated Seek methods, as well as simplifying the types
for several other Seek methods that had a Backend tupled in.

											
										
										
											2012-02-14 03:42:44 +00:00
+									, checkattrhandle = Nothing
-												gitignore support for the assistant and watcher

Requires git 1.8.4 or newer. When it's installed, a background
git check-ignore process is run, and used to efficiently check ignores
whenever a new file is added.

Thanks to Adam Spiers, for getting the necessary support into git for this.

A complication is what to do about files that are gitignored but have
been checked into git anyway. git commands assume the ignore has been
overridden in this case, and do not need any more overriding to commit a
changed version.

However, for the assistant to do the same, it would have to run git ls-files
to check if the ignored file is in git. This is somewhat expensive. Or it
could use the running git-cat-file process to query the file that way,
but that requires transferring the whole file content over a pipe, so it
can be quite expensive too, for files that are not git-annex
symlinks.

Now imagine if the user knows that a file or directory tree will be getting
frequent changes, and doesn't want the assistant to sync it, so gitignores
it. The assistant could overload the system with repeated ls-files checks!

So, I've decided that the assistant will not automatically commit changes
to files that are gitignored. This is a tradeoff. Hopefully it won't be a
problem to adjust .gitignore settings to not ignore files you want the
assistant to autocommit, or to manually git annex add files that are listed
in .gitignore.

(This could be revisited if git-annex gets access to an interface to check
the content of the index w/o forking a git command. This could be libgit2,
or perhaps a separate git cat-file --batch-check process, so it wouldn't
need to ship over the whole file content.)

This commit was sponsored by Francois Marier. Thanks!

											
										
										
											2013-08-02 23:31:55 +00:00
+									, checkignorehandle = Nothing
-												--backend now overrides any backend configured in .gitattributes files.

											
										
										
											2011-05-18 23:34:46 +00:00
+									, forcebackend = Nothing
-												global numcopies setting

* numcopies: New command, sets global numcopies value that is seen by all
  clones of a repository.
* The annex.numcopies git config setting is deprecated. Once the numcopies
  command is used to set the global number of copies, any annex.numcopies
  git configs will be ignored.
* assistant: Make the prefs page set the global numcopies.

This global numcopies setting is needed to let preferred content
expressions operate on numcopies.

It's also convenient, because typically if you want git-annex to preserve N
copies of files in a repo, you want it to do that no matter which repo it's
running in. Making it global avoids needing to warn the user about gotchas
involving inconsistent annex.numcopies settings.
(See changes to doc/numcopies.mdwn.)

Added a new variety of git-annex branch log file, that holds only 1 value.
Will probably be useful for other stuff later.

This commit was sponsored by Nicolas Pouillard.

											
										
										
											2014-01-20 20:47:56 +00:00
+									, globalnumcopies = Nothing
-												numcopies cleanup, part 2

This includes several bug fixes.

											
										
										
											2014-01-21 21:08:49 +00:00
+									, forcenumcopies = Nothing
-												refactor --exclude to use Utility.Matcher

This should change no behavior, but opens the possibility to use the
matcher for other sorts of limits on which files git-annex processes.

											
										
										
											2011-09-18 21:47:49 +00:00
+									, limit = Left []
-												add ConfigMonitor thread

Monitors git-annex branch for changes, which are noticed by the Merger
thread whenever the branch ref is changed (either due to an incoming push,
or a local change), and refreshes cached config values for modified config
files.

Rate limited to run no more often than once per minute. This is important
because frequent git-annex branch changes happen when files are being
added, or transferred, etc.

A primary use case is that, when preferred content changes are made,
and get pushed to remotes, the remotes start honoring those settings.
Other use cases include propigating repository description and trust
changes to remotes, and learning when a remote has added a new special
remote, so the webapp can present the GUI to enable that special remote
locally.

Also added a uuid.log cache. All other config files already had caches.

											
										
										
											2012-10-20 20:37:06 +00:00
+									, uuidmap = Nothing
-												added preferred-content log, and allow editing it with vicfg

This includes a full parser for the boolean expressions in the log,
that compiles them into Matchers. Those matchers are not used yet.

A complication is that matching against an expression should never
crash git-annex with an error. Instead, vicfg checks that the expressions
parse. If a bad expression (or an expression understood by some future
git-annex version) gets into the log, it'll be ignored.

Most of the code in Limit couldn't fail anyway, but I did have to make
limitCopies check its parameter first, and return an error if it's bad,
rather than erroring at runtime.

											
										
										
											2012-10-04 19:48:59 +00:00
+									, preferredcontentmap = Nothing
-												cache parsed core.sharedrepository

											
										
										
											2012-04-21 23:42:49 +00:00
+									, shared = Nothing
-												Add annex-trustlevel configuration settings, which can be used to  override the trust level of a remote.

This overrides the trust.log, and is overridden by the command-line trust
parameters.

It would have been nicer to have Logs.Trust.trustMap just look up the
configuration for all remotes, but a dependency loop prevented that
(Remotes depends on Logs.Trust in several ways). So instead, look up
the configuration when building remotes, storing it in the same forcetrust
field used for the command-line trust parameters.

											
										
										
											2012-01-10 03:31:44 +00:00
+									, forcetrust = M.empty
-												cache the trustmap

Doubles the speed of fsck, and speeds up drop as well.

											
										
										
											2011-06-24 01:25:39 +00:00
+									, trustmap = Nothing
-												group, ungroup: New commands to indicate groups of repositories.

											
										
										
											2012-10-01 19:12:04 +00:00
+									, groupmap = Nothing
-												Fix caching of decrypted ciphers, which failed when drop had to check multiple different encrypted special remotes.

											
										
										
											2011-12-08 20:01:46 +00:00
+									, ciphers = M.empty
-												ssh connection caching

Ssh connection caching is now enabled automatically by git-annex. Only one
ssh connection is made to each host per git-annex run, which can speed some
things up a lot, as well as avoiding repeated password prompts. Concurrent
git-annex processes also share ssh connections. Cached ssh connections are
shut down when git-annex exits.

Note: The rsync special remote does not yet participate in the ssh
connection caching.

											
										
										
											2012-01-20 19:34:52 +00:00
+									, lockpool = M.empty
-												more command-specific options

Made --from and --to command-specific options.

Added generic storage for values of command-specific options,
which allows removing some of the special case fields in AnnexState.

(Also added generic storage for command-specific flags, although there are
not yet any.)

Note that this storage uses a Map, so repeatedly looking up the same value
is slightly more expensive than looking up an AnnexState field. But, the
value can be looked up once in the seek stage, transformed as necessary,
and passed in a closure to the start stage, and this avoids that overhead.

Still, I'm hesitant to use this for things like force or fast flags.
It's probably best to reserve it for flags that are only used by a few
commands, or options like --from and --to that it's important only be
allowed to be used with commands that implement them, to avoid user
confusion.

											
										
										
											2012-01-06 07:06:25 +00:00
+									, flags = M.empty
 									, fields = M.empty
-												do a cleanup commit after moving data from or to a git remote

Added Annex.cleanup, which is a general purpose interface for adding
actions to run at the end.

Remotes with the old git-annex-shell will commit every time, and have no
commit command, so hide stderr when running the commit command.

											
										
										
											2012-02-25 22:02:49 +00:00
+									, cleanup = M.empty
-												Direct mode: Support filesystems like FAT which can change their inodes each time they are mounted.

											
										
										
											2013-02-19 20:26:07 +00:00
+									, inodeschanged = Nothing
-												Send a git-annex user-agent when downloading urls.

Overridable with --user-agent option.

Not yet done for S3 or WebDAV due to limitations of libraries used --
neither allows a user-agent header to be specified.

This commit sponsored by Michael Zehrer.

											
										
										
											2013-09-28 18:35:21 +00:00
+									, useragent = Nothing
-												fix inversion of control in CommandSeek (no behavior changes)

I've been disliking how the command seek actions were written for some
time, with their inversion of control and ugly workarounds.

The last straw to fix it was sync --content, which didn't fit the
Annex [CommandStart] interface well at all. I have not yet made it take
advantage of the changed interface though.

The crucial change, and probably why I didn't do it this way from the
beginning, is to make each CommandStart action be run with exceptions
caught, and if it fails, increment a failure counter in annex state.
So I finally remove the very first code I wrote for git-annex, which
was before I had exception handling in the Annex monad, and so ran outside
that monad, passing state explicitly as it ran each CommandStart action.

This was a real slog from 1 to 5 am.

Test suite passes.

Memory usage is lower than before, sometimes by a couple of megabytes, and
remains constant, even when running in a large repo, and even when
repeatedly failing and incrementing the error counter. So no accidental
laziness space leaks.

Wall clock speed is identical, even in large repos.

This commit was sponsored by an anonymous bitcoiner.

											
										
										
											2014-01-20 08:11:42 +00:00
+									, errcounter = 0
-												add "unused" preferred content expression

With a really nice optimisation that keeps it from having any overhead
in normal operation!

This commit was sponsored by Ulises Vitulli.

											
										
										
											2014-01-22 20:35:32 +00:00
+									, unusedkeys = Nothing
-												rework config storage

Moved away from a map of flags to storing config directly in the AnnexState
structure. Got rid of most accessor functions in Annex.

This allowed supporting multiple --exclude flags.

											
										
										
											2011-01-26 04:17:38 +00:00
+									}
-												more reorg, spiffed up state monad

											
										
										
											2010-10-14 07:18:11 +00:00
-												Add support for core.worktree, and fix support for GIT_WORK_TREE and GIT_DIR.

The environment needs to override git-config. Changed when git config is
read, and avoid rereading it once it's been read.

chdir for both worktree settings.

											
										
										
											2012-05-18 22:20:53 +00:00
+								{- Makes an Annex state object for the specified git repo.
 								 - Ensures the config is read, if it was not already. -}
-												remove unused backend machinery

The only remaining vestige of backends is different types of keys. These
are still called "backends", mostly to avoid needing to change user interface
and configuration. But everything to do with storing keys in different
backends was gone; instead different types of remotes are used.

In the refactoring, lots of code was moved out of odd corners like
Backend.File, to closer to where it's used, like Command.Drop and
Command.Fsck. Quite a lot of dead code was removed. Several data structures
became simpler, which may result in better runtime efficiency. There should
be no user-visible changes.

											
										
										
											2011-07-05 22:31:46 +00:00
+								new :: Git.Repo -> IO AnnexState
-												fix reversion in relative paths to local remotes of direct mode repos

0980f3dae62b8a9e2d6a6b40767299559e4675a8 broke support for local remotes
from direct mode repos, because the relative path was taken to be from the
gitdir, rather than from the work tree.

											
										
										
											2013-11-26 22:11:37 +00:00
+								new r = do
 									r' <- Git.Config.read r
 									let c = extractGitConfig r'
 									-- When annexDirect is set in the config, the Repo is passed
 									-- through fixupDirect before being stored in the new state.
 									r'' <- if annexDirect c
 										then fixupDirect r'
 										else return r'
 									return $ newState c r''
-												more reorg, spiffed up state monad

											
										
										
											2010-10-14 07:18:11 +00:00
-												Switch to MonadCatchIO-transformers for better handling of state while catching exceptions.

As seen in this bug report, the lifted exception handling using the StateT
monad throws away state changes when an action throws an exception.
http://git-annex.branchable.com/bugs/git_annex_fork_bombs_on_gpg_file/
  .. Which can result in cached values being redundantly calculated, or other
     possibly worse bugs when the annex state gets out of sync with reality.

This switches from a StateT AnnexState to a ReaderT (MVar AnnexState).
All changes to the state go via the MVar. So when an Annex action is
running inside an exception handler, and it makes some changes, they
immediately go into effect in the MVar. If it then throws an exception
(or even crashes its thread!), the state changes are still in effect.

The MonadCatchIO-transformers change is actually only incidental.
I could have kept on using lifted-base for the exception handling.
However, I'd have needed to write a new instance of MonadBaseControl
for the new monad.. and I didn't write the old instance.. I begged Bas
and he kindly sent it to me. Happily, MonadCatchIO-transformers is
able to derive a MonadCatchIO instance for my monad.

This is a deep level change. It passes the test suite! What could it break?

Well.. The most likely breakage would be to code that runs an Annex action
in an exception handler, and *wants* state changes to be thrown away.
Perhaps the state changes leave the state inconsistent, or wrong. Since
there are relatively few places in git-annex that catch exceptions in the
Annex monad, and the AnnexState is generally just used to cache calculated
data, this is unlikely to be a problem.

Oh yeah, this change also makes Assistant.Types.ThreadedMonad a bit
redundant. It's now entirely possible to run concurrent Annex actions in
different threads, all sharing access to the same state! The ThreadedMonad
just adds some extra work on top of that, with its own MVar, and avoids
such actions possibly stepping on one-another's toes. I have not gotten
rid of it, but might try that later. Being able to run concurrent Annex
actions would simplify parts of the Assistant code.

											
										
										
											2013-05-19 18:16:36 +00:00
+								{- Performs an action in the Annex monad from a starting state,
 								 - returning a new state. -}
-												better types

											
										
										
											2011-01-11 22:13:26 +00:00
+								run :: AnnexState -> Annex a -> IO (a, AnnexState)
-												Switch to MonadCatchIO-transformers for better handling of state while catching exceptions.

As seen in this bug report, the lifted exception handling using the StateT
monad throws away state changes when an action throws an exception.
http://git-annex.branchable.com/bugs/git_annex_fork_bombs_on_gpg_file/
  .. Which can result in cached values being redundantly calculated, or other
     possibly worse bugs when the annex state gets out of sync with reality.

This switches from a StateT AnnexState to a ReaderT (MVar AnnexState).
All changes to the state go via the MVar. So when an Annex action is
running inside an exception handler, and it makes some changes, they
immediately go into effect in the MVar. If it then throws an exception
(or even crashes its thread!), the state changes are still in effect.

The MonadCatchIO-transformers change is actually only incidental.
I could have kept on using lifted-base for the exception handling.
However, I'd have needed to write a new instance of MonadBaseControl
for the new monad.. and I didn't write the old instance.. I begged Bas
and he kindly sent it to me. Happily, MonadCatchIO-transformers is
able to derive a MonadCatchIO instance for my monad.

This is a deep level change. It passes the test suite! What could it break?

Well.. The most likely breakage would be to code that runs an Annex action
in an exception handler, and *wants* state changes to be thrown away.
Perhaps the state changes leave the state inconsistent, or wrong. Since
there are relatively few places in git-annex that catch exceptions in the
Annex monad, and the AnnexState is generally just used to cache calculated
data, this is unlikely to be a problem.

Oh yeah, this change also makes Assistant.Types.ThreadedMonad a bit
redundant. It's now entirely possible to run concurrent Annex actions in
different threads, all sharing access to the same state! The ThreadedMonad
just adds some extra work on top of that, with its own MVar, and avoids
such actions possibly stepping on one-another's toes. I have not gotten
rid of it, but might try that later. Being able to run concurrent Annex
actions would simplify parts of the Assistant code.

											
										
										
											2013-05-19 18:16:36 +00:00
+								run s a = newMVar s >>= \mvar -> do
 									result <- runReaderT (runAnnex a) mvar
 									-- Take the final state back out of the MVar so it can be
 									-- returned alongside the action's result.
 									final <- takeMVar mvar
 									return (result, final)
 								{- Performs an action in the Annex monad from a starting state,
 								 - and throws away the new state. -}
-												better types

											
										
										
											2011-01-11 22:13:26 +00:00
+								eval :: AnnexState -> Annex a -> IO a
-												Switch to MonadCatchIO-transformers for better handling of state while catching exceptions.

As seen in this bug report, the lifted exception handling using the StateT
monad throws away state changes when an action throws an exception.
http://git-annex.branchable.com/bugs/git_annex_fork_bombs_on_gpg_file/
  .. Which can result in cached values being redundantly calculated, or other
     possibly worse bugs when the annex state gets out of sync with reality.

This switches from a StateT AnnexState to a ReaderT (MVar AnnexState).
All changes to the state go via the MVar. So when an Annex action is
running inside an exception handler, and it makes some changes, they
immediately go into effect in the MVar. If it then throws an exception
(or even crashes its thread!), the state changes are still in effect.

The MonadCatchIO-transformers change is actually only incidental.
I could have kept on using lifted-base for the exception handling.
However, I'd have needed to write a new instance of MonadBaseControl
for the new monad.. and I didn't write the old instance.. I begged Bas
and he kindly sent it to me. Happily, MonadCatchIO-transformers is
able to derive a MonadCatchIO instance for my monad.

This is a deep level change. It passes the test suite! What could it break?

Well.. The most likely breakage would be to code that runs an Annex action
in an exception handler, and *wants* state changes to be thrown away.
Perhaps the state changes leave the state inconsistent, or wrong. Since
there are relatively few places in git-annex that catch exceptions in the
Annex monad, and the AnnexState is generally just used to cache calculated
data, this is unlikely to be a problem.

Oh yeah, this change also makes Assistant.Types.ThreadedMonad a bit
redundant. It's now entirely possible to run concurrent Annex actions in
different threads, all sharing access to the same state! The ThreadedMonad
just adds some extra work on top of that, with its own MVar, and avoids
such actions possibly stepping on one-another's toes. I have not gotten
rid of it, but might try that later. Being able to run concurrent Annex
actions would simplify parts of the Assistant code.

											
										
										
											2013-05-19 18:16:36 +00:00
+								eval s a = runReaderT (runAnnex a) =<< newMVar s
 								{- Reads the current AnnexState out of the MVar and returns the
 								 - result of applying the selector to it. -}
 								getState :: (AnnexState -> v) -> Annex v
 								getState selector = selector <$> (liftIO . readMVar =<< ask)
 								{- Replaces the AnnexState held in the MVar with the result of
 								 - applying the modifier to it. -}
 								changeState :: (AnnexState -> AnnexState) -> Annex ()
 								changeState modifier = ask >>= \mvar ->
 									liftIO $ modifyMVar_ mvar (return . modifier)
-												more reorg, spiffed up state monad

											
										
										
											2010-10-14 07:18:11 +00:00
-												more command-specific options

Made --from and --to command-specific options.

Added generic storage for values of command-specific options,
which allows removing some of the special case fields in AnnexState.

(Also added generic storage for command-specific flags, although there are
not yet any.)

Note that this storage uses a Map, so repeatedly looking up the same value
is slightly more expensive than looking up an AnnexState field. But, the
value can be looked up once in the seek stage, transformed as necessary,
and passed in a closure to the start stage, and this avoids that overhead.

Still, I'm hesitant to use this for things like force or fast flags.
It's probably best to reserve it for flags that are only used by a few
commands, or options like --from and --to that it's important only be
allowed to be used with commands that implement them, to avoid user
confusion.

											
										
										
											2012-01-06 07:06:25 +00:00
+								{- Sets a flag to True -}
 								setFlag :: String -> Annex ()
 								setFlag flag = changeState $ \s ->
-												do a cleanup commit after moving data from or to a git remote

Added Annex.cleanup, which is a general purpose interface for adding
actions to run at the end.

Remotes with the old git-annex-shell will commit every time, and have no
commit command, so hide stderr when running the commit command.

											
										
										
											2012-02-25 22:02:49 +00:00
+									s { flags = M.insertWith' const flag True $ flags s }
-												more command-specific options

Made --from and --to command-specific options.

Added generic storage for values of command-specific options,
which allows removing some of the special case fields in AnnexState.

(Also added generic storage for command-specific flags, although there are
not yet any.)

Note that this storage uses a Map, so repeatedly looking up the same value
is slightly more expensive than looking up an AnnexState field. But, the
value can be looked up once in the seek stage, transformed as necessary,
and passed in a closure to the start stage, and this avoids that overhead.

Still, I'm hesitant to use this for things like force or fast flags.
It's probably best to reserve it for flags that are only used by a few
commands, or options like --from and --to that it's important only be
allowed to be used with commands that implement them, to avoid user
confusion.

											
										
										
											2012-01-06 07:06:25 +00:00
 								{- Sets a field to a value -}
 								setField :: String -> String -> Annex ()
 								setField field value = changeState $ \s ->
-												do a cleanup commit after moving data from or to a git remote

Added Annex.cleanup, which is a general purpose interface for adding
actions to run at the end.

Remotes with the old git-annex-shell will commit every time, and have no
commit command, so hide stderr when running the commit command.

											
										
										
											2012-02-25 22:02:49 +00:00
+									s { fields = M.insertWith' const field value $ fields s }
 								{- Registers a cleanup action under the given id. Registering
 								 - another action with the same id replaces the earlier one. -}
 								addCleanup :: String -> Annex () -> Annex ()
 								addCleanup uid a = changeState register
 									where
 										register st = st { cleanup = M.insertWith' const uid a (cleanup st) }
-												more command-specific options

Made --from and --to command-specific options.

Added generic storage for values of command-specific options,
which allows removing some of the special case fields in AnnexState.

(Also added generic storage for command-specific flags, although there are
not yet any.)

Note that this storage uses a Map, so repeatedly looking up the same value
is slightly more expensive than looking up an AnnexState field. But, the
value can be looked up once in the seek stage, transformed as necessary,
and passed in a closure to the start stage, and this avoids that overhead.

Still, I'm hesitant to use this for things like force or fast flags.
It's probably best to reserve it for flags that are only used by a few
commands, or options like --from and --to that it's important only be
allowed to be used with commands that implement them, to avoid user
confusion.

											
										
										
											2012-01-06 07:06:25 +00:00
-												fix test suite build

											
										
										
											2012-04-30 17:59:05 +00:00
+								{- Sets the type of output to emit. -}
 								setOutput :: OutputType -> Annex ()
 								setOutput o = changeState withOutputType
 									where
 										-- Only the outputType field of the MessageState is replaced;
 										-- its other fields are carried over unchanged.
 										withOutputType st = st { output = (output st) { outputType = o } }
-												more command-specific options

Made --from and --to command-specific options.

Added generic storage for values of command-specific options,
which allows removing some of the special case fields in AnnexState.

(Also added generic storage for command-specific flags, although there are
not yet any.)

Note that this storage uses a Map, so repeatedly looking up the same value
is slightly more expensive than looking up an AnnexState field. But, the
value can be looked up once in the seek stage, transformed as necessary,
and passed in a closure to the start stage, and this avoids that overhead.

Still, I'm hesitant to use this for things like force or fast flags.
It's probably best to reserve it for flags that are only used by a few
commands, or options like --from and --to that it's important only be
allowed to be used with commands that implement them, to avoid user
confusion.

											
										
										
											2012-01-06 07:06:25 +00:00
+								{- Checks if a flag was set. -}
 								getFlag :: String -> Annex Bool
 								getFlag flag = getState $ M.findWithDefault False flag . flags
 								{- Looks up the value stored for a field; Nothing when the field
 								 - is not present in the fields map. -}
 								getField :: String -> Annex (Maybe String)
 								getField field = getState (M.lookup field . fields)
-												reorder repo parameters last

Many functions took the repo as their first parameter. Changing it
consistently to be the last parameter allows doing some useful things with
currying, that reduce boilerplate.

In particular, g <- gitRepo is almost never needed now, instead
use inRepo to run an IO action in the repo, and fromRepo to get
a value from the repo.

This also provides more opportunities to use monadic and applicative
combinators.

											
										
										
											2011-11-08 19:34:10 +00:00
+								{- Returns the annex's git repository. -}
-												more reorg, spiffed up state monad

											
										
										
											2010-10-14 07:18:11 +00:00
+								gitRepo :: Annex Git.Repo
-												successfully split Annex and AnnexState out of TypeInternals

											
										
										
											2011-01-26 01:49:04 +00:00
+								gitRepo = getState repo
-												reorder repo parameters last

Many functions took the repo as their first parameter. Changing it
consistently to be the last parameter allows doing some useful things with
currying, that reduce boilerplate.

In particular, g <- gitRepo is almost never needed now, instead
use inRepo to run an IO action in the repo, and fromRepo to get
a value from the repo.

This also provides more opportunities to use monadic and applicative
combinators.

											
										
										
											2011-11-08 19:34:10 +00:00
 								{- Runs an IO action in the annex's git repository. -}
 								inRepo :: (Git.Repo -> IO a) -> Annex a
-												golf

											
										
										
											2011-11-12 18:24:07 +00:00
+								inRepo a = liftIO . a =<< gitRepo
-												reorder repo parameters last

Many functions took the repo as their first parameter. Changing it
consistently to be the last parameter allows doing some useful things with
currying, that reduce boilerplate.

In particular, g <- gitRepo is almost never needed now, instead
use inRepo to run an IO action in the repo, and fromRepo to get
a value from the repo.

This also provides more opportunities to use monadic and applicative
combinators.

											
										
										
											2011-11-08 19:34:10 +00:00
 								{- Applies a pure function to the annex's git repository and
 								 - returns the result. -}
 								fromRepo :: (Git.Repo -> a) -> Annex a
 								fromRepo a = fmap a gitRepo
-												type based git config handling

Now there's a Config type, that's extracted from the git config at startup.
Note that laziness means that individual config values are only looked up
and parsed on demand, and so we get implicit memoization for all of them.
So this is not only prettier and more type safe, it optimises several
places that didn't have explicit memoization before. As well as getting rid
of the ugly explicit memoization code.

Not yet done for annex.<remote>.* configuration settings.

											
										
										
											2012-12-30 03:10:18 +00:00
-												Switch to MonadCatchIO-transformers for better handling of state while catching exceptions.

As seen in this bug report, the lifted exception handling using the StateT
monad throws away state changes when an action throws an exception.
http://git-annex.branchable.com/bugs/git_annex_fork_bombs_on_gpg_file/
  .. Which can result in cached values being redundantly calculated, or other
     possibly worse bugs when the annex state gets out of sync with reality.

This switches from a StateT AnnexState to a ReaderT (MVar AnnexState).
All changes to the state go via the MVar. So when an Annex action is
running inside an exception handler, and it makes some changes, they
immediately go into effect in the MVar. If it then throws an exception
(or even crashes its thread!), the state changes are still in effect.

The MonadCatchIO-transformers change is actually only incidental.
I could have kept on using lifted-base for the exception handling.
However, I'd have needed to write a new instance of MonadBaseControl
for the new monad.. and I didn't write the old instance.. I begged Bas
and he kindly sent it to me. Happily, MonadCatchIO-transformers is
able to derive a MonadCatchIO instance for my monad.

This is a deep level change. It passes the test suite! What could it break?

Well.. The most likely breakage would be to code that runs an Annex action
in an exception handler, and *wants* state changes to be thrown away.
Perhaps the state changes leave the state inconsistent, or wrong. Since
there are relatively few places in git-annex that catch exceptions in the
Annex monad, and the AnnexState is generally just used to cache calculated
data, this is unlikely to be a problem.

Oh yeah, this change also makes Assistant.Types.ThreadedMonad a bit
redundant. It's now entirely possible to run concurrent Annex actions in
different threads, all sharing access to the same state! The ThreadedMonad
just adds some extra work on top of that, with its own MVar, and avoids
such actions possibly stepping on one-another's toes. I have not gotten
rid of it, but might try that later. Being able to run concurrent Annex
actions would simplify parts of the Assistant code.

											
										
										
											2013-05-19 18:16:36 +00:00
+								{- Calculates a value from an annex's git repository and its GitConfig. -}
-												Use lower case hash directories for storing files on crippled filesystems, same as is already done for bare repositories.

* since this is a crippled filesystem anyway, git-annex doesn't use
  symlinks on it
* so there's no reason to use the mixed case hash directories that we're
  stuck using to avoid breaking everyone's symlinks to the content
* so we can do what is already done for all bare repos, and make non-bare
  repos on crippled filesystems use the all-lower case hash directories
* which are, happily, all 3 letters long, so they cannot conflict with
  mixed case hash directories
* so I was able to 100% fix this and even resuming `git annex add` in the
  test case will recover and it will all just work.

											
										
										
											2013-04-04 19:46:33 +00:00
+								calcRepo :: (Git.Repo -> GitConfig -> IO a) -> Annex a
 								calcRepo a = liftIO . calc =<< getState id
 									where
 										-- The Repo and its GitConfig are both taken from the same
 										-- read of the state before being handed to the IO action.
 										calc st = a (repo st) (gitconfig st)
-												type based git config handling for remotes

Still a couple of places that use git config ad-hoc, but this is most of it
done.

											
										
										
											2013-01-01 17:52:47 +00:00
+								{- Gets the GitConfig settings. -}
 								getGitConfig :: Annex GitConfig
 								getGitConfig = gitconfig <$> getState id
-												type based git config handling

Now there's a Config type, that's extracted from the git config at startup.
Note that laziness means that individual config values are only looked up
and parsed on demand, and so we get implicit memoization for all of them.
So this is not only prettier and more type safe, it optimises several
places that didn't have explicit memoization before. As well as getting rid
of the ugly explicit memoization code.

Not yet done for annex.<remote>.* configuration settings.

											
										
										
											2012-12-30 03:10:18 +00:00
-												type based git config handling for remotes

Still a couple of places that use git config ad-hoc, but this is most of it
done.

											
										
										
											2013-01-01 17:52:47 +00:00
+								{- Modifies a GitConfig setting. -}
 								changeGitConfig :: (GitConfig -> GitConfig) -> Annex ()
 								changeGitConfig a = changeState update
 									where
 										-- Store the result of applying a to the current GitConfig.
 										update st = st { gitconfig = a (gitconfig st) }
-												type based git config handling

Now there's a Config type, that's extracted from the git config at startup.
Note that laziness means that individual config values are only looked up
and parsed on demand, and so we get implicit memoization for all of them.
So this is not only prettier and more type safe, it optimises several
places that didn't have explicit memoization before. As well as getting rid
of the ugly explicit memoization code.

Not yet done for annex.<remote>.* configuration settings.

											
										
										
											2012-12-30 03:10:18 +00:00
-												type based git config handling for remotes

Still a couple of places that use git config ad-hoc, but this is most of it
done.

											
										
										
											2013-01-01 17:52:47 +00:00
+								{- Changing the git Repo data also involves re-extracting its GitConfig. -}
-												type based git config handling

Now there's a Config type, that's extracted from the git config at startup.
Note that laziness means that individual config values are only looked up
and parsed on demand, and so we get implicit memoization for all of them.
So this is not only prettier and more type safe, it optimises several
places that didn't have explicit memoization before. As well as getting rid
of the ugly explicit memoization code.

Not yet done for annex.<remote>.* configuration settings.

											
										
										
											2012-12-30 03:10:18 +00:00
+								changeGitRepo :: Git.Repo -> Annex ()
 								changeGitRepo r = changeState $ \s -> s
 									{ repo = r
-												type based git config handling for remotes

Still a couple of places that use git config ad-hoc, but this is most of it
done.

											
										
										
											2013-01-01 17:52:47 +00:00
+									, gitconfig = extractGitConfig r
-												type based git config handling

Now there's a Config type, that's extracted from the git config at startup.
Note that laziness means that individual config values are only looked up
and parsed on demand, and so we get implicit memoization for all of them.
So this is not only prettier and more type safe, it optimises several
places that didn't have explicit memoization before. As well as getting rid
of the ugly explicit memoization code.

Not yet done for annex.<remote>.* configuration settings.

											
										
										
											2012-12-30 03:10:18 +00:00
+									}
-												Bugfix: Fix bug in inode cache sentinel check, which broke copying to local repos if the repo being copied from had moved to a different filesystem or otherwise changed all its inodes

											
										
										
											2013-03-12 20:41:54 +00:00
 								{- Builds an IO action that runs the given Annex action starting
 								 - from the Annex state as it is at the time of this call.
 								 -
 								 - Use with caution: the IO action is run with eval, so any state
 								 - changes it makes are thrown away and never reach the caller's
 								 - state. -}
 								withCurrentState :: Annex a -> Annex (IO a)
 								withCurrentState a = snapshot <$> getState id
 									where
 										snapshot st = eval st a