Compare commits

...

No commits in common. "ci" and "alternatepipenullsplit" have entirely different histories.

13261 changed files with 398343 additions and 153 deletions

18
.forgejo/patches/ghc-9.8.patch

@@ -1,18 +0,0 @@
Support ghc-9.8 by widening a lot of constraints.
This patch can be removed once upstream supports ghc 9.8 officially.
diff -uprN git-annex-10.20240227.orig/cabal.project git-annex-10.20240227/cabal.project
--- git-annex-10.20240227.orig/cabal.project 1970-01-01 01:00:00.000000000 +0100
+++ git-annex-10.20240227/cabal.project 2024-04-28 13:30:14.061706299 +0200
@@ -0,0 +1,10 @@
+packages: *.cabal
+
+allow-newer: dav
+allow-newer: haskeline:filepath
+allow-newer: haskeline:directory
+allow-newer: xml-hamlet
+allow-newer: aws:filepath
+allow-newer: dbus:network
+allow-newer: dbus:filepath
+allow-newer: microstache:filepath


@@ -1,85 +0,0 @@
on:
workflow_dispatch:
inputs:
ref_name:
description: 'Tag or commit'
required: true
type: string
push:
tags:
- '*'
jobs:
cabal-config-edge:
name: Generate cabal config for edge
runs-on: x86_64
container:
image: alpine:edge
env:
CI_ALPINE_TARGET_RELEASE: edge
steps:
- name: Environment setup
run: apk add nodejs git cabal patch
- name: Repo pull
uses: actions/checkout@v4
with:
fetch-depth: 1
ref: ${{ inputs.ref_name }}
- name: Config generation
run: |
patch -p1 -i .forgejo/patches/ghc-9.8.patch
HOME="${{ github.workspace}}"/cabal_cache cabal update
HOME="${{ github.workspace}}"/cabal_cache cabal v2-freeze --shadow-installed-packages --strong-flags --flags="+assistant +webapp +pairing +production +torrentparser +magicmime +benchmark -debuglocks +dbus +networkbsd +gitlfs +httpclientrestricted"
mv cabal.project.freeze git-annex.config
- name: Package upload
uses: forgejo/upload-artifact@v3
with:
name: cabalconfigedge
path: git-annex*.config
cabal-config-v321:
name: Generate cabal config for v3.21
runs-on: x86_64
container:
image: alpine:3.21
env:
CI_ALPINE_TARGET_RELEASE: v3.21
steps:
- name: Environment setup
run: apk add nodejs git cabal patch
- name: Repo pull
uses: actions/checkout@v4
with:
fetch-depth: 1
ref: ${{ inputs.ref_name }}
- name: Config generation
run: |
patch -p1 -i .forgejo/patches/ghc-9.8.patch
HOME="${{ github.workspace }}"/cabal_cache cabal update
HOME="${{ github.workspace }}"/cabal_cache cabal v2-freeze --shadow-installed-packages --strong-flags --flags="+assistant +webapp +pairing +production +torrentparser +magicmime +benchmark -debuglocks +dbus +networkbsd +gitlfs +httpclientrestricted"
mv cabal.project.freeze git-annex.config
- name: Package upload
uses: forgejo/upload-artifact@v3
with:
name: cabalconfig321
path: git-annex*.config
upload-tarball:
name: Upload to generic repo
runs-on: x86_64
needs: [cabal-config-edge,cabal-config-v321]
container:
image: alpine:latest
steps:
- name: Environment setup
run: apk add nodejs curl findutils
- name: Package download
uses: forgejo/download-artifact@v3
- name: Package deployment
run: |
if test "$GITHUB_REF_NAME" = "ci" ; then
CI_REF_NAME=${{ inputs.ref_name }}
else
CI_REF_NAME=$GITHUB_REF_NAME
fi
curl --user ${{ vars.CODE_FORGEJO_USER }}:${{ secrets.CODE_FORGEJO_TOKEN }} --upload-file ./cabalconfigedge/git-annex.config ${{ github.server_url }}/api/packages/mirrors/generic/git-annex/$CI_REF_NAME/git-annex-$CI_REF_NAME-edge.cabal
curl --user ${{ vars.CODE_FORGEJO_USER }}:${{ secrets.CODE_FORGEJO_TOKEN }} --upload-file ./cabalconfig321/git-annex.config ${{ github.server_url }}/api/packages/mirrors/generic/git-annex/$CI_REF_NAME/git-annex-$CI_REF_NAME-v321.cabal


@@ -1,50 +0,0 @@
on:
workflow_dispatch:
schedule:
- cron: '@hourly'
jobs:
mirror:
name: Pull from upstream
runs-on: x86_64
container:
image: alpine:latest
env:
upstream: https://git.joeyh.name/git/git-annex.git
tags: '10.2025*'
steps:
- name: Environment setup
run: apk add grep git sed coreutils bash nodejs
- name: Fetch destination
uses: actions/checkout@v4
with:
fetch-depth: 1
ref: ci
token: ${{ secrets.CODE_FORGEJO_TOKEN }}
- name: Missing tag detecting
run: |
git ls-remote $upstream "refs/tags/$tags" | grep -v '{' | sed 's|.*/||' | sort > upstream_tags
git ls-remote ${{ github.server_url }}/${{ github.repository }} "refs/tags/$tags" | grep -v '{' | sed 's|.*/||' | sort > destination_tags
comm -23 upstream_tags destination_tags > missing_tags
echo "Missing tags:"
cat missing_tags
- name: Missing tag fetch
run: |
git remote add upstream $upstream
while read tag; do
git fetch upstream tag $tag --no-tags
done < missing_tags
- name: Packaging workflow injection
run: |
while read tag; do
git checkout $tag
git tag -d $tag
git checkout ci -- ./.forgejo
git config user.name "forgejo-actions[bot]"
git config user.email "dev@ayakael.net"
git commit -m 'Inject custom workflow'
git tag -a $tag -m $tag
done < missing_tags
- name: Push to destination
run: git push --force origin refs/tags/*:refs/tags/* --tags

4
.ghci Normal file

@@ -0,0 +1,4 @@
:load Common
:set -XLambdaCase
:set -XPackageImports
:set -fno-warn-tabs

1
.gitattributes vendored Normal file

@@ -0,0 +1 @@
CHANGELOG merge=dpkg-mergechangelogs

39
.gitignore vendored Normal file

@@ -0,0 +1,39 @@
tags
TAGS
Setup
*.hi
*.o
tmp
test
Build/SysConfig
Build/Version
Build/InstallDesktopFile
Build/Standalone
Build/BuildVersion
Build/MakeMans
git-annex
git-annex-shell
man
git-union-merge
git-union-merge.1
doc/.ikiwiki
html
*.tix
.hpc
dist
dist-newstyle
cabal.project.local
cabal.project.local~
result
# Sandboxed builds
cabal-dev
.cabal-sandbox
cabal.sandbox.config
.stack-work
stack.yaml.lock
# Project-local emacs configuration
.dir-locals.el
# OSX related
.DS_Store
.virthualenv
.tasty-rerun-log

29
.mailmap Normal file

@@ -0,0 +1,29 @@
Antoine Beaupré <anarcat@koumbit.org> anarcat <anarcat@web>
Antoine Beaupré <anarcat@koumbit.org> https://id.koumbit.net/anarcat <https://id.koumbit.net/anarcat@web>
Greg Grossmeier <greg@grossmeier.net> http://grossmeier.net/ <greg@web>
Jimmy Tang <jtang@tchpc.tcd.ie> jtang <jtang@web>
Joachim Breitner <mail@joachim-breitner.de> http://www.joachim-breitner.de/ <nomeata@web>
Joey Hess <id@joeyh.name> Joey Hess <joey@gnu.kitenet.net>
Joey Hess <id@joeyh.name> Joey Hess <joey@kitenet.net>
Joey Hess <id@joeyh.name> Joey Hess <joeyh@debian.org>
Joey Hess <id@joeyh.name> Joey Hess <joeyh@fischer.debian.org>
Joey Hess <id@joeyh.name> Joey Hess <joeyh@joeyh.name>
Joey Hess <id@joeyh.name> Joey Hess <joeyh@oberon.tam-lin.net>
Joey Hess <id@joeyh.name> Joey Hess <joeyh@oberon.underhill.private>
Joey Hess <id@joeyh.name> http://joey.kitenet.net/ <joey@web>
Joey Hess <id@joeyh.name> http://joeyh.name/ <http://joeyh.name/@web>
Joey Hess <id@joeyh.name> http://joeyh.name/ <joey@web>
Joey Hess <id@joeyh.name> https://www.google.com/accounts/o8/id?id=AItOawmJfIszzreLNvCqzqzvTayA9_9L6gb9RtY <Joey@web>
Johan Kiviniemi <devel@johan.kiviniemi.name> http://johan.kiviniemi.name/ <Johan@web>
Johan Kiviniemi <devel@johan.kiviniemi.name> http://johan.kiviniemi.name/ <http://johan.kiviniemi.name/@web>
Nicolas Pouillard <nicolas.pouillard@gmail.com> http://ertai.myopenid.com/ <npouillard@web>
Peter Simons <simons@cryp.to> Peter Simons <simons@ubuntu-12.04>
Peter Simons <simons@cryp.to> http://peter-simons.myopenid.com/ <http://peter-simons.myopenid.com/@web>
Philipp Kern <pkern@debian.org> http://phil.0x539.de/ <Philipp_Kern@web>
Richard Hartmann <richih@debian.org> https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U <Richard@web>
Yaroslav Halchenko <debian@onerussian.com>
Yaroslav Halchenko <debian@onerussian.com> http://yarikoptic.myopenid.com/ <site-myopenid@web>
Yaroslav Halchenko <debian@onerussian.com> https://www.google.com/accounts/o8/id?id=AItOawnx8kHW66N3BqmkVpgtXDlYMvr8TJ5VvfY <Yaroslav@web>
Yaroslav Halchenko <debian@onerussian.com> https://me.yahoo.com/a/EbvxpTI_xP9Aod7Mg4cwGhgjrCrdM5s-#7c0f4 <https://me.yahoo.com/a/EbvxpTI_xP9Aod7Mg4cwGhgjrCrdM5s-#7c0f4@web>
Øyvind A. Holm <sunny@sunbase.org> http://sunny256.sunbase.org/ <sunny256@web>
Øyvind A. Holm <sunny@sunbase.org> https://sunny256.wordpress.com/ <sunny256@web>

414
Annex.hs Normal file

@@ -0,0 +1,414 @@
{- git-annex monad
-
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE GeneralizedNewtypeDeriving, PackageImports, BangPatterns #-}
module Annex (
Annex,
AnnexState(..),
new,
run,
eval,
makeRunner,
getState,
changeState,
withState,
setFlag,
setField,
setOutput,
getFlag,
getField,
addCleanup,
gitRepo,
inRepo,
fromRepo,
calcRepo,
getGitConfig,
overrideGitConfig,
changeGitRepo,
adjustGitRepo,
addGitConfigOverride,
getRemoteGitConfig,
withCurrentState,
changeDirectory,
getGitRemotes,
incError,
) where
import Common
import qualified Git
import qualified Git.Config
import qualified Git.Construct
import Annex.Fixup
import Git.HashObject
import Git.CheckAttr
import Git.CheckIgnore
import qualified Git.Hook
import qualified Git.Queue
import Types.Key
import Types.Backend
import Types.GitConfig
import qualified Types.Remote
import Types.Crypto
import Types.BranchState
import Types.TrustLevel
import Types.Group
import Types.Messages
import Types.Concurrency
import Types.UUID
import Types.FileMatcher
import Types.NumCopies
import Types.LockCache
import Types.DesktopNotify
import Types.CleanupActions
import Types.AdjustedBranch
import Types.WorkerPool
import Types.IndexFiles
import Types.CatFileHandles
import Types.RemoteConfig
import qualified Database.Keys.Handle as Keys
import Utility.InodeCache
import Utility.Url
import Utility.ResourcePool
import "mtl" Control.Monad.Reader
import Control.Concurrent
import Control.Concurrent.STM
import qualified Control.Monad.Fail as Fail
import qualified Data.Map.Strict as M
import qualified Data.Set as S
{- git-annex's monad is a ReaderT around an AnnexState stored in an MVar.
- The MVar is not exposed outside this module.
-
- Note that when an Annex action fails and the exception is caught,
- any changes the action has made to the AnnexState are retained,
- due to the use of the MVar to store the state.
-}
newtype Annex a = Annex { runAnnex :: ReaderT (MVar AnnexState) IO a }
deriving (
Monad,
MonadIO,
MonadReader (MVar AnnexState),
MonadCatch,
MonadThrow,
MonadMask,
Fail.MonadFail,
Functor,
Applicative,
Alternative
)
-- internal state storage
data AnnexState = AnnexState
{ repo :: Git.Repo
, repoadjustment :: (Git.Repo -> IO Git.Repo)
, gitconfig :: GitConfig
, gitconfigadjustment :: (GitConfig -> GitConfig)
, gitremotes :: Maybe [Git.Repo]
, backend :: Maybe (BackendA Annex)
, remotes :: [Types.Remote.RemoteA Annex]
, output :: MessageState
, concurrency :: ConcurrencySetting
, force :: Bool
, fast :: Bool
, daemon :: Bool
, branchstate :: BranchState
, repoqueue :: Maybe (Git.Queue.Queue Annex)
, catfilehandles :: CatFileHandles
, hashobjecthandle :: Maybe HashObjectHandle
, checkattrhandle :: Maybe (ResourcePool CheckAttrHandle)
, checkignorehandle :: Maybe (ResourcePool CheckIgnoreHandle)
, forcebackend :: Maybe String
, globalnumcopies :: Maybe NumCopies
, forcenumcopies :: Maybe NumCopies
, limit :: ExpandableMatcher Annex
, uuiddescmap :: Maybe UUIDDescMap
, preferredcontentmap :: Maybe (FileMatcherMap Annex)
, requiredcontentmap :: Maybe (FileMatcherMap Annex)
, remoteconfigmap :: Maybe (M.Map UUID RemoteConfig)
, forcetrust :: TrustMap
, trustmap :: Maybe TrustMap
, groupmap :: Maybe GroupMap
, ciphers :: M.Map StorableCipher Cipher
, lockcache :: LockCache
, sshstalecleaned :: TMVar Bool
, flags :: M.Map String Bool
, fields :: M.Map String String
, cleanup :: M.Map CleanupAction (Annex ())
, sentinalstatus :: Maybe SentinalStatus
, useragent :: Maybe String
, errcounter :: Integer
, unusedkeys :: Maybe (S.Set Key)
, tempurls :: M.Map Key URLString
, existinghooks :: M.Map Git.Hook.Hook Bool
, desktopnotify :: DesktopNotify
, workers :: Maybe (TMVar (WorkerPool AnnexState))
, activekeys :: TVar (M.Map Key ThreadId)
, activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer)
, keysdbhandle :: Keys.DbHandle
, cachedcurrentbranch :: (Maybe (Maybe Git.Branch, Maybe Adjustment))
, cachedgitenv :: Maybe (AltIndexFile, FilePath, [(String, String)])
, urloptions :: Maybe UrlOptions
, insmudgecleanfilter :: Bool
}
newState :: GitConfig -> Git.Repo -> IO AnnexState
newState c r = do
emptyactiveremotes <- newMVar M.empty
emptyactivekeys <- newTVarIO M.empty
o <- newMessageState
sc <- newTMVarIO False
kh <- Keys.newDbHandle
return $ AnnexState
{ repo = r
, repoadjustment = return
, gitconfig = c
, gitconfigadjustment = id
, gitremotes = Nothing
, backend = Nothing
, remotes = []
, output = o
, concurrency = ConcurrencyCmdLine NonConcurrent
, force = False
, fast = False
, daemon = False
, branchstate = startBranchState
, repoqueue = Nothing
, catfilehandles = catFileHandlesNonConcurrent
, hashobjecthandle = Nothing
, checkattrhandle = Nothing
, checkignorehandle = Nothing
, forcebackend = Nothing
, globalnumcopies = Nothing
, forcenumcopies = Nothing
, limit = BuildingMatcher []
, uuiddescmap = Nothing
, preferredcontentmap = Nothing
, requiredcontentmap = Nothing
, remoteconfigmap = Nothing
, forcetrust = M.empty
, trustmap = Nothing
, groupmap = Nothing
, ciphers = M.empty
, lockcache = M.empty
, sshstalecleaned = sc
, flags = M.empty
, fields = M.empty
, cleanup = M.empty
, sentinalstatus = Nothing
, useragent = Nothing
, errcounter = 0
, unusedkeys = Nothing
, tempurls = M.empty
, existinghooks = M.empty
, desktopnotify = mempty
, workers = Nothing
, activekeys = emptyactivekeys
, activeremotes = emptyactiveremotes
, keysdbhandle = kh
, cachedcurrentbranch = Nothing
, cachedgitenv = Nothing
, urloptions = Nothing
, insmudgecleanfilter = False
}
{- Makes an Annex state object for the specified git repo.
- Ensures the config is read, if it was not already, and performs
- any necessary git repo fixups. -}
new :: Git.Repo -> IO AnnexState
new r = do
r' <- Git.Config.read =<< Git.relPath r
let c = extractGitConfig FromGitConfig r'
newState c =<< fixupRepo r' c
{- Performs an action in the Annex monad from a starting state,
- returning a new state. -}
run :: AnnexState -> Annex a -> IO (a, AnnexState)
run s a = flip run' a =<< newMVar s
run' :: MVar AnnexState -> Annex a -> IO (a, AnnexState)
run' mvar a = do
r <- runReaderT (runAnnex a) mvar
`onException` (flush =<< readMVar mvar)
s' <- takeMVar mvar
flush s'
return (r, s')
where
flush = Keys.flushDbQueue . keysdbhandle
{- Performs an action in the Annex monad from a starting state,
- and throws away the new state. -}
eval :: AnnexState -> Annex a -> IO a
eval s a = fst <$> run s a
{- Makes a runner action that allows diving into IO and, from inside
- the IO action, running an Annex action. -}
makeRunner :: Annex (Annex a -> IO a)
makeRunner = do
mvar <- ask
return $ \a -> do
(r, s) <- run' mvar a
putMVar mvar s
return r
getState :: (AnnexState -> v) -> Annex v
getState selector = do
mvar <- ask
s <- liftIO $ readMVar mvar
return $ selector s
changeState :: (AnnexState -> AnnexState) -> Annex ()
changeState modifier = do
mvar <- ask
liftIO $ modifyMVar_ mvar $ return . modifier
withState :: (AnnexState -> IO (AnnexState, b)) -> Annex b
withState modifier = do
mvar <- ask
liftIO $ modifyMVar mvar modifier
{- Sets a flag to True -}
setFlag :: String -> Annex ()
setFlag flag = changeState $ \s ->
s { flags = M.insert flag True $ flags s }
{- Sets a field to a value -}
setField :: String -> String -> Annex ()
setField field value = changeState $ \s ->
s { fields = M.insert field value $ fields s }
{- Adds a cleanup action to perform. -}
addCleanup :: CleanupAction -> Annex () -> Annex ()
addCleanup k a = changeState $ \s ->
s { cleanup = M.insert k a $ cleanup s }
{- Sets the type of output to emit. -}
setOutput :: OutputType -> Annex ()
setOutput o = changeState $ \s ->
let m = output s
in s { output = m { outputType = adjustOutputType (outputType m) o } }
{- Checks if a flag was set. -}
getFlag :: String -> Annex Bool
getFlag flag = fromMaybe False . M.lookup flag <$> getState flags
{- Gets the value of a field. -}
getField :: String -> Annex (Maybe String)
getField field = M.lookup field <$> getState fields
{- Returns the annex's git repository. -}
gitRepo :: Annex Git.Repo
gitRepo = getState repo
{- Runs an IO action in the annex's git repository. -}
inRepo :: (Git.Repo -> IO a) -> Annex a
inRepo a = liftIO . a =<< gitRepo
{- Extracts a value from the annex's git repository. -}
fromRepo :: (Git.Repo -> a) -> Annex a
fromRepo a = a <$> gitRepo
{- Calculates a value from an annex's git repository and its GitConfig. -}
calcRepo :: (Git.Repo -> GitConfig -> IO a) -> Annex a
calcRepo a = do
s <- getState id
liftIO $ a (repo s) (gitconfig s)
{- Gets the GitConfig settings. -}
getGitConfig :: Annex GitConfig
getGitConfig = getState gitconfig
{- Overrides a GitConfig setting. The modification persists across
- reloads of the repo's config. -}
overrideGitConfig :: (GitConfig -> GitConfig) -> Annex ()
overrideGitConfig f = changeState $ \s -> s
{ gitconfigadjustment = gitconfigadjustment s . f
, gitconfig = f (gitconfig s)
}
{- Adds an adjustment to the Repo data. Adjustments persist across reloads
- of the repo's config.
-
- Note that the action may run more than once, and should avoid eg,
- appending the same value to a repo's config when run repeatedly.
-}
adjustGitRepo :: (Git.Repo -> IO Git.Repo) -> Annex ()
adjustGitRepo a = do
changeState $ \s -> s { repoadjustment = \r -> repoadjustment s r >>= a }
changeGitRepo =<< gitRepo
{- Adds a git config setting, like "foo=bar". It will be passed with -c
- to git processes. The config setting is also recorded in the repo,
- and the GitConfig is updated. -}
addGitConfigOverride :: String -> Annex ()
addGitConfigOverride v = adjustGitRepo $ \r ->
Git.Config.store (encodeBS' v) Git.Config.ConfigList $
r { Git.gitGlobalOpts = go (Git.gitGlobalOpts r) }
where
-- Remove any prior occurrence of the setting to avoid
-- building up many of them when the adjustment is run repeatedly,
-- and add the setting to the end.
go [] = [Param "-c", Param v]
go (Param "-c": Param v':rest) | v' == v = go rest
go (c:rest) = c : go rest
{- Changing the git Repo data also involves re-extracting its GitConfig. -}
changeGitRepo :: Git.Repo -> Annex ()
changeGitRepo r = do
repoadjuster <- getState repoadjustment
gitconfigadjuster <- getState gitconfigadjustment
r' <- liftIO $ repoadjuster r
changeState $ \s -> s
{ repo = r'
, gitconfig = gitconfigadjuster $
extractGitConfig FromGitConfig r'
}
{- Gets the RemoteGitConfig from a remote, given the Git.Repo for that
- remote. -}
getRemoteGitConfig :: Git.Repo -> Annex RemoteGitConfig
getRemoteGitConfig r = do
g <- gitRepo
liftIO $ atomically $ extractRemoteGitConfig g (Git.repoDescribe r)
{- Converts an Annex action into an IO action, that runs with a copy
- of the current Annex state.
-
- Use with caution; the action should not rely on changing the
- state, as it will be thrown away. -}
withCurrentState :: Annex a -> Annex (IO a)
withCurrentState a = do
s <- getState id
return $ eval s a
{- It's not safe to use setCurrentDirectory in the Annex monad,
- because the git repo paths are stored relative.
- Instead, use this.
-}
changeDirectory :: FilePath -> Annex ()
changeDirectory d = do
r <- liftIO . Git.adjustPath absPath =<< gitRepo
liftIO $ setCurrentDirectory d
r' <- liftIO $ Git.relPath r
changeState $ \s -> s { repo = r' }
incError :: Annex ()
incError = changeState $ \s ->
let !c = errcounter s + 1
!s' = s { errcounter = c }
in s'
getGitRemotes :: Annex [Git.Repo]
getGitRemotes = do
s <- getState id
case gitremotes s of
Just rs -> return rs
Nothing -> do
rs <- liftIO $ Git.Construct.fromRemotes (repo s)
changeState $ \s' -> s' { gitremotes = Just rs }
return rs
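
A minimal sketch, not taken from the diff, of how this module is typically driven: Annex.new builds the state for a repo, and Annex.eval runs an action against it, discarding the final state. The demoFlag name, and obtaining the Git.Repo elsewhere, are illustrative assumptions.

import qualified Annex
import qualified Git

-- Sketch: run a small action against the given repo. new reads the
-- repo's config and applies fixups; setFlag/getFlag round-trip through
-- the MVar-backed AnnexState.
demoFlag :: Git.Repo -> IO Bool
demoFlag r = do
    st <- Annex.new r
    Annex.eval st $ do
        Annex.setFlag "demo"
        Annex.getFlag "demo"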

41
Annex/Action.hs Normal file

@@ -0,0 +1,41 @@
{- git-annex actions
-
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Action (
startup,
shutdown,
stopCoProcesses,
) where
import qualified Data.Map as M
import Annex.Common
import qualified Annex
import Annex.Content
import Annex.CatFile
import Annex.CheckAttr
import Annex.HashObject
import Annex.CheckIgnore
{- Actions to perform each time git-annex is run. -}
startup :: Annex ()
startup = return ()
{- Cleanup actions. -}
shutdown :: Bool -> Annex ()
shutdown nocommit = do
saveState nocommit
sequence_ =<< M.elems <$> Annex.getState Annex.cleanup
stopCoProcesses
{- Stops all long-running git query processes. -}
stopCoProcesses :: Annex ()
stopCoProcesses = do
catFileStop
checkAttrStop
hashObjectStop
checkIgnoreStop
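
A sketch of the intended call pattern around these hooks; the wrapper name is hypothetical, and real callers also catch exceptions so that shutdown still runs on failure.

import Annex (Annex)
import Annex.Action (startup, shutdown)

-- Hypothetical wrapper: startup before the body, then shutdown with
-- nocommit=False so that state changes are committed.
withStartupShutdown :: Annex a -> Annex a
withStartupShutdown body = do
    startup
    r <- body
    shutdown False
    return r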

642
Annex/AdjustedBranch.hs Normal file

@@ -0,0 +1,642 @@
{- adjusted branch
-
- Copyright 2016-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns, OverloadedStrings #-}
module Annex.AdjustedBranch (
Adjustment(..),
LinkAdjustment(..),
PresenceAdjustment(..),
adjustmentHidesFiles,
OrigBranch,
AdjBranch(..),
originalToAdjusted,
adjustedToOriginal,
fromAdjustedBranch,
getAdjustment,
enterAdjustedBranch,
updateAdjustedBranch,
adjustBranch,
adjustToCrippledFileSystem,
mergeToAdjustedBranch,
propigateAdjustedCommits,
AdjustedClone(..),
checkAdjustedClone,
checkVersionSupported,
isGitVersionSupported,
) where
import Annex.Common
import Types.AdjustedBranch
import Annex.AdjustedBranch.Name
import qualified Annex
import Git
import Git.Types
import qualified Git.Branch
import qualified Git.Ref
import qualified Git.Command
import qualified Git.Tree
import qualified Git.DiffTree
import qualified Git.Merge
import Git.Tree (TreeItem(..))
import Git.Sha
import Git.Env
import Git.Index
import Git.FilePath
import qualified Git.LockFile
import qualified Git.Version
import Annex.CatFile
import Annex.Link
import Annex.AutoMerge
import Annex.Content
import Annex.Tmp
import Annex.GitOverlay
import Utility.Tmp.Dir
import Utility.CopyFile
import qualified Database.Keys
import Config
import qualified Data.Map as M
import qualified Data.ByteString as S
-- How to perform various adjustments to a TreeItem.
class AdjustTreeItem t where
adjustTreeItem :: t -> TreeItem -> Annex (Maybe TreeItem)
instance AdjustTreeItem Adjustment where
adjustTreeItem (LinkAdjustment l) t = adjustTreeItem l t
adjustTreeItem (PresenceAdjustment p Nothing) t = adjustTreeItem p t
adjustTreeItem (PresenceAdjustment p (Just l)) t =
adjustTreeItem p t >>= \case
Nothing -> return Nothing
Just t' -> adjustTreeItem l t'
instance AdjustTreeItem LinkAdjustment where
adjustTreeItem UnlockAdjustment = ifSymlink adjustToPointer noAdjust
adjustTreeItem LockAdjustment = ifSymlink noAdjust adjustToSymlink
adjustTreeItem FixAdjustment = ifSymlink adjustToSymlink noAdjust
adjustTreeItem UnFixAdjustment = ifSymlink (adjustToSymlink' gitAnnexLinkCanonical) noAdjust
instance AdjustTreeItem PresenceAdjustment where
adjustTreeItem HideMissingAdjustment = \ti@(TreeItem _ _ s) ->
catKey s >>= \case
Just k -> ifM (inAnnex k)
( return (Just ti)
, return Nothing
)
Nothing -> return (Just ti)
adjustTreeItem ShowMissingAdjustment = noAdjust
ifSymlink :: (TreeItem -> Annex a) -> (TreeItem -> Annex a) -> TreeItem -> Annex a
ifSymlink issymlink notsymlink ti@(TreeItem _f m _s)
| toTreeItemType m == Just TreeSymlink = issymlink ti
| otherwise = notsymlink ti
noAdjust :: TreeItem -> Annex (Maybe TreeItem)
noAdjust = return . Just
adjustToPointer :: TreeItem -> Annex (Maybe TreeItem)
adjustToPointer ti@(TreeItem f _m s) = catKey s >>= \case
Just k -> do
Database.Keys.addAssociatedFile k f
Just . TreeItem f (fromTreeItemType TreeFile)
<$> hashPointerFile k
Nothing -> return (Just ti)
adjustToSymlink :: TreeItem -> Annex (Maybe TreeItem)
adjustToSymlink = adjustToSymlink' gitAnnexLink
adjustToSymlink' :: (FilePath -> Key -> Git.Repo -> GitConfig -> IO FilePath) -> TreeItem -> Annex (Maybe TreeItem)
adjustToSymlink' gitannexlink ti@(TreeItem f _m s) = catKey s >>= \case
Just k -> do
absf <- inRepo $ \r -> absPath $
fromRawFilePath $ fromTopFilePath f r
linktarget <- calcRepo $ gitannexlink absf k
Just . TreeItem f (fromTreeItemType TreeSymlink)
<$> hashSymlink linktarget
Nothing -> return (Just ti)
-- This is a hidden branch ref that's used as the basis for the AdjBranch,
-- since pushes can overwrite the OrigBranch at any time. So, changes
-- are propigated from the AdjBranch to the head of the BasisBranch.
newtype BasisBranch = BasisBranch Ref
-- The basis for refs/heads/adjusted/master(unlocked) is
-- refs/basis/adjusted/master(unlocked).
basisBranch :: AdjBranch -> BasisBranch
basisBranch (AdjBranch adjbranch) = BasisBranch $
Ref ("refs/basis/" <> fromRef' (Git.Ref.base adjbranch))
getAdjustment :: Branch -> Maybe Adjustment
getAdjustment = fmap fst . adjustedToOriginal
fromAdjustedBranch :: Branch -> OrigBranch
fromAdjustedBranch b = maybe b snd (adjustedToOriginal b)
{- Enter an adjusted version of current branch (or, if already in an
- adjusted version of a branch, changes the adjustment of the original
- branch).
-
- Can fail, if no branch is checked out, or if the adjusted branch already
- exists, or if staged changes prevent a checkout.
-}
enterAdjustedBranch :: Adjustment -> Annex Bool
enterAdjustedBranch adj = inRepo Git.Branch.current >>= \case
Just currbranch -> case getAdjustment currbranch of
Just curradj | curradj == adj ->
updateAdjustedBranch adj (AdjBranch currbranch)
(fromAdjustedBranch currbranch)
_ -> go currbranch
Nothing -> do
warning "not on any branch!"
return False
where
go currbranch = do
let origbranch = fromAdjustedBranch currbranch
let adjbranch = adjBranch $ originalToAdjusted origbranch adj
ifM (inRepo (Git.Ref.exists adjbranch) <&&> (not <$> Annex.getState Annex.force))
( do
mapM_ (warning . unwords)
[ [ "adjusted branch"
, Git.Ref.describe adjbranch
, "already exists."
]
, [ "Aborting because that branch may have changes that have not yet reached"
, Git.Ref.describe origbranch
]
, [ "You can check out the adjusted branch manually to enter it,"
, "or add the --force option to overwrite the old branch."
]
]
return False
, do
b <- preventCommits $ const $
adjustBranch adj origbranch
checkoutAdjustedBranch b []
)
checkoutAdjustedBranch :: AdjBranch -> [CommandParam] -> Annex Bool
checkoutAdjustedBranch (AdjBranch b) checkoutparams = do
showOutput -- checkout can have output in large repos
inRepo $ Git.Command.runBool $
[ Param "checkout"
, Param $ fromRef $ Git.Ref.base b
-- always show checkout progress, even if --quiet is used
-- to suppress other messages
, Param "--progress"
] ++ checkoutparams
{- Already in a branch with this adjustment, but the user asked to enter it
- again. This should have the same result as checking out the original branch,
- deleting and rebuilding the adjusted branch, and then checking it out.
- But, it can be implemented more efficiently than that.
-}
updateAdjustedBranch :: Adjustment -> AdjBranch -> OrigBranch -> Annex Bool
updateAdjustedBranch adj@(PresenceAdjustment _ _) (AdjBranch currbranch) origbranch = do
b <- preventCommits $ \commitlck -> do
-- Avoid losing any commits that the adjusted branch has that
-- have not yet been propigated back to the origbranch.
_ <- propigateAdjustedCommits' origbranch adj commitlck
-- Git normally won't do anything when asked to check out the
-- currently checked out branch, even when its ref has
-- changed. Work around this by writing a raw sha to .git/HEAD.
inRepo (Git.Ref.sha currbranch) >>= \case
Just headsha -> inRepo $ \r ->
writeFile (Git.Ref.headFile r) (fromRef headsha)
_ -> noop
adjustBranch adj origbranch
-- Make git checkout quiet to avoid warnings about disconnected
-- branch tips being lost.
checkoutAdjustedBranch b [Param "--quiet"]
updateAdjustedBranch adj@(LinkAdjustment _) _ origbranch = preventCommits $ \commitlck -> do
-- Not really needed here, but done for consistency.
_ <- propigateAdjustedCommits' origbranch adj commitlck
-- No need to do anything else, because link adjustments are stable.
return True
adjustToCrippledFileSystem :: Annex ()
adjustToCrippledFileSystem = do
warning "Entering an adjusted branch where files are unlocked as this filesystem does not support locked files."
checkVersionSupported
whenM (isNothing <$> inRepo Git.Branch.current) $ do
cmode <- annexCommitMode <$> Annex.getGitConfig
void $ inRepo $ Git.Branch.commitCommand cmode
[ Param "--quiet"
, Param "--allow-empty"
, Param "-m"
, Param "commit before entering adjusted unlocked branch"
]
inRepo Git.Branch.current >>= \case
Just currbranch -> case getAdjustment currbranch of
Just curradj | curradj == adj -> return ()
_ -> do
let adjbranch = originalToAdjusted currbranch adj
ifM (inRepo (Git.Ref.exists $ adjBranch adjbranch))
( unlessM (checkoutAdjustedBranch adjbranch []) $
failedenter
, unlessM (enterAdjustedBranch adj) $
failedenter
)
Nothing -> failedenter
where
adj = LinkAdjustment UnlockAdjustment
failedenter = warning "Failed to enter adjusted branch!"
setBasisBranch :: BasisBranch -> Ref -> Annex ()
setBasisBranch (BasisBranch basis) new =
inRepo $ Git.Branch.update' basis new
setAdjustedBranch :: String -> AdjBranch -> Ref -> Annex ()
setAdjustedBranch msg (AdjBranch b) r = inRepo $ Git.Branch.update msg b r
adjustBranch :: Adjustment -> OrigBranch -> Annex AdjBranch
adjustBranch adj origbranch = do
-- Start basis off with the current value of the origbranch.
setBasisBranch basis origbranch
sha <- adjustCommit adj basis
setAdjustedBranch "entering adjusted branch" adjbranch sha
return adjbranch
where
adjbranch = originalToAdjusted origbranch adj
basis = basisBranch adjbranch
adjustCommit :: Adjustment -> BasisBranch -> Annex Sha
adjustCommit adj basis = do
treesha <- adjustTree adj basis
commitAdjustedTree treesha basis
adjustTree :: Adjustment -> BasisBranch -> Annex Sha
adjustTree adj (BasisBranch basis) = do
let toadj = adjustTreeItem adj
treesha <- Git.Tree.adjustTree
toadj
[]
(\_old new -> new)
[]
basis =<< Annex.gitRepo
return treesha
type CommitsPrevented = Git.LockFile.LockHandle
{- Locks git's index file, preventing git from making a commit, merge,
- or otherwise changing the HEAD ref while the action is run.
-
- Throws an IO exception if the index file is already locked.
-}
preventCommits :: (CommitsPrevented -> Annex a) -> Annex a
preventCommits = bracket setup cleanup
where
setup = do
lck <- fromRepo $ indexFileLock . indexFile
liftIO $ Git.LockFile.openLock lck
cleanup = liftIO . Git.LockFile.closeLock
{- Commits a given adjusted tree, with the provided parent ref.
-
- This should always yield the same value, even if performed in different
- clones of a repo, at different times. The commit message and other
- metadata are based on the parent.
-}
commitAdjustedTree :: Sha -> BasisBranch -> Annex Sha
commitAdjustedTree treesha parent@(BasisBranch b) =
commitAdjustedTree' treesha parent [b]
commitAdjustedTree' :: Sha -> BasisBranch -> [Ref] -> Annex Sha
commitAdjustedTree' treesha (BasisBranch basis) parents =
go =<< catCommit basis
where
go Nothing = do
cmode <- annexCommitMode <$> Annex.getGitConfig
inRepo $ mkcommit cmode
go (Just basiscommit) = do
cmode <- annexCommitMode <$> Annex.getGitConfig
inRepo $ commitWithMetaData
(commitAuthorMetaData basiscommit)
(commitCommitterMetaData basiscommit)
(mkcommit cmode)
mkcommit cmode = Git.Branch.commitTree cmode
adjustedBranchCommitMessage parents treesha
{- This message should never be changed. -}
adjustedBranchCommitMessage :: String
adjustedBranchCommitMessage = "git-annex adjusted branch"
findAdjustingCommit :: AdjBranch -> Annex (Maybe Commit)
findAdjustingCommit (AdjBranch b) = go =<< catCommit b
where
go Nothing = return Nothing
go (Just c)
| commitMessage c == adjustedBranchCommitMessage = return (Just c)
| otherwise = case commitParent c of
[p] -> go =<< catCommit p
_ -> return Nothing
{- Update the currently checked out adjusted branch, merging the provided
- branch into it. Note that the provided branch should be a non-adjusted
- branch. -}
mergeToAdjustedBranch :: Branch -> (OrigBranch, Adjustment) -> [Git.Merge.MergeConfig] -> Bool -> Git.Branch.CommitMode -> Annex Bool
mergeToAdjustedBranch tomerge (origbranch, adj) mergeconfig canresolvemerge commitmode = catchBoolIO $
join $ preventCommits go
where
adjbranch@(AdjBranch currbranch) = originalToAdjusted origbranch adj
basis = basisBranch adjbranch
go commitsprevented =
ifM (inRepo $ Git.Branch.changed currbranch tomerge)
( do
(updatedorig, _) <- propigateAdjustedCommits'
origbranch adj commitsprevented
changestomerge updatedorig
, nochangestomerge
)
nochangestomerge = return $ return True
{- Since the adjusted branch changes files, merging tomerge
- directly into it would likely result in unnecessary merge
- conflicts. To avoid those conflicts, instead merge tomerge into
- updatedorig. The result of the merge can then be
- adjusted to yield the final adjusted branch.
-
- In order to do a merge into a ref that is not checked out,
- set the work tree to a temp directory, and set GIT_DIR
- to another temp directory, in which HEAD contains the
- updatedorig sha. GIT_COMMON_DIR is set to point to the real
- git directory, and so git can read and write objects from there,
- but will use GIT_DIR for HEAD and index.
-
- (Doing the merge this way also lets it run even though the main
- index file is currently locked.)
-}
changestomerge (Just updatedorig) = withOtherTmp $ \othertmpdir -> do
git_dir <- fromRawFilePath <$> fromRepo Git.localGitDir
tmpwt <- fromRepo gitAnnexMergeDir
withTmpDirIn othertmpdir "git" $ \tmpgit -> withWorkTreeRelated tmpgit $
withemptydir git_dir tmpwt $ withWorkTree tmpwt $ do
liftIO $ writeFile (tmpgit </> "HEAD") (fromRef updatedorig)
-- Copy in refs and packed-refs, to work
-- around bug in git 2.13.0, which
-- causes it not to look in GIT_DIR for refs.
refs <- liftIO $ dirContentsRecursive $
git_dir </> "refs"
let refs' = (git_dir </> "packed-refs") : refs
liftIO $ forM_ refs' $ \src ->
whenM (doesFileExist src) $ do
dest <- relPathDirToFile git_dir src
let dest' = tmpgit </> dest
createDirectoryUnder git_dir (takeDirectory dest')
void $ createLinkOrCopy src dest'
-- This reset makes git merge not care
-- that the work tree is empty; otherwise
-- it will think that all the files have
-- been staged for deletion, and sometimes
-- the merge includes these deletions
-- (for an unknown reason).
-- http://thread.gmane.org/gmane.comp.version-control.git/297237
inRepo $ Git.Command.run [Param "reset", Param "HEAD", Param "--quiet"]
showAction $ "Merging into " ++ fromRef (Git.Ref.base origbranch)
merged <- autoMergeFrom' tomerge Nothing mergeconfig commitmode True
(const $ resolveMerge (Just updatedorig) tomerge True)
if merged
then do
!mergecommit <- liftIO $ extractSha
<$> S.readFile (tmpgit </> "HEAD")
-- This is run after the commit lock is dropped.
return $ postmerge mergecommit
else return $ return False
changestomerge Nothing = return $ return False
withemptydir git_dir d a = bracketIO setup cleanup (const a)
where
setup = do
whenM (doesDirectoryExist d) $
removeDirectoryRecursive d
createDirectoryUnder git_dir d
cleanup _ = removeDirectoryRecursive d
{- A merge commit has been made between the basisbranch and
- tomerge. Update the basisbranch and origbranch to point
- to that commit, adjust it to get the new adjusted branch,
- and check it out.
-
- But, there may be unstaged work tree changes that conflict,
- so the check out is done by making a normal merge of
- the new adjusted branch.
-}
postmerge (Just mergecommit) = do
setBasisBranch basis mergecommit
inRepo $ Git.Branch.update' origbranch mergecommit
adjtree <- adjustTree adj (BasisBranch mergecommit)
adjmergecommit <- commitAdjustedTree adjtree (BasisBranch mergecommit)
-- Make currbranch be the parent, so that merging
-- this commit will be a fast-forward.
adjmergecommitff <- commitAdjustedTree' adjtree (BasisBranch mergecommit) [currbranch]
showAction "Merging into adjusted branch"
ifM (autoMergeFrom adjmergecommitff (Just currbranch) mergeconfig commitmode canresolvemerge)
( reparent adjtree adjmergecommit =<< getcurrentcommit
, return False
)
postmerge Nothing = return False
-- Now that the merge into the adjusted branch is complete,
-- take the tree from that merge, and attach it on top of the
-- adjmergecommit, if it's different.
reparent adjtree adjmergecommit (Just currentcommit) = do
if (commitTree currentcommit /= adjtree)
then do
cmode <- annexCommitMode <$> Annex.getGitConfig
c <- inRepo $ Git.Branch.commitTree cmode
("Merged " ++ fromRef tomerge) [adjmergecommit]
(commitTree currentcommit)
inRepo $ Git.Branch.update "updating adjusted branch" currbranch c
propigateAdjustedCommits origbranch adj
else inRepo $ Git.Branch.update "updating adjusted branch" currbranch adjmergecommit
return True
reparent _ _ Nothing = return False
getcurrentcommit = inRepo Git.Branch.currentUnsafe >>= \case
Nothing -> return Nothing
Just c -> catCommit c
{- Check for any commits present on the adjusted branch that have not yet
- been propigated to the basis branch, and propigate them to the basis
- branch and from there on to the orig branch.
-
- After propigating the commits back to the basis branch,
- rebase the adjusted branch on top of the updated basis branch.
-}
propigateAdjustedCommits :: OrigBranch -> Adjustment -> Annex ()
propigateAdjustedCommits origbranch adj =
preventCommits $ \commitsprevented ->
join $ snd <$> propigateAdjustedCommits' origbranch adj commitsprevented
{- Returns sha of updated basis branch, and action which will rebase
- the adjusted branch on top of the updated basis branch. -}
propigateAdjustedCommits'
:: OrigBranch
-> Adjustment
-> CommitsPrevented
-> Annex (Maybe Sha, Annex ())
propigateAdjustedCommits' origbranch adj _commitsprevented =
inRepo (Git.Ref.sha basis) >>= \case
Just origsha -> catCommit currbranch >>= \case
Just currcommit ->
newcommits >>= go origsha False >>= \case
Left e -> do
warning e
return (Nothing, return ())
Right newparent -> return
( Just newparent
, rebase currcommit newparent
)
Nothing -> return (Nothing, return ())
Nothing -> return (Nothing, return ())
where
(BasisBranch basis) = basisBranch adjbranch
adjbranch@(AdjBranch currbranch) = originalToAdjusted origbranch adj
newcommits = inRepo $ Git.Branch.changedCommits basis currbranch
-- Get commits oldest first, so they can be processed
-- in order made.
[Param "--reverse"]
go parent _ [] = do
setBasisBranch (BasisBranch basis) parent
inRepo $ Git.Branch.update' origbranch parent
return (Right parent)
go parent pastadjcommit (sha:l) = catCommit sha >>= \case
Just c
| commitMessage c == adjustedBranchCommitMessage ->
go parent True l
| pastadjcommit ->
reverseAdjustedCommit parent adj (sha, c) origbranch
>>= \case
Left e -> return (Left e)
Right commit -> go commit pastadjcommit l
_ -> go parent pastadjcommit l
rebase currcommit newparent = do
-- Reuse the current adjusted tree, and reparent it
-- on top of the newparent.
commitAdjustedTree (commitTree currcommit) (BasisBranch newparent)
>>= inRepo . Git.Branch.update rebaseOnTopMsg currbranch
rebaseOnTopMsg :: String
rebaseOnTopMsg = "rebasing adjusted branch on top of updated original branch"
{- Reverses an adjusted commit, and commit with provided commitparent,
- yielding a commit sha.
-
- Adjusts the tree of the commitparent, changing only the files that the
- commit changed, and reverse adjusting those changes.
-
- The commit message, and the author and committer metadata are
- copied over from the basiscommit. However, any gpg signature
- will be lost, and any other headers are not copied either. -}
reverseAdjustedCommit :: Sha -> Adjustment -> (Sha, Commit) -> OrigBranch -> Annex (Either String Sha)
reverseAdjustedCommit commitparent adj (csha, basiscommit) origbranch
| length (commitParent basiscommit) > 1 = return $
Left $ "unable to propigate merge commit " ++ show csha ++ " back to " ++ show origbranch
| otherwise = do
cmode <- annexCommitMode <$> Annex.getGitConfig
treesha <- reverseAdjustedTree commitparent adj csha
revadjcommit <- inRepo $ commitWithMetaData
(commitAuthorMetaData basiscommit)
(commitCommitterMetaData basiscommit) $
Git.Branch.commitTree cmode
(commitMessage basiscommit)
[commitparent] treesha
return (Right revadjcommit)
{- Adjusts the tree of the basis, changing only the files that the
- commit changed, and reverse adjusting those changes.
-
- commitDiff does not support merge commits, so the csha must not be a
- merge commit. -}
reverseAdjustedTree :: Sha -> Adjustment -> Sha -> Annex Sha
reverseAdjustedTree basis adj csha = do
(diff, cleanup) <- inRepo (Git.DiffTree.commitDiff csha)
let (adds, others) = partition (\dti -> Git.DiffTree.srcsha dti `elem` nullShas) diff
let (removes, changes) = partition (\dti -> Git.DiffTree.dstsha dti `elem` nullShas) others
adds' <- catMaybes <$>
mapM (adjustTreeItem reverseadj) (map diffTreeToTreeItem adds)
treesha <- Git.Tree.adjustTree
(propchanges changes)
adds'
(\_old new -> new)
(map Git.DiffTree.file removes)
basis
=<< Annex.gitRepo
void $ liftIO cleanup
return treesha
where
reverseadj = reverseAdjustment adj
propchanges changes ti@(TreeItem f _ _) =
case M.lookup (norm f) m of
Nothing -> return (Just ti) -- not changed
Just change -> adjustTreeItem reverseadj change
where
m = M.fromList $ map (\i@(TreeItem f' _ _) -> (norm f', i)) $
map diffTreeToTreeItem changes
norm = normalise . fromRawFilePath . getTopFilePath
diffTreeToTreeItem :: Git.DiffTree.DiffTreeItem -> TreeItem
diffTreeToTreeItem dti = TreeItem
(Git.DiffTree.file dti)
(Git.DiffTree.dstmode dti)
(Git.DiffTree.dstsha dti)
data AdjustedClone = InAdjustedClone | NotInAdjustedClone
{- Cloning a repository that has an adjusted branch checked out will
- result in the clone having the same adjusted branch checked out -- but
- the origbranch won't exist in the clone, nor will the basis. So
- to properly set up the adjusted branch, the origbranch and basis need
- to be set.
-
- We can't trust that the origin's origbranch matches up with the currently
- checked out adjusted branch; the origin could have the two branches
- out of sync (eg, due to another branch having been pushed to the origin's
- origbranch), or due to a commit on its adjusted branch not having been
- propigated back to origbranch.
-
- So, find the adjusting commit on the currently checked out adjusted
- branch, and use the parent of that commit as the basis, and set the
- origbranch to it.
-
- The repository may also need to be upgraded to a new version, if the
- current version is too old to support adjusted branches. -}
checkAdjustedClone :: Annex AdjustedClone
checkAdjustedClone = ifM isBareRepo
( return NotInAdjustedClone
, go =<< inRepo Git.Branch.current
)
where
go Nothing = return NotInAdjustedClone
go (Just currbranch) = case adjustedToOriginal currbranch of
Nothing -> return NotInAdjustedClone
Just (adj, origbranch) -> do
let basis@(BasisBranch bb) = basisBranch (originalToAdjusted origbranch adj)
unlessM (inRepo $ Git.Ref.exists bb) $ do
unlessM (inRepo $ Git.Ref.exists origbranch) $ do
let remotebranch = Git.Ref.underBase "refs/remotes/origin" origbranch
inRepo $ Git.Branch.update' origbranch remotebranch
aps <- fmap commitParent <$> findAdjustingCommit (AdjBranch currbranch)
case aps of
Just [p] -> setBasisBranch basis p
_ -> giveup $ "Unable to clean up from clone of adjusted branch; perhaps you should check out " ++ Git.Ref.describe origbranch
return InAdjustedClone
checkVersionSupported :: Annex ()
checkVersionSupported =
unlessM (liftIO isGitVersionSupported) $
giveup "Your version of git is too old; upgrade it to 2.2.0 or newer to use adjusted branches."
-- git 2.2.0 needed for GIT_COMMON_DIR which is needed
-- by updateAdjustedBranch to use withWorkTreeRelated.
isGitVersionSupported :: IO Bool
isGitVersionSupported = not <$> Git.Version.older "2.2.0"
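
A sketch of entering an unlocked adjusted branch via the exports above, mirroring what adjustToCrippledFileSystem does minus its crippled-filesystem handling; the enterUnlocked name is hypothetical.

import Annex (Annex)
import Annex.AdjustedBranch

-- Gives up if git is older than 2.2.0, then switches the checkout to
-- refs/heads/adjusted/<branch>(unlocked); False means it could not.
enterUnlocked :: Annex Bool
enterUnlocked = do
    checkVersionSupported
    enterAdjustedBranch (LinkAdjustment UnlockAdjustment)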

88
Annex/AdjustedBranch/Name.hs Normal file

@@ -0,0 +1,88 @@
{- adjusted branch names
-
- Copyright 2016-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.AdjustedBranch.Name (
originalToAdjusted,
adjustedToOriginal,
AdjBranch(..),
OrigBranch,
) where
import Types.AdjustedBranch
import Git
import qualified Git.Ref
import Utility.Misc
import Control.Applicative
import Data.Char
import qualified Data.ByteString as S
adjustedBranchPrefix :: S.ByteString
adjustedBranchPrefix = "refs/heads/adjusted/"
class SerializeAdjustment t where
serializeAdjustment :: t -> S.ByteString
deserializeAdjustment :: S.ByteString -> Maybe t
instance SerializeAdjustment Adjustment where
serializeAdjustment (LinkAdjustment l) =
serializeAdjustment l
serializeAdjustment (PresenceAdjustment p Nothing) =
serializeAdjustment p
serializeAdjustment (PresenceAdjustment p (Just l)) =
serializeAdjustment p <> "-" <> serializeAdjustment l
deserializeAdjustment s =
(LinkAdjustment <$> deserializeAdjustment s)
<|>
(PresenceAdjustment <$> deserializeAdjustment s1 <*> pure (deserializeAdjustment s2))
<|>
(PresenceAdjustment <$> deserializeAdjustment s <*> pure Nothing)
where
(s1, s2) = separate' (== (fromIntegral (ord '-'))) s
instance SerializeAdjustment LinkAdjustment where
serializeAdjustment UnlockAdjustment = "unlocked"
serializeAdjustment LockAdjustment = "locked"
serializeAdjustment FixAdjustment = "fixed"
serializeAdjustment UnFixAdjustment = "unfixed"
deserializeAdjustment "unlocked" = Just UnlockAdjustment
deserializeAdjustment "locked" = Just LockAdjustment
deserializeAdjustment "fixed" = Just FixAdjustment
deserializeAdjustment "unfixed" = Just UnFixAdjustment
deserializeAdjustment _ = Nothing
instance SerializeAdjustment PresenceAdjustment where
serializeAdjustment HideMissingAdjustment = "hidemissing"
serializeAdjustment ShowMissingAdjustment = "showmissing"
deserializeAdjustment "hidemissing" = Just HideMissingAdjustment
deserializeAdjustment "showmissing" = Just ShowMissingAdjustment
deserializeAdjustment _ = Nothing
newtype AdjBranch = AdjBranch { adjBranch :: Branch }
originalToAdjusted :: OrigBranch -> Adjustment -> AdjBranch
originalToAdjusted orig adj = AdjBranch $ Ref $
adjustedBranchPrefix <> base <> "(" <> serializeAdjustment adj <> ")"
where
base = fromRef' (Git.Ref.base orig)
type OrigBranch = Branch
adjustedToOriginal :: Branch -> Maybe (Adjustment, OrigBranch)
adjustedToOriginal b
| adjustedBranchPrefix `S.isPrefixOf` bs = do
let (base, as) = separate' (== openparen) (S.drop prefixlen bs)
adj <- deserializeAdjustment (S.takeWhile (/= closeparen) as)
Just (adj, Git.Ref.branchRef (Ref base))
| otherwise = Nothing
where
bs = fromRef' b
prefixlen = S.length adjustedBranchPrefix
openparen = fromIntegral (ord '(')
closeparen = fromIntegral (ord ')')
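
The naming scheme round-trips; a small sketch, assuming OverloadedStrings for the Ref literal and the Adjustment constructors from Types.AdjustedBranch:

{-# LANGUAGE OverloadedStrings #-}
import Git (Ref(..))
import Types.AdjustedBranch
import Annex.AdjustedBranch.Name

-- originalToAdjusted maps refs/heads/master to
-- refs/heads/adjusted/master(unlocked); adjustedToOriginal parses the
-- adjustment and the original branch back out of that name.
roundTrip :: Maybe (Adjustment, OrigBranch)
roundTrip = adjustedToOriginal $ adjBranch $
    originalToAdjusted (Ref "refs/heads/master")
        (LinkAdjustment UnlockAdjustment)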

371
Annex/AutoMerge.hs Normal file

@@ -0,0 +1,371 @@
{- git-annex automatic merge conflict resolution
-
- Copyright 2012-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.AutoMerge
( autoMergeFrom
, autoMergeFrom'
, resolveMerge
, commitResolvedMerge
) where
import Annex.Common
import qualified Annex
import qualified Annex.Queue
import Annex.CatFile
import Annex.Link
import Annex.Content
import qualified Git.LsFiles as LsFiles
import qualified Git.UpdateIndex as UpdateIndex
import qualified Git.Merge
import qualified Git.Ref
import qualified Git
import qualified Git.Branch
import Git.Types (TreeItemType(..), fromTreeItemType)
import Git.FilePath
import Annex.ReplaceFile
import Annex.VariantFile
import qualified Database.Keys
import Annex.InodeSentinal
import Utility.InodeCache
import Utility.FileMode
import qualified Data.Set as S
import qualified Data.Map as M
import qualified Data.ByteString.Lazy as L
import qualified Utility.RawFilePath as R
{- Merges from a branch into the current branch (which may not exist yet),
- with automatic merge conflict resolution.
-
- Callers should use Git.Branch.changed first, to make sure that
- there are changes from the current branch to the branch being merged in.
-}
autoMergeFrom :: Git.Ref -> Maybe Git.Ref -> [Git.Merge.MergeConfig] -> Git.Branch.CommitMode -> Bool -> Annex Bool
autoMergeFrom branch currbranch mergeconfig commitmode canresolvemerge =
autoMergeFrom' branch currbranch mergeconfig commitmode canresolvemerge resolvemerge
where
resolvemerge old
| canresolvemerge = resolveMerge old branch False
| otherwise = return False
autoMergeFrom' :: Git.Ref -> Maybe Git.Ref -> [Git.Merge.MergeConfig] -> Git.Branch.CommitMode -> Bool -> (Maybe Git.Ref -> Annex Bool) -> Annex Bool
autoMergeFrom' branch currbranch mergeconfig commitmode willresolvemerge toresolvemerge = do
showOutput
case currbranch of
Nothing -> go Nothing
Just b -> go =<< inRepo (Git.Ref.sha b)
where
go old = do
-- merge.directoryRenames=conflict plus automatic
-- merge conflict resolution results in files in a
-- "renamed" directory getting variant names,
-- so is not a great combination. If the user has
-- explicitly set it, use it, but otherwise when
-- merge conflicts will be resolved, override
-- to merge.directoryRenames=false.
overridedirectoryrenames <- if willresolvemerge
then isNothing . mergeDirectoryRenames
<$> Annex.getGitConfig
else pure False
let f r
| overridedirectoryrenames = r
{ Git.gitGlobalOpts =
Param "-c"
: Param "merge.directoryRenames=false"
: Git.gitGlobalOpts r
}
| otherwise = r
r <- inRepo (Git.Merge.merge branch mergeconfig commitmode . f)
<||> (toresolvemerge old <&&> commitResolvedMerge commitmode)
-- Merging can cause new associated files to appear
-- and the smudge filter will add them to the database.
-- To ensure that this process sees those changes,
-- close the database if it was open.
Database.Keys.closeDb
return r
{- Resolves a conflicted merge. It's important that any conflicts be
- resolved in a way that itself avoids later merge conflicts, since
- multiple repositories may be doing this concurrently.
-
- Only merge conflicts where at least one side is an annexed file
- are resolved.
-
- This uses the Keys pointed to by the files to construct new
- filenames. So when both sides modified annexed file foo,
- it will be deleted, and replaced with files foo.variant-A and
- foo.variant-B.
-
- On the other hand, when one side deleted foo, and the other modified it,
- it will be deleted, and the modified version stored as file
- foo.variant-A (or B).
-
- It's also possible that one side has foo as an annexed file, and
- the other as a directory or non-annexed file. The annexed file
- is renamed to resolve the merge, and the other object is preserved as-is.
-
- The merge is resolved in the work tree and files
- staged, to clean up from a conflicted merge that was run in the work
- tree.
-
- This is complicated by needing to support merges run in an overlay
- work tree, in which case the CWD won't be within the work tree.
- In this mode, there is no need to update the work tree at all,
- as the overlay work tree will get deleted.
-
- Unlocked files remain unlocked after merging, and locked files
- remain locked. When the merge conflict is between a locked and unlocked
- file, that otherwise point to the same content, the unlocked mode wins.
- This is done because only unlocked files work in filesystems that don't
- support symlinks.
-
- Returns false when there are no merge conflicts to resolve.
- A git merge can fail for other reasons, and this allows detecting
- such failures.
-}
resolveMerge :: Maybe Git.Ref -> Git.Ref -> Bool -> Annex Bool
resolveMerge us them inoverlay = do
top <- if inoverlay
then pure "."
else fromRepo Git.repoPath
(fs, cleanup) <- inRepo (LsFiles.unmerged [top])
srcmap <- if inoverlay
then pure M.empty
else inodeMap $ pure (map LsFiles.unmergedFile fs, return True)
(mergedks, mergedfs) <- unzip <$> mapM (resolveMerge' srcmap us them inoverlay) fs
let mergedks' = concat mergedks
let mergedfs' = catMaybes mergedfs
let merged = not (null mergedfs')
void $ liftIO cleanup
unless inoverlay $ do
(deleted, cleanup2) <- inRepo (LsFiles.deleted [] [top])
unless (null deleted) $
Annex.Queue.addCommand "rm"
[Param "--quiet", Param "-f", Param "--"]
(map fromRawFilePath deleted)
void $ liftIO cleanup2
when merged $ do
Annex.Queue.flush
unless inoverlay $ do
unstagedmap <- inodeMap $ inRepo $
LsFiles.notInRepo [] False [top]
cleanConflictCruft mergedks' mergedfs' unstagedmap
showLongNote "Merge conflict was automatically resolved; you may want to examine the result."
return merged
resolveMerge' :: InodeMap -> Maybe Git.Ref -> Git.Ref -> Bool -> LsFiles.Unmerged -> Annex ([Key], Maybe FilePath)
resolveMerge' _ Nothing _ _ _ = return ([], Nothing)
resolveMerge' unstagedmap (Just us) them inoverlay u = do
kus <- getkey LsFiles.valUs
kthem <- getkey LsFiles.valThem
case (kus, kthem) of
-- Both sides of conflict are annexed files
(Just keyUs, Just keyThem)
| keyUs /= keyThem -> resolveby [keyUs, keyThem] $ do
makevariantannexlink keyUs LsFiles.valUs
makevariantannexlink keyThem LsFiles.valThem
-- cleanConflictCruft can't handle unlocked
-- files, so delete here.
unless inoverlay $
unless (islocked LsFiles.valUs) $
liftIO $ nukeFile file
| otherwise -> do
-- Only resolve using symlink when both
-- were locked, otherwise use unlocked
-- pointer.
-- In either case, keep original filename.
if islocked LsFiles.valUs && islocked LsFiles.valThem
then makesymlink keyUs file
else makepointer keyUs file (combinedmodes)
return ([keyUs, keyThem], Just file)
-- Our side is annexed file, other side is not.
-- Make the annexed file into a variant file and graft in the
-- other file/directory as it was.
(Just keyUs, Nothing) -> resolveby [keyUs] $ do
graftin them file LsFiles.valThem LsFiles.valThem LsFiles.valUs
makevariantannexlink keyUs LsFiles.valUs
-- Our side is not annexed file, other side is.
(Nothing, Just keyThem) -> resolveby [keyThem] $ do
graftin us file LsFiles.valUs LsFiles.valUs LsFiles.valThem
makevariantannexlink keyThem LsFiles.valThem
-- Neither side is annexed file; cannot resolve.
(Nothing, Nothing) -> return ([], Nothing)
where
file = fromRawFilePath $ LsFiles.unmergedFile u
getkey select =
case select (LsFiles.unmergedSha u) of
Just sha -> catKey sha
Nothing -> pure Nothing
islocked select = select (LsFiles.unmergedTreeItemType u) == Just TreeSymlink
combinedmodes = case catMaybes [ourmode, theirmode] of
[] -> Nothing
l -> Just (combineModes l)
where
ourmode = fromTreeItemType
<$> LsFiles.valUs (LsFiles.unmergedTreeItemType u)
theirmode = fromTreeItemType
<$> LsFiles.valThem (LsFiles.unmergedTreeItemType u)
makevariantannexlink key select
| islocked select = makesymlink key dest
| otherwise = makepointer key dest destmode
where
dest = variantFile file key
destmode = fromTreeItemType <$> select (LsFiles.unmergedTreeItemType u)
stagefile :: FilePath -> Annex FilePath
stagefile f
| inoverlay = (</> f) . fromRawFilePath <$> fromRepo Git.repoPath
| otherwise = pure f
makesymlink key dest = do
l <- calcRepo $ gitAnnexLink dest key
unless inoverlay $ replacewithsymlink dest l
dest' <- toRawFilePath <$> stagefile dest
stageSymlink dest' =<< hashSymlink l
replacewithsymlink dest link = replaceWorkTreeFile dest $
makeGitLink link . toRawFilePath
makepointer key dest destmode = do
unless inoverlay $
unlessM (reuseOldFile unstagedmap key file dest) $
linkFromAnnex key dest destmode >>= \case
LinkAnnexFailed -> liftIO $
writePointerFile (toRawFilePath dest) key destmode
_ -> noop
dest' <- toRawFilePath <$> stagefile dest
stagePointerFile dest' destmode =<< hashPointerFile key
unless inoverlay $
Database.Keys.addAssociatedFile key
=<< inRepo (toTopFilePath (toRawFilePath dest))
{- Stage a graft of a directory or file from a branch
- and update the work tree. -}
graftin b item selectwant selectwant' selectunwant = do
Annex.Queue.addUpdateIndex
=<< fromRepo (UpdateIndex.lsSubTree b item)
-- Update the work tree to reflect the graft.
unless inoverlay $ case (selectwant (LsFiles.unmergedTreeItemType u), selectunwant (LsFiles.unmergedTreeItemType u)) of
-- Symlinks are never left in work tree when
-- there's a conflict with anything else.
-- So, when grafting in a symlink, we must create it:
(Just TreeSymlink, _) -> do
case selectwant' (LsFiles.unmergedSha u) of
Nothing -> noop
Just sha -> do
link <- catSymLinkTarget sha
replacewithsymlink item (fromRawFilePath link)
-- And when grafting in anything else vs a symlink,
-- the work tree already contains what we want.
(_, Just TreeSymlink) -> noop
_ -> ifM (liftIO $ doesDirectoryExist item)
-- a conflict between a file and a directory
-- leaves the directory, so since a directory
-- is there, it must be what was wanted
( noop
-- probably a file with conflict markers is
-- in the work tree; replace with grafted
-- file content
, case selectwant' (LsFiles.unmergedSha u) of
Nothing -> noop
Just sha -> replaceWorkTreeFile item $ \tmp -> do
c <- catObject sha
liftIO $ L.writeFile tmp c
)
resolveby ks a = do
{- Remove conflicted file from index so merge can be resolved. -}
Annex.Queue.addCommand "rm"
[Param "--quiet", Param "-f", Param "--cached", Param "--"] [file]
void a
return (ks, Just file)
{- git-merge moves conflicting files away to files
- named something like f~HEAD or f~branch or just f, but the
- exact name chosen can vary. Once the conflict is resolved,
- this cruft can be deleted. To avoid deleting legitimate
- files that look like this, only delete files that are
- A) not staged in git and
- B) have a name related to the merged files and
- C) are pointers to or have the content of keys that were involved
- in the merge.
-}
cleanConflictCruft :: [Key] -> [FilePath] -> InodeMap -> Annex ()
cleanConflictCruft resolvedks resolvedfs unstagedmap = do
is <- S.fromList . map (inodeCacheToKey Strongly) . concat
<$> mapM Database.Keys.getInodeCaches resolvedks
forM_ (M.toList unstagedmap) $ \(i, f) ->
whenM (matchesresolved is i f) $
liftIO $ nukeFile f
where
fs = S.fromList resolvedfs
ks = S.fromList resolvedks
inks = maybe False (flip S.member ks)
matchesresolved is i f
| S.member f fs || S.member (conflictCruftBase f) fs = anyM id
[ pure $ either (const False) (`S.member` is) i
, inks <$> isAnnexLink (toRawFilePath f)
, inks <$> liftIO (isPointerFile (toRawFilePath f))
]
| otherwise = return False
conflictCruftBase :: FilePath -> FilePath
conflictCruftBase f = reverse $ drop 1 $ dropWhile (/= '~') $ reverse f
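{- For example (illustrative only):
 -
 -     conflictCruftBase "foo~HEAD"   == "foo"
 -     conflictCruftBase "foo~branch" == "foo"
 -     conflictCruftBase "foo"        == ""
 -
 - A name without a '~' yields the empty string, which will not match
 - any resolved file in cleanConflictCruft's checks. -}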
{- When possible, reuse an existing file from the srcmap as the
- content of a worktree file in the resolved merge. It must have the
- same name as the origfile, or a name that git would use for conflict
- cruft. And, its inode cache must be a known one for the key. -}
reuseOldFile :: InodeMap -> Key -> FilePath -> FilePath -> Annex Bool
reuseOldFile srcmap key origfile destfile = do
is <- map (inodeCacheToKey Strongly)
<$> Database.Keys.getInodeCaches key
liftIO $ go $ mapMaybe (\i -> M.lookup (Right i) srcmap) is
where
go [] = return False
go (f:fs)
| f == origfile || conflictCruftBase f == origfile =
ifM (doesFileExist f)
( do
renameFile f destfile
return True
, go fs
)
| otherwise = go fs
commitResolvedMerge :: Git.Branch.CommitMode -> Annex Bool
commitResolvedMerge commitmode = inRepo $ Git.Branch.commitCommand commitmode
[ Param "--no-verify"
, Param "-m"
, Param "git-annex automatic merge conflict fix"
]
type InodeMap = M.Map (Either FilePath InodeCacheKey) FilePath
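-- A note on the keys (added for clarity): symlinks are keyed by their
-- file path (Left), since their content is not of interest, while other
-- files are keyed by their inode cache (Right), so that moved or
-- renamed copies of the same content can be found.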
inodeMap :: Annex ([RawFilePath], IO Bool) -> Annex InodeMap
inodeMap getfiles = do
(fs, cleanup) <- getfiles
fsis <- forM fs $ \f -> do
s <- liftIO $ R.getSymbolicLinkStatus f
let f' = fromRawFilePath f
if isSymbolicLink s
then pure $ Just (Left f', f')
else withTSDelta (\d -> liftIO $ toInodeCache d f' s)
>>= return . \case
Just i -> Just (Right (inodeCacheToKey Strongly i), f')
Nothing -> Nothing
void $ liftIO cleanup
return $ M.fromList $ catMaybes fsis

53
Annex/BloomFilter.hs Normal file
View file

@ -0,0 +1,53 @@
{- git-annex bloom filter
-
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.BloomFilter where
import Annex.Common
import qualified Annex
import Utility.Bloom
import Control.Monad.ST
{- A bloom filter capable of holding half a million keys with a
- false positive rate of 1 in 10000000 uses around 16 mb of memory,
- so will easily fit on even my lowest memory systems.
-}
bloomCapacity :: Annex Int
bloomCapacity = fromMaybe 500000 . annexBloomCapacity <$> Annex.getGitConfig
bloomAccuracy :: Annex Int
bloomAccuracy = fromMaybe 10000000 . annexBloomAccuracy <$> Annex.getGitConfig
bloomBitsHashes :: Annex (Int, Int)
bloomBitsHashes = do
capacity <- bloomCapacity
accuracy <- bloomAccuracy
case safeSuggestSizing capacity (1 / fromIntegral accuracy) of
Left e -> do
warning $ "bloomfilter " ++ e ++ "; falling back to sane value"
-- precalculated value for 500000 (1/10000000)
return (16777216,23)
Right v -> return v
{- Creates a bloom filter, and runs an action to populate it.
-
- The action is passed a callback that it can use to feed values into the
- bloom filter.
-
- Once the action completes, the mutable filter is frozen
- for later use.
-}
genBloomFilter :: Hashable v => ((v -> Annex ()) -> Annex ()) -> Annex (Bloom v)
genBloomFilter populate = do
(numbits, numhashes) <- bloomBitsHashes
bloom <- lift $ newMB (cheapHashes numhashes) numbits
populate $ \v -> lift $ insertMB bloom v
lift $ unsafeFreezeMB bloom
where
lift = liftIO . stToIO
bloomFilter :: [v] -> Bloom v -> [v]
bloomFilter l bloom = filter (\v -> v `notElemB` bloom) l
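{- A minimal usage sketch (illustrative only, not part of the original
 - module): build a filter from a list of known values, then drop
 - probable members from a candidate list. -}
exampleBloomUse :: Hashable v => [v] -> [v] -> Annex [v]
exampleBloomUse known candidates = do
	bloom <- genBloomFilter $ \feed -> mapM_ feed known
	-- Keeps only candidates that are definitely not among the known
	-- values; false positives mean a few others may be dropped too.
	return (bloomFilter candidates bloom)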

716
Annex/Branch.hs Normal file
View file

@ -0,0 +1,716 @@
{- management of the git-annex branch
-
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Branch (
fullname,
name,
hasOrigin,
hasSibling,
siblingBranches,
create,
UpdateMade(..),
update,
forceUpdate,
updateTo,
get,
getHistorical,
change,
maybeChange,
commitMessage,
commit,
forceCommit,
getBranch,
files,
rememberTreeish,
performTransitions,
withIndex,
) where
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Char8 as B8
import qualified Data.Set as S
import qualified Data.Map as M
import Data.Function
import Data.Char
import Data.ByteString.Builder
import Control.Concurrent (threadDelay)
import Annex.Common
import Types.BranchState
import Annex.BranchState
import Annex.Journal
import Annex.GitOverlay
import Annex.Tmp
import qualified Git
import qualified Git.Command
import qualified Git.Ref
import qualified Git.RefLog
import qualified Git.Sha
import qualified Git.Branch
import qualified Git.UnionMerge
import qualified Git.UpdateIndex
import qualified Git.Tree
import qualified Git.LsTree
import Git.LsTree (lsTreeParams)
import qualified Git.HashObject
import Annex.HashObject
import Git.Types (Ref(..), fromRef, fromRef', RefDate, TreeItemType(..))
import Git.FilePath
import Annex.CatFile
import Annex.Perms
import Logs
import Logs.Transitions
import Logs.File
import Logs.Trust.Pure
import Logs.Remote.Pure
import Logs.Difference.Pure
import qualified Annex.Queue
import Annex.Branch.Transitions
import qualified Annex
import Annex.Hook
import Utility.Directory.Stream
{- Name of the branch that is used to store git-annex's information. -}
name :: Git.Ref
name = Git.Ref "git-annex"
{- Fully qualified name of the branch. -}
fullname :: Git.Ref
fullname = Git.Ref $ "refs/heads/" <> fromRef' name
{- Branch's name in origin. -}
originname :: Git.Ref
originname = Git.Ref $ "origin/" <> fromRef' name
{- Does origin/git-annex exist? -}
hasOrigin :: Annex Bool
hasOrigin = inRepo $ Git.Ref.exists originname
{- Does the git-annex branch or a sibling foo/git-annex branch exist? -}
hasSibling :: Annex Bool
hasSibling = not . null <$> siblingBranches
{- List of git-annex (shas, branches), including the main one and any
- from remotes. Duplicates are filtered out. -}
siblingBranches :: Annex [(Git.Sha, Git.Branch)]
siblingBranches = inRepo $ Git.Ref.matchingUniq [name]
{- Creates the branch, if it does not already exist. -}
create :: Annex ()
create = void getBranch
{- Returns the ref of the branch, creating it first if necessary. -}
getBranch :: Annex Git.Ref
getBranch = maybe (hasOrigin >>= go >>= use) return =<< branchsha
where
go True = do
inRepo $ Git.Command.run
[Param "branch", Param $ fromRef name, Param $ fromRef originname]
fromMaybe (error $ "failed to create " ++ fromRef name)
<$> branchsha
go False = withIndex' True $ do
cmode <- annexCommitMode <$> Annex.getGitConfig
inRepo $ Git.Branch.commitAlways cmode "branch created" fullname []
use sha = do
setIndexSha sha
return sha
branchsha = inRepo $ Git.Ref.sha fullname
{- Ensures that the branch and index are up-to-date; should be
- called before data is read from it. Runs only once per git-annex run. -}
update :: Annex BranchState
update = runUpdateOnce $ journalClean <$$> updateTo =<< siblingBranches
{- Forces an update even if one has already been run. -}
forceUpdate :: Annex UpdateMade
forceUpdate = updateTo =<< siblingBranches
data UpdateMade = UpdateMade
{ refsWereMerged :: Bool
, journalClean :: Bool
}
{- Merges the specified Refs into the index, if they have any changes not
- already in it. The Branch names are only used in the commit message;
- it's even possible that the provided Branches have not been updated to
- point to the Refs yet.
-
- The branch is fast-forwarded if possible, otherwise a merge commit is
- made.
-
- Before Refs are merged into the index, it's important to first stage the
- journal into the index. Otherwise, any changes in the journal would
- later get staged, and might overwrite changes made during the merge.
- This is only done if some of the Refs do need to be merged.
-
- Also handles performing any Transitions that have not yet been
- performed, in either the local branch, or the Refs.
-
- Returns an UpdateMade, indicating whether any refs were merged in,
- and whether the journal is now clean.
-}
updateTo :: [(Git.Sha, Git.Branch)] -> Annex UpdateMade
updateTo pairs = ifM (annexMergeAnnexBranches <$> Annex.getGitConfig)
( updateTo' pairs
, return (UpdateMade False False)
)
updateTo' :: [(Git.Sha, Git.Branch)] -> Annex UpdateMade
updateTo' pairs = do
-- ensure branch exists, and get its current ref
branchref <- getBranch
dirty <- journalDirty
ignoredrefs <- getIgnoredRefs
let unignoredrefs = excludeset ignoredrefs pairs
tomerge <- if null unignoredrefs
then return []
else do
mergedrefs <- getMergedRefs
filterM isnewer (excludeset mergedrefs unignoredrefs)
journalclean <- if null tomerge
{- Even when no refs need to be merged, the index
- may still be updated if the branch has gotten ahead
- of the index, or just if the journal is dirty. -}
then ifM (needUpdateIndex branchref)
( lockJournal $ \jl -> do
forceUpdateIndex jl branchref
{- When there are journalled changes
- as well as the branch being updated,
- a commit needs to be done. -}
when dirty $
go branchref dirty [] jl
return True
, if dirty
then ifM (annexAlwaysCommit <$> Annex.getGitConfig)
( do
lockJournal $ go branchref dirty []
return True
, return False
)
else return True
)
else do
lockJournal $ go branchref dirty tomerge
return True
return $ UpdateMade
{ refsWereMerged = not (null tomerge)
, journalClean = journalclean
}
where
excludeset s = filter (\(r, _) -> S.notMember r s)
isnewer (r, _) = inRepo $ Git.Branch.changed fullname r
go branchref dirty tomerge jl = stagejournalwhen dirty jl $ do
let (refs, branches) = unzip tomerge
merge_desc <- if null tomerge
then commitMessage
else return $ "merging " ++
unwords (map Git.Ref.describe branches) ++
" into " ++ fromRef name
localtransitions <- parseTransitionsStrictly "local"
<$> getLocal transitionsLog
unless (null tomerge) $ do
showSideAction merge_desc
mapM_ checkBranchDifferences refs
mergeIndex jl refs
let commitrefs = nub $ fullname:refs
ifM (handleTransitions jl localtransitions commitrefs)
( runAnnexHook postUpdateAnnexHook
, do
ff <- if dirty
then return False
else inRepo $ Git.Branch.fastForward fullname refs
if ff
then updateIndex jl branchref
else commitIndex jl branchref merge_desc commitrefs
)
addMergedRefs tomerge
invalidateCache
stagejournalwhen dirty jl a
| dirty = stageJournal jl a
| otherwise = withIndex a
{- Gets the content of a file, which may be in the journal, or in the index
- (and committed to the branch).
-
- Updates the branch if necessary, to ensure the most up-to-date available
- content is returned.
-
- Returns an empty string if the file doesn't exist yet. -}
get :: RawFilePath -> Annex L.ByteString
get file = getCache file >>= \case
Just content -> return content
Nothing -> do
st <- update
content <- if journalIgnorable st
then getRef fullname file
else getLocal file
setCache file content
return content
{- Like get, but does not merge the branch, so the info returned may not
- reflect changes in remotes.
- (Changing the value this returns, and then merging is always the
- same as using get, and then changing its value.) -}
getLocal :: RawFilePath -> Annex L.ByteString
getLocal file = go =<< getJournalFileStale file
where
go (Just journalcontent) = return journalcontent
go Nothing = getRef fullname file
{- Gets the content of a file as staged in the branch's index. -}
getStaged :: RawFilePath -> Annex L.ByteString
getStaged = getRef indexref
where
-- This makes git cat-file be run with ":file",
-- so it looks at the index.
indexref = Ref ""
getHistorical :: RefDate -> RawFilePath -> Annex L.ByteString
getHistorical date file =
-- This check avoids some ugly error messages when the reflog
-- is empty.
ifM (null <$> inRepo (Git.RefLog.get' [Param (fromRef fullname), Param "-n1"]))
( giveup ("No reflog for " ++ fromRef fullname)
, getRef (Git.Ref.dateRef fullname date) file
)
getRef :: Ref -> RawFilePath -> Annex L.ByteString
getRef ref file = withIndex $ catFile ref file
{- Applies a function to modify the content of a file.
-
- Note that this does not cause the branch to be merged, it only
- modifies the current content of the file on the branch.
-}
change :: Journalable content => RawFilePath -> (L.ByteString -> content) -> Annex ()
change file f = lockJournal $ \jl -> f <$> getLocal file >>= set jl file
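{- A usage sketch (illustrative only; the file name is hypothetical,
 - and this assumes the Journalable instance for L.ByteString):
 - append a line to a log file on the branch. -}
appendLineExample :: L.ByteString -> Annex ()
appendLineExample line = change "example.log" (\c -> c <> line <> "\n")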
{- Applies a function which can modify the content of a file, or not. -}
maybeChange :: Journalable content => RawFilePath -> (L.ByteString -> Maybe content) -> Annex ()
maybeChange file f = lockJournal $ \jl -> do
v <- getLocal file
case f v of
Just jv ->
let b = journalableByteString jv
in when (v /= b) $ set jl file b
_ -> noop
{- Records new content of a file into the journal -}
set :: Journalable content => JournalLocked -> RawFilePath -> content -> Annex ()
set jl f c = do
journalChanged
setJournalFile jl f c
-- Could cache the new content, but it would involve
-- evaluating a Journalable Builder twice, which is not very
-- efficient. Instead, assume that it's not common to need to read
-- a log file immediately after writing it.
invalidateCache
{- Commit message used when making a commit of whatever data has changed
- to the git-annex branch. -}
commitMessage :: Annex String
commitMessage = fromMaybe "update" . annexCommitMessage <$> Annex.getGitConfig
{- Stages the journal, and commits staged changes to the branch. -}
commit :: String -> Annex ()
commit = whenM journalDirty . forceCommit
{- Commits the current index to the branch even without any journalled
- changes. -}
forceCommit :: String -> Annex ()
forceCommit message = lockJournal $ \jl ->
stageJournal jl $ do
ref <- getBranch
commitIndex jl ref message [fullname]
{- Commits the staged changes in the index to the branch.
-
- Ensures that the branch's index file is first updated to merge the state
- of the branch at branchref, before running the commit action. This
- is needed because the branch may have had changes pushed to it, that
- are not yet reflected in the index.
-
- The branchref value can have been obtained using getBranch at any
- previous point, though getting it a long time ago makes the race
- more likely to occur.
-
- Note that changes may be pushed to the branch at any point in time!
- So, there's a race. If the commit is made using the newly pushed tip of
- the branch as its parent, and that ref has not yet been merged into the
- index, then the result is that the commit will revert the pushed
- changes, since they have not been merged into the index. This race
- is detected and another commit made to fix it.
-
- (It's also possible for the branch to be overwritten,
- losing the commit made here. But that's ok; the data is still in the
- index and will get committed again later.)
-}
commitIndex :: JournalLocked -> Git.Ref -> String -> [Git.Ref] -> Annex ()
commitIndex jl branchref message parents = do
showStoringStateAction
commitIndex' jl branchref message message 0 parents
commitIndex' :: JournalLocked -> Git.Ref -> String -> String -> Integer -> [Git.Ref] -> Annex ()
commitIndex' jl branchref message basemessage retrynum parents = do
updateIndex jl branchref
cmode <- annexCommitMode <$> Annex.getGitConfig
committedref <- inRepo $ Git.Branch.commitAlways cmode message fullname parents
setIndexSha committedref
parentrefs <- commitparents <$> catObject committedref
when (racedetected branchref parentrefs) $
fixrace committedref parentrefs
where
-- look for "parent ref" lines and return the refs
commitparents = map (Git.Ref . snd) . filter isparent .
map (toassoc . L.toStrict) . L.split newline
newline = fromIntegral (ord '\n')
toassoc = separate' (== (fromIntegral (ord ' ')))
isparent (k,_) = k == "parent"
{- The race can be detected by checking the commit's
- parent, which will be the newly pushed branch,
- instead of the expected ref that the index was updated to. -}
racedetected expectedref parentrefs
| expectedref `elem` parentrefs = False -- good parent
| otherwise = True -- race!
{- To recover from the race, union merge the lost refs
- into the index. -}
fixrace committedref lostrefs = do
showSideAction "recovering from race"
let retrynum' = retrynum+1
-- small sleep to let any activity that caused
-- the race settle down
liftIO $ threadDelay (100000 + fromInteger retrynum')
mergeIndex jl lostrefs
let racemessage = basemessage ++ " (recovery from race #" ++ show retrynum' ++ "; expected commit parent " ++ show branchref ++ " but found " ++ show lostrefs ++ " )"
commitIndex' jl committedref racemessage basemessage retrynum' [committedref]
{- Lists all files on the branch, including ones in the journal
- that have not been committed yet. There may be duplicates in the list. -}
files :: Annex ([RawFilePath], IO Bool)
files = do
_ <- update
(bfs, cleanup) <- branchFiles
-- ++ forces the content of the first list to be buffered in memory,
-- so use getJournalledFilesStale which should be much smaller most
-- of the time. branchFiles will stream as the list is consumed.
l <- (++)
<$> (map toRawFilePath <$> getJournalledFilesStale)
<*> pure bfs
return (l, cleanup)
{- Files in the branch, not including any from journalled changes,
- and without updating the branch. -}
branchFiles :: Annex ([RawFilePath], IO Bool)
branchFiles = withIndex $ inRepo branchFiles'
branchFiles' :: Git.Repo -> IO ([RawFilePath], IO Bool)
branchFiles' = Git.Command.pipeNullSplit' $
lsTreeParams Git.LsTree.LsTreeRecursive fullname [Param "--name-only"]
{- Populates the branch's index file with the current branch contents.
-
- This is only done when the index doesn't yet exist, and the index
- is used to build up changes to be committed to the branch, and merge
- in changes from other branches.
-}
genIndex :: Git.Repo -> IO ()
genIndex g = Git.UpdateIndex.streamUpdateIndex g
[Git.UpdateIndex.lsTree fullname g]
{- Merges the specified refs into the index.
- Any changes staged in the index will be preserved. -}
mergeIndex :: JournalLocked -> [Git.Ref] -> Annex ()
mergeIndex jl branches = do
prepareModifyIndex jl
hashhandle <- hashObjectHandle
withCatFileHandle $ \ch ->
inRepo $ \g -> Git.UnionMerge.mergeIndex hashhandle ch g branches
{- Removes any stale git lock file, to avoid git falling over when
- updating the index.
-
- Since all modifications of the index are performed inside this module,
- and only when the journal is locked, the fact that the journal has to be
- locked when this is called ensures that no other process is currently
- modifying the index. So any index.lock file must be stale, caused
- by git running when the system crashed, or the repository's disk was
- removed, etc.
-}
prepareModifyIndex :: JournalLocked -> Annex ()
prepareModifyIndex _jl = do
index <- fromRepo gitAnnexIndex
void $ liftIO $ tryIO $ removeFile $ index ++ ".lock"
{- Runs an action using the branch's index file. -}
withIndex :: Annex a -> Annex a
withIndex = withIndex' False
withIndex' :: Bool -> Annex a -> Annex a
withIndex' bootstrapping a = withIndexFile AnnexIndexFile $ \f -> do
checkIndexOnce $ unlessM (liftIO $ doesFileExist f) $ do
unless bootstrapping create
createAnnexDirectory $ takeDirectory f
unless bootstrapping $ inRepo genIndex
a
{- Updates the branch's index to reflect the current contents of the branch.
- Any changes staged in the index will be preserved.
-
- Compares the ref stored in the lock file with the current
- ref of the branch to see if an update is needed.
-}
updateIndex :: JournalLocked -> Git.Ref -> Annex ()
updateIndex jl branchref = whenM (needUpdateIndex branchref) $
forceUpdateIndex jl branchref
forceUpdateIndex :: JournalLocked -> Git.Ref -> Annex ()
forceUpdateIndex jl branchref = do
withIndex $ mergeIndex jl [fullname]
setIndexSha branchref
{- Checks if the index needs to be updated. -}
needUpdateIndex :: Git.Ref -> Annex Bool
needUpdateIndex branchref = do
f <- fromRepo gitAnnexIndexStatus
committedref <- Git.Ref . firstLine' <$>
liftIO (catchDefaultIO mempty $ B.readFile f)
return (committedref /= branchref)
{- Record that the branch's index has been updated to correspond to a
- given ref of the branch. -}
setIndexSha :: Git.Ref -> Annex ()
setIndexSha ref = do
f <- fromRepo gitAnnexIndexStatus
writeLogFile f $ fromRef ref ++ "\n"
runAnnexHook postUpdateAnnexHook
{- Stages the journal into the index, and runs an action that
- commits the index to the branch. Note that the action is run
- inside withIndex so will automatically use the branch's index.
-
- Before staging, this removes any existing git index file lock.
- This is safe to do because stageJournal is the only thing that
- modifies this index file, and only one can run at a time, because
- the journal is locked. So any existing git index file lock must be
- stale, and the journal must contain any data that was in the process
- of being written to the index file when it crashed.
-}
stageJournal :: JournalLocked -> Annex () -> Annex ()
stageJournal jl commitindex = withIndex $ withOtherTmp $ \tmpdir -> do
prepareModifyIndex jl
g <- gitRepo
let dir = gitAnnexJournalDir g
(jlogf, jlogh) <- openjlog tmpdir
h <- hashObjectHandle
withJournalHandle $ \jh ->
Git.UpdateIndex.streamUpdateIndex g
[genstream dir h jh jlogh]
commitindex
liftIO $ cleanup dir jlogh jlogf
where
genstream dir h jh jlogh streamer = readDirectory jh >>= \case
Nothing -> return ()
Just file -> do
unless (dirCruft file) $ do
let path = dir </> file
sha <- Git.HashObject.hashFile h path
hPutStrLn jlogh file
streamer $ Git.UpdateIndex.updateIndexLine
sha TreeFile (asTopFilePath $ fileJournal $ toRawFilePath file)
genstream dir h jh jlogh streamer
-- Clean up the staged files, as listed in the temp log file.
-- The temp file is used to avoid needing to buffer all the
-- filenames in memory.
cleanup dir jlogh jlogf = do
hFlush jlogh
hSeek jlogh AbsoluteSeek 0
stagedfs <- lines <$> hGetContents jlogh
mapM_ (removeFile . (dir </>)) stagedfs
hClose jlogh
nukeFile jlogf
openjlog tmpdir = liftIO $ openTempFile tmpdir "jlog"
{- This is run after the refs have been merged into the index,
- but before the result is committed to the branch.
- (Which is why it's passed the contents of the local branch's
- transition log before that merge took place.)
-
- When the refs contain transitions that have not yet been done locally,
- the transitions are performed on the index, and a new branch
- is created from the result.
-
- When there are transitions recorded locally that have not been done
- to the remote refs, the transitions are performed in the index,
- and committed to the existing branch. In this case, the untransitioned
- remote refs cannot be merged into the branch (since transitions
- throw away history), so they are added to the list of refs to ignore,
- to avoid re-merging content from them again.
-}
handleTransitions :: JournalLocked -> Transitions -> [Git.Ref] -> Annex Bool
handleTransitions jl localts refs = do
m <- M.fromList <$> mapM getreftransition refs
let remotets = M.elems m
if all (localts ==) remotets
then return False
else do
let allts = combineTransitions (localts:remotets)
let (transitionedrefs, untransitionedrefs) =
partition (\r -> M.lookup r m == Just allts) refs
performTransitionsLocked jl allts (localts /= allts) transitionedrefs
ignoreRefs untransitionedrefs
return True
where
getreftransition ref = do
ts <- parseTransitionsStrictly "remote"
<$> catFile ref transitionsLog
return (ref, ts)
{- Performs the specified transitions on the contents of the index file,
- commits it to the branch, or creates a new branch.
-}
performTransitions :: Transitions -> Bool -> [Ref] -> Annex ()
performTransitions ts neednewlocalbranch transitionedrefs = lockJournal $ \jl ->
performTransitionsLocked jl ts neednewlocalbranch transitionedrefs
performTransitionsLocked :: JournalLocked -> Transitions -> Bool -> [Ref] -> Annex ()
performTransitionsLocked jl ts neednewlocalbranch transitionedrefs = do
-- For simplicity & speed, we're going to use the Annex.Queue to
-- update the git-annex branch, while it usually holds changes
-- for the head branch. Flush any such changes.
Annex.Queue.flush
-- Stop any running git cat-files, to ensure that the
-- getStaged calls below use the current index, and not some older
-- one.
catFileStop
withIndex $ do
prepareModifyIndex jl
run $ mapMaybe getTransitionCalculator tlist
Annex.Queue.flush
if neednewlocalbranch
then do
cmode <- annexCommitMode <$> Annex.getGitConfig
committedref <- inRepo $ Git.Branch.commitAlways cmode message fullname transitionedrefs
setIndexSha committedref
else do
ref <- getBranch
commitIndex jl ref message (nub $ fullname:transitionedrefs)
where
message
| neednewlocalbranch && null transitionedrefs = "new branch for transition " ++ tdesc
| otherwise = "continuing transition " ++ tdesc
tdesc = show $ map describeTransition tlist
tlist = knownTransitionList ts
{- The changes to make to the branch are calculated and applied to
- the branch directly, rather than going through the journal,
- which would be inefficient. (And the journal is not designed
- to hold changes to every file in the branch at once.)
-
- When a file in the branch is changed by transition code,
- its new content is remembered and fed into the code for subsequent
- transitions.
-}
run [] = noop
run changers = do
config <- Annex.getGitConfig
trustmap <- calcTrustMap <$> getStaged trustLog
remoteconfigmap <- calcRemoteConfigMap <$> getStaged remoteLog
-- partially apply, improves performance
let changers' = map (\c -> c config trustmap remoteconfigmap) changers
(fs, cleanup) <- branchFiles
forM_ fs $ \f -> do
content <- getStaged f
apply changers' f content
liftIO $ void cleanup
apply [] _ _ = return ()
apply (changer:rest) file content = case changer file content of
PreserveFile -> apply rest file content
ChangeFile builder -> do
let content' = toLazyByteString builder
if L.null content'
then do
Annex.Queue.addUpdateIndex
=<< inRepo (Git.UpdateIndex.unstageFile (fromRawFilePath file))
-- File is deleted; can't run any other
-- transitions on it.
return ()
else do
sha <- hashBlob content'
Annex.Queue.addUpdateIndex $ Git.UpdateIndex.pureStreamer $
Git.UpdateIndex.updateIndexLine sha TreeFile (asTopFilePath file)
apply rest file content'
checkBranchDifferences :: Git.Ref -> Annex ()
checkBranchDifferences ref = do
theirdiffs <- allDifferences . parseDifferencesLog
<$> catFile ref differenceLog
mydiffs <- annexDifferences <$> Annex.getGitConfig
when (theirdiffs /= mydiffs) $
giveup "Remote repository is tuned in incompatible way; cannot be merged with local repository."
ignoreRefs :: [Git.Sha] -> Annex ()
ignoreRefs rs = do
old <- getIgnoredRefs
let s = S.unions [old, S.fromList rs]
f <- fromRepo gitAnnexIgnoredRefs
writeLogFile f $
unlines $ map fromRef $ S.elems s
getIgnoredRefs :: Annex (S.Set Git.Sha)
getIgnoredRefs =
S.fromList . mapMaybe Git.Sha.extractSha . B8.lines <$> content
where
content = do
f <- fromRepo gitAnnexIgnoredRefs
liftIO $ catchDefaultIO mempty $ B.readFile f
addMergedRefs :: [(Git.Sha, Git.Branch)] -> Annex ()
addMergedRefs [] = return ()
addMergedRefs new = do
old <- getMergedRefs'
-- Keep only the newest sha for each branch.
let l = nubBy ((==) `on` snd) (new ++ old)
f <- fromRepo gitAnnexMergedRefs
writeLogFile f $
unlines $ map (\(s, b) -> fromRef s ++ '\t' : fromRef b) l
getMergedRefs :: Annex (S.Set Git.Sha)
getMergedRefs = S.fromList . map fst <$> getMergedRefs'
getMergedRefs' :: Annex [(Git.Sha, Git.Branch)]
getMergedRefs' = do
f <- fromRepo gitAnnexMergedRefs
s <- liftIO $ catchDefaultIO mempty $ B.readFile f
return $ map parse $ B8.lines s
where
parse l =
let (s, b) = separate' (== (fromIntegral (ord '\t'))) l
in (Ref s, Ref b)
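	-- The log holds one "sha<TAB>branch" pair per line; for example
	-- (hypothetical data), "aaa...\trefs/remotes/origin/git-annex"
	-- parses to (Ref "aaa...", Ref "refs/remotes/origin/git-annex").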
{- Grafts a treeish into the branch at the specified location,
- and then removes it. This ensures that the treeish won't get garbage
- collected, and will always be available as long as the git-annex branch
- is available. -}
rememberTreeish :: Git.Ref -> TopFilePath -> Annex ()
rememberTreeish treeish graftpoint = lockJournal $ \jl -> do
branchref <- getBranch
updateIndex jl branchref
origtree <- fromMaybe (giveup "unable to determine git-annex branch tree") <$>
inRepo (Git.Ref.tree branchref)
addedt <- inRepo $ Git.Tree.graftTree treeish graftpoint origtree
cmode <- annexCommitMode <$> Annex.getGitConfig
c <- inRepo $ Git.Branch.commitTree cmode
"graft" [branchref] addedt
c' <- inRepo $ Git.Branch.commitTree cmode
"graft cleanup" [c] origtree
inRepo $ Git.Branch.update' fullname c'
-- The tree in c' is the same as the tree in branchref,
-- and the index was updated to that above, so it's safe to
-- say that the index contains c'.
setIndexSha c'

114
Annex/Branch/Transitions.hs Normal file
View file

@ -0,0 +1,114 @@
{- git-annex branch transitions
-
- Copyright 2013-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Branch.Transitions (
FileTransition(..),
getTransitionCalculator
) where
import Common
import Logs
import Logs.Transitions
import qualified Logs.UUIDBased as UUIDBased
import qualified Logs.Presence.Pure as Presence
import qualified Logs.Chunk.Pure as Chunk
import qualified Logs.MetaData.Pure as MetaData
import qualified Logs.Remote.Pure as Remote
import Types.TrustLevel
import Types.UUID
import Types.MetaData
import Types.Remote
import Types.GitConfig (GitConfig)
import Types.ProposedAccepted
import Annex.SpecialRemote.Config
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder
data FileTransition
= ChangeFile Builder
| PreserveFile
type TransitionCalculator = GitConfig -> TrustMap -> M.Map UUID RemoteConfig -> RawFilePath -> L.ByteString -> FileTransition
getTransitionCalculator :: Transition -> Maybe TransitionCalculator
getTransitionCalculator ForgetGitHistory = Nothing
getTransitionCalculator ForgetDeadRemotes = Just dropDead
-- Removes data about all dead repos.
--
-- The trust log is not changed, because other, unmerged clones
-- may contain other data about the dead repos. So we need to remember
-- which are dead, in order to later remove that data.
--
-- When the remote log contains a sameas-uuid pointing to a dead uuid,
-- the uuid of that remote configuration is also effectively dead,
-- though not in the trust log. There may be per-remote state stored using
-- the latter uuid, that also needs to be removed. The sameas-uuid
-- is not removed from the remote log, for the same reason the trust log
-- is not changed.
dropDead :: TransitionCalculator
dropDead gc trustmap remoteconfigmap f content = case getLogVariety gc f of
Just OldUUIDBasedLog
| f == trustLog -> PreserveFile
| f == remoteLog -> ChangeFile $
Remote.buildRemoteConfigLog $
M.mapWithKey minimizesameasdead $
dropDeadFromMapLog trustmap id $
Remote.parseRemoteConfigLog content
| otherwise -> ChangeFile $
UUIDBased.buildLogOld byteString $
dropDeadFromMapLog trustmap' id $
UUIDBased.parseLogOld A.takeByteString content
Just NewUUIDBasedLog -> ChangeFile $
UUIDBased.buildLogNew byteString $
dropDeadFromMapLog trustmap' id $
UUIDBased.parseLogNew A.takeByteString content
Just (ChunkLog _) -> ChangeFile $
Chunk.buildLog $ dropDeadFromMapLog trustmap' fst $
Chunk.parseLog content
Just (PresenceLog _) -> ChangeFile $ Presence.buildLog $
Presence.compactLog $
dropDeadFromPresenceLog trustmap' $
Presence.parseLog content
Just RemoteMetaDataLog -> ChangeFile $ MetaData.buildLog $
dropDeadFromRemoteMetaDataLog trustmap' $
MetaData.simplifyLog $ MetaData.parseLog content
Just OtherLog -> PreserveFile
Nothing -> PreserveFile
where
trustmap' = trustmap `M.union`
M.map (const DeadTrusted) (M.filter sameasdead remoteconfigmap)
sameasdead cm =
case toUUID . fromProposedAccepted <$> M.lookup sameasUUIDField cm of
Nothing -> False
Just u' -> M.lookup u' trustmap == Just DeadTrusted
minimizesameasdead u l
| M.lookup u trustmap' == Just DeadTrusted =
l { UUIDBased.value = minimizesameasdead' (UUIDBased.value l) }
| otherwise = l
minimizesameasdead' c = M.restrictKeys c (S.singleton sameasUUIDField)
dropDeadFromMapLog :: TrustMap -> (k -> UUID) -> M.Map k v -> M.Map k v
dropDeadFromMapLog trustmap getuuid =
M.filterWithKey $ \k _v -> notDead trustmap getuuid k
{- Presence logs can contain UUIDs or other values. Any line that matches
- a dead uuid is dropped; any other values are passed through. -}
dropDeadFromPresenceLog :: TrustMap -> [Presence.LogLine] -> [Presence.LogLine]
dropDeadFromPresenceLog trustmap =
filter $ notDead trustmap (toUUID . Presence.fromLogInfo . Presence.info)
dropDeadFromRemoteMetaDataLog :: TrustMap -> MetaData.Log MetaData -> MetaData.Log MetaData
dropDeadFromRemoteMetaDataLog trustmap =
MetaData.filterOutEmpty . MetaData.filterRemoteMetaData (notDead trustmap id)
notDead :: TrustMap -> (v -> UUID) -> v -> Bool
notDead trustmap a v = M.findWithDefault def (a v) trustmap /= DeadTrusted

107
Annex/BranchState.hs Normal file
View file

@ -0,0 +1,107 @@
{- git-annex branch state management
-
- Runtime state about the git-annex branch, and a small cache.
-
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.BranchState where
import Annex.Common
import Types.BranchState
import qualified Annex
import Logs
import qualified Data.ByteString.Lazy as L
getState :: Annex BranchState
getState = Annex.getState Annex.branchstate
changeState :: (BranchState -> BranchState) -> Annex ()
changeState changer = Annex.changeState $ \s ->
s { Annex.branchstate = changer (Annex.branchstate s) }
{- Runs an action to check that the index file exists, if it's not been
- checked before in this run of git-annex. -}
checkIndexOnce :: Annex () -> Annex ()
checkIndexOnce a = unlessM (indexChecked <$> getState) $ do
a
changeState $ \s -> s { indexChecked = True }
{- Runs an action to update the branch, if it's not been updated before
- in this run of git-annex.
-
- The action should return True if anything that was in the journal
- before got staged (or if the journal was empty). That lets an
- optimisation be done: the journal then does not need to be checked going forward,
- until new information gets written to it.
-}
runUpdateOnce :: Annex Bool -> Annex BranchState
runUpdateOnce a = do
st <- getState
if branchUpdated st
then return st
else do
journalstaged <- a
let stf = \st' -> st'
{ branchUpdated = True
, journalIgnorable = journalstaged
&& not (needInteractiveAccess st')
}
changeState stf
return (stf st)
{- Avoids updating the branch. A useful optimisation when the branch
- is known to have not changed, or git-annex won't be relying on info
- from it. -}
disableUpdate :: Annex ()
disableUpdate = changeState $ \s -> s { branchUpdated = True }
{- Called when a change is made to the journal. -}
journalChanged :: Annex ()
journalChanged = do
-- Optimisation: Typically journalIgnorable will already be True
-- (when one thing gets journalled, often other things do too),
-- so avoid an unnecessary write to the MVar that changeState
-- would do.
--
-- This assumes that another thread is not changing journalIgnorable
-- at the same time, but since runUpdateOnce is the only
-- thing that changes it, and it only runs once, that
-- should not happen.
st <- getState
when (journalIgnorable st) $
changeState $ \st' -> st' { journalIgnorable = False }
{- When git-annex is somehow interactive, eg in --batch mode,
- and needs to always notice changes made to the journal by other
- processes, this disables optimisations that avoid normally reading the
- journal.
-
- It also avoids using the cache, so changes committed by other processes
- will be seen.
-}
enableInteractiveBranchAccess :: Annex ()
enableInteractiveBranchAccess = changeState $
\s -> s { needInteractiveAccess = True }
setCache :: RawFilePath -> L.ByteString -> Annex ()
setCache file content = changeState $ \s -> s
{ cachedFileContents = add (cachedFileContents s) }
where
add l
| length l < logFilesToCache = (file, content) : l
| otherwise = (file, content) : Prelude.init l
getCache :: RawFilePath -> Annex (Maybe L.ByteString)
getCache file = (\st -> go (cachedFileContents st) st) <$> getState
where
go [] _ = Nothing
go ((f,c):rest) state
| f == file && not (needInteractiveAccess state) = Just c
| otherwise = go rest state
invalidateCache :: Annex ()
invalidateCache = changeState $ \s -> s { cachedFileContents = [] }
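{- A usage sketch (illustrative only, mirroring the read-through
 - pattern that Annex.Branch.get uses): consult the cache, and on a
 - miss run the provided reader and cache its result. -}
cachedReadExample :: RawFilePath -> Annex L.ByteString -> Annex L.ByteString
cachedReadExample file reader = getCache file >>= \case
	Just content -> return content
	Nothing -> do
		content <- reader
		setCache file content
		return content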

196
Annex/CatFile.hs Normal file
View file

@ -0,0 +1,196 @@
{- git cat-file interface, with handle automatically stored in the Annex monad
-
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
module Annex.CatFile (
catFile,
catFileDetails,
catObject,
catTree,
catCommit,
catObjectDetails,
withCatFileHandle,
catObjectMetaData,
catFileStop,
catKey,
catKey',
catSymLinkTarget,
catKeyFile,
catKeyFileHEAD,
catKeyFileHidden,
catObjectMetaDataHidden,
) where
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
import System.PosixCompat.Types
import Control.Concurrent.STM
import Annex.Common
import qualified Git
import qualified Git.CatFile
import qualified Annex
import Git.Types
import Git.FilePath
import Git.Index
import qualified Git.Ref
import Annex.Link
import Annex.CurrentBranch
import Types.AdjustedBranch
import Types.CatFileHandles
import Utility.ResourcePool
catFile :: Git.Branch -> RawFilePath -> Annex L.ByteString
catFile branch file = withCatFileHandle $ \h ->
liftIO $ Git.CatFile.catFile h branch file
catFileDetails :: Git.Branch -> RawFilePath -> Annex (Maybe (L.ByteString, Sha, ObjectType))
catFileDetails branch file = withCatFileHandle $ \h ->
liftIO $ Git.CatFile.catFileDetails h branch file
catObject :: Git.Ref -> Annex L.ByteString
catObject ref = withCatFileHandle $ \h ->
liftIO $ Git.CatFile.catObject h ref
catObjectMetaData :: Git.Ref -> Annex (Maybe (Sha, Integer, ObjectType))
catObjectMetaData ref = withCatFileHandle $ \h ->
liftIO $ Git.CatFile.catObjectMetaData h ref
catTree :: Git.Ref -> Annex [(FilePath, FileMode)]
catTree ref = withCatFileHandle $ \h ->
liftIO $ Git.CatFile.catTree h ref
catCommit :: Git.Ref -> Annex (Maybe Commit)
catCommit ref = withCatFileHandle $ \h ->
liftIO $ Git.CatFile.catCommit h ref
catObjectDetails :: Git.Ref -> Annex (Maybe (L.ByteString, Sha, ObjectType))
catObjectDetails ref = withCatFileHandle $ \h ->
liftIO $ Git.CatFile.catObjectDetails h ref
{- There can be multiple index files, and a different cat-file is needed
- for each. That is selected by setting GIT_INDEX_FILE in the gitEnv
- before running this. -}
withCatFileHandle :: (Git.CatFile.CatFileHandle -> Annex a) -> Annex a
withCatFileHandle a = do
cfh <- Annex.getState Annex.catfilehandles
indexfile <- fromMaybe "" . maybe Nothing (lookup indexEnv)
<$> fromRepo gitEnv
p <- case cfh of
CatFileHandlesNonConcurrent m -> case M.lookup indexfile m of
Just p -> return p
Nothing -> do
p <- mkResourcePoolNonConcurrent startcatfile
let !m' = M.insert indexfile p m
Annex.changeState $ \s -> s { Annex.catfilehandles = CatFileHandlesNonConcurrent m' }
return p
CatFileHandlesPool tm -> do
m <- liftIO $ atomically $ takeTMVar tm
case M.lookup indexfile m of
Just p -> do
liftIO $ atomically $ putTMVar tm m
return p
Nothing -> do
p <- mkResourcePool maxCatFiles
let !m' = M.insert indexfile p m
liftIO $ atomically $ putTMVar tm m'
return p
withResourcePool p startcatfile a
where
startcatfile = inRepo Git.CatFile.catFileStart
{- Running a lot of git cat-file processes is unlikely to improve concurrency,
- because a query to them takes only a little bit of CPU, and tends to be
- bottlenecked on disk. Also, they each open a number of files, so
- using too many might run out of file handles. So, only start a maximum
- of 2.
-
- Note that each different index file gets its own pool of cat-files;
- this is the size of each pool. In all, 4 times this many cat-files
- may end up running.
-}
maxCatFiles :: Int
maxCatFiles = 2
{- Stops all running cat-files. Should only be run when it's known that
- nothing is using the handles, eg at shutdown. -}
catFileStop :: Annex ()
catFileStop = do
cfh <- Annex.getState Annex.catfilehandles
m <- case cfh of
CatFileHandlesNonConcurrent m -> do
Annex.changeState $ \s -> s { Annex.catfilehandles = CatFileHandlesNonConcurrent M.empty }
return m
CatFileHandlesPool tm ->
liftIO $ atomically $ swapTMVar tm M.empty
liftIO $ forM_ (M.elems m) $ \p ->
freeResourcePool p Git.CatFile.catFileStop
{- From ref to a symlink or a pointer file, get the key. -}
catKey :: Ref -> Annex (Maybe Key)
catKey ref = catKey' ref =<< catObjectMetaData ref
catKey' :: Ref -> Maybe (Sha, Integer, ObjectType) -> Annex (Maybe Key)
catKey' ref (Just (_, sz, _))
-- Avoid catting large files, which cannot be symlinks or
-- pointer files anyway; catting them would require buffering
-- their content in memory, as well as a lot of IO.
| sz <= maxPointerSz =
parseLinkTargetOrPointer . L.toStrict <$> catObject ref
catKey' _ _ = return Nothing
{- Gets a symlink target. -}
catSymLinkTarget :: Sha -> Annex RawFilePath
catSymLinkTarget sha = fromInternalGitPath . L.toStrict <$> get
where
-- Avoid buffering the whole file content, which might be large.
-- 8192 is enough if it really is a symlink.
get = L.take 8192 <$> catObject sha
{- From a file in the repository back to the key.
-
- Ideally, this should reflect the key that's staged in the index,
- not the key that's committed to HEAD. Unfortunately, git cat-file
- does not refresh the index file after it's started up, so things
- newly staged in the index won't show up. It does, however, notice
- when branches change.
-
- For command-line git-annex use, that doesn't matter. It's perfectly
- reasonable for things staged in the index after the currently running
- git-annex process started to not be noticed by it. However, we do want
- to see what's in the index, since it may have uncommitted changes not in HEAD.
-
- For the assistant, this is much more of a problem, since it commits
- files and then needs to be able to immediately look up their keys.
- OTOH, the assistant doesn't keep changes staged in the index for very
- long at all before committing them -- and it won't look at the keys
- of files until after committing them.
-
- So, this gets info from the index, unless running as a daemon.
-}
catKeyFile :: RawFilePath -> Annex (Maybe Key)
catKeyFile f = ifM (Annex.getState Annex.daemon)
( catKeyFileHEAD f
, catKey $ Git.Ref.fileRef f
)
catKeyFileHEAD :: RawFilePath -> Annex (Maybe Key)
catKeyFileHEAD f = catKey $ Git.Ref.fileFromRef Git.Ref.headRef f
{- Look in the original branch from whence an adjusted branch is based
- to find the file. But only when the adjustment hides some files. -}
catKeyFileHidden :: RawFilePath -> CurrBranch -> Annex (Maybe Key)
catKeyFileHidden = hiddenCat catKey
catObjectMetaDataHidden :: RawFilePath -> CurrBranch -> Annex (Maybe (Sha, Integer, ObjectType))
catObjectMetaDataHidden = hiddenCat catObjectMetaData
hiddenCat :: (Ref -> Annex (Maybe a)) -> RawFilePath -> CurrBranch -> Annex (Maybe a)
hiddenCat a f (Just origbranch, Just adj)
| adjustmentHidesFiles adj = a (Git.Ref.fileFromRef origbranch f)
hiddenCat _ _ _ = return Nothing

105
Annex/ChangedRefs.hs Normal file
View file

@ -0,0 +1,105 @@
{- Waiting for changed git refs
-
- Copyright 2014-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.ChangedRefs
( ChangedRefs(..)
, ChangedRefsHandle
, waitChangedRefs
, drainChangedRefs
, stopWatchingChangedRefs
, watchChangedRefs
) where
import Annex.Common
import Utility.DirWatcher
import Utility.DirWatcher.Types
import qualified Git
import Git.Sha
import qualified Utility.SimpleProtocol as Proto
import Control.Concurrent
import Control.Concurrent.STM
import Control.Concurrent.STM.TBMChan
import qualified Data.ByteString as S
newtype ChangedRefs = ChangedRefs [Git.Ref]
deriving (Show)
instance Proto.Serializable ChangedRefs where
serialize (ChangedRefs l) = unwords $ map Git.fromRef l
deserialize = Just . ChangedRefs . map (Git.Ref . encodeBS) . words
data ChangedRefsHandle = ChangedRefsHandle DirWatcherHandle (TBMChan Git.Sha)
-- | Wait for one or more git refs to change.
--
-- When possible, coalesce ref writes that occur closely together
-- in time. Delay up to 0.05 seconds to get more ref writes.
waitChangedRefs :: ChangedRefsHandle -> IO ChangedRefs
waitChangedRefs (ChangedRefsHandle _ chan) =
atomically (readTBMChan chan) >>= \case
Nothing -> return $ ChangedRefs []
Just r -> do
threadDelay 50000
rs <- atomically $ loop []
return $ ChangedRefs (r:rs)
where
loop rs = tryReadTBMChan chan >>= \case
Just (Just r) -> loop (r:rs)
_ -> return rs
-- | Remove any changes that might be buffered in the channel,
-- without waiting for any new changes.
drainChangedRefs :: ChangedRefsHandle -> IO ()
drainChangedRefs (ChangedRefsHandle _ chan) = atomically go
where
go = tryReadTBMChan chan >>= \case
Just (Just _) -> go
_ -> return ()
stopWatchingChangedRefs :: ChangedRefsHandle -> IO ()
stopWatchingChangedRefs h@(ChangedRefsHandle wh chan) = do
stopWatchDir wh
atomically $ closeTBMChan chan
drainChangedRefs h
watchChangedRefs :: Annex (Maybe ChangedRefsHandle)
watchChangedRefs = do
-- This channel is used to accumulate notifications,
-- because the DirWatcher might have multiple threads that find
-- changes at the same time. It is bounded to allow a watcher
-- to be started once and reused, without too many changes being
-- buffered in memory.
chan <- liftIO $ newTBMChanIO 100
g <- gitRepo
let gittop = fromRawFilePath (Git.localGitDir g)
let refdir = gittop </> "refs"
liftIO $ createDirectoryUnder gittop refdir
let notifyhook = Just $ notifyHook chan
let hooks = mkWatchHooks
{ addHook = notifyhook
, modifyHook = notifyhook
}
if canWatch
then do
h <- liftIO $ watchDir refdir (const False) True hooks id
return $ Just $ ChangedRefsHandle h chan
else return Nothing
notifyHook :: TBMChan Git.Sha -> FilePath -> Maybe FileStatus -> IO ()
notifyHook chan reffile _
| ".lock" `isSuffixOf` reffile = noop
| otherwise = void $ do
sha <- catchDefaultIO Nothing $
extractSha <$> S.readFile reffile
-- When the channel is full, there is probably no reader
-- running, or ref changes have been occurring very fast,
-- so it's ok to not write the change to it.
maybe noop (void . atomically . tryWriteTBMChan chan) sha
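{- A usage sketch (illustrative only): wait for one batch of coalesced
 - ref changes, print them, and stop watching. -}
demoChangedRefs :: Annex ()
demoChangedRefs = watchChangedRefs >>= \case
	Nothing -> noop -- watching is not supported on this platform
	Just h -> liftIO $ do
		ChangedRefs rs <- waitChangedRefs h
		mapM_ (putStrLn . Git.fromRef) rs
		stopWatchingChangedRefs h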

64
Annex/CheckAttr.hs Normal file
View file

@ -0,0 +1,64 @@
{- git check-attr interface, with handle automatically stored in the Annex monad
-
- Copyright 2012-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.CheckAttr (
checkAttr,
checkAttrStop,
mkConcurrentCheckAttrHandle,
) where
import Annex.Common
import qualified Git.CheckAttr as Git
import qualified Annex
import Utility.ResourcePool
import Types.Concurrency
import Annex.Concurrent.Utility
{- All gitattributes used by git-annex. -}
annexAttrs :: [Git.Attr]
annexAttrs =
[ "annex.backend"
, "annex.numcopies"
, "annex.largefiles"
]
checkAttr :: Git.Attr -> FilePath -> Annex String
checkAttr attr file = withCheckAttrHandle $ \h ->
liftIO $ Git.checkAttr h attr file
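-- Note (added for clarity): only attributes listed in annexAttrs can be
-- queried this way, since the long-running git check-attr process is
-- started with exactly that list.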
withCheckAttrHandle :: (Git.CheckAttrHandle -> Annex a) -> Annex a
withCheckAttrHandle a =
maybe mkpool go =<< Annex.getState Annex.checkattrhandle
where
go p = withResourcePool p start a
start = inRepo $ Git.checkAttrStart annexAttrs
mkpool = do
-- This only runs in non-concurrent code paths;
-- a concurrent pool is set up earlier when needed.
p <- mkResourcePoolNonConcurrent start
Annex.changeState $ \s -> s { Annex.checkattrhandle = Just p }
go p
mkConcurrentCheckAttrHandle :: Concurrency -> Annex (ResourcePool Git.CheckAttrHandle)
mkConcurrentCheckAttrHandle c =
Annex.getState Annex.checkattrhandle >>= \case
Just p@(ResourcePool {}) -> return p
_ -> mkResourcePool =<< liftIO (maxCheckAttrs c)
{- git check-attr is typically CPU bound, and is not likely to be the main
- bottleneck for any command. So limit to the number of CPU cores, maximum,
- while respecting the -Jn value.
-}
maxCheckAttrs :: Concurrency -> IO Int
maxCheckAttrs = concurrencyUpToCpus
checkAttrStop :: Annex ()
checkAttrStop = maybe noop stop =<< Annex.getState Annex.checkattrhandle
where
stop p = do
liftIO $ freeResourcePool p Git.checkAttrStop
Annex.changeState $ \s -> s { Annex.checkattrhandle = Nothing }

64
Annex/CheckIgnore.hs Normal file
View file

@ -0,0 +1,64 @@
{- git check-ignore interface, with handle automatically stored in
- the Annex monad
-
- Copyright 2013-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.CheckIgnore (
CheckGitIgnore(..),
checkIgnored,
checkIgnoreStop,
mkConcurrentCheckIgnoreHandle,
) where
import Annex.Common
import qualified Git.CheckIgnore as Git
import qualified Annex
import Utility.ResourcePool
import Types.Concurrency
import Annex.Concurrent.Utility
newtype CheckGitIgnore = CheckGitIgnore Bool
checkIgnored :: CheckGitIgnore -> FilePath -> Annex Bool
checkIgnored (CheckGitIgnore False) _ = pure False
checkIgnored (CheckGitIgnore True) file =
ifM (Annex.getState Annex.force)
( pure False
, withCheckIgnoreHandle $ \h -> liftIO $ Git.checkIgnored h file
)
withCheckIgnoreHandle :: (Git.CheckIgnoreHandle -> Annex a) -> Annex a
withCheckIgnoreHandle a =
maybe mkpool go =<< Annex.getState Annex.checkignorehandle
where
go p = withResourcePool p start a
start = inRepo Git.checkIgnoreStart
mkpool = do
-- This only runs in non-concurrent code paths;
-- a concurrent pool is set up earlier when needed.
p <- mkResourcePoolNonConcurrent start
Annex.changeState $ \s -> s { Annex.checkignorehandle = Just p }
go p
mkConcurrentCheckIgnoreHandle :: Concurrency -> Annex (ResourcePool Git.CheckIgnoreHandle)
mkConcurrentCheckIgnoreHandle c =
Annex.getState Annex.checkignorehandle >>= \case
Just p@(ResourcePool {}) -> return p
_ -> mkResourcePool =<< liftIO (maxCheckIgnores c)
{- git check-ignore is typically CPU bound, and is not likely to be the main
- bottleneck for any command. So limit to the number of CPU cores, maximum,
- while respecting the -Jn value.
-}
maxCheckIgnores :: Concurrency -> IO Int
maxCheckIgnores = concurrencyUpToCpus
checkIgnoreStop :: Annex ()
checkIgnoreStop = maybe noop stop =<< Annex.getState Annex.checkignorehandle
where
stop p = do
liftIO $ freeResourcePool p Git.checkIgnoreStop
Annex.changeState $ \s -> s { Annex.checkignorehandle = Nothing }

14
Annex/Common.hs Normal file
View file

@ -0,0 +1,14 @@
{-# LANGUAGE CPP #-}
module Annex.Common (module X) where
import Common as X
import Types as X
import Key as X
import Types.UUID as X
import Annex as X (gitRepo, inRepo, fromRepo, calcRepo)
import Annex.Locations as X
import Messages as X
#ifndef mingw32_HOST_OS
import System.Posix.IO as X hiding (createPipe)
#endif

217
Annex/Concurrent.hs Normal file
View file

@ -0,0 +1,217 @@
{- git-annex concurrent state
-
- Copyright 2015-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Concurrent (
module Annex.Concurrent,
module Annex.Concurrent.Utility
) where
import Annex
import Annex.Common
import Annex.Concurrent.Utility
import qualified Annex.Queue
import Annex.Action
import Types.Concurrency
import Types.WorkerPool
import Types.CatFileHandles
import Annex.CheckAttr
import Annex.CheckIgnore
import Remote.List
import Control.Concurrent
import Control.Concurrent.STM
import qualified Data.Map as M
setConcurrency :: ConcurrencySetting -> Annex ()
setConcurrency (ConcurrencyCmdLine s) = setConcurrency' s ConcurrencyCmdLine
setConcurrency (ConcurrencyGitConfig s) = setConcurrency' s ConcurrencyGitConfig
setConcurrency' :: Concurrency -> (Concurrency -> ConcurrencySetting) -> Annex ()
setConcurrency' NonConcurrent f =
Annex.changeState $ \s -> s
{ Annex.concurrency = f NonConcurrent
}
setConcurrency' c f = do
cfh <- getState Annex.catfilehandles
cfh' <- case cfh of
CatFileHandlesNonConcurrent _ -> liftIO catFileHandlesPool
CatFileHandlesPool _ -> pure cfh
cah <- mkConcurrentCheckAttrHandle c
cih <- mkConcurrentCheckIgnoreHandle c
Annex.changeState $ \s -> s
{ Annex.concurrency = f c
, Annex.catfilehandles = cfh'
, Annex.checkattrhandle = Just cah
, Annex.checkignorehandle = Just cih
}
{- Allows forking off a thread that uses a copy of the current AnnexState
- to run an Annex action.
-
- The returned IO action can be used to start the thread.
- It returns an Annex action that must be run in the original
- calling context to merge the forked AnnexState back into the
- current AnnexState.
-}
forkState :: Annex a -> Annex (IO (Annex a))
forkState a = do
st <- dupState
return $ do
(ret, newst) <- run st a
return $ do
mergeState newst
return ret
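{- A usage sketch (illustrative only): run an action in a forked
 - thread against a copy of the state, wait for it to finish, then
 - merge the resulting state back into the caller. -}
forkStateExample :: Annex a -> Annex a
forkStateExample a = do
	runner <- forkState a
	v <- liftIO newEmptyMVar
	void $ liftIO $ forkIO $ putMVar v =<< runner
	join $ liftIO $ takeMVar v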
{- Returns a copy of the current AnnexState that is safe to be
- used when forking off a thread.
-
- After an Annex action is run using this AnnexState, it
- should be merged back into the current Annex's state,
- by calling mergeState.
-}
dupState :: Annex AnnexState
dupState = do
-- Make sure that some expensive actions have been done before
-- starting threads. This way the state already reflects them, and
-- each thread won't try to redo them.
_ <- remoteList
st <- Annex.getState id
-- Make sure that concurrency is enabled, if it was not already,
-- so the concurrency-safe resource pools are set up.
st' <- case getConcurrency' (Annex.concurrency st) of
NonConcurrent -> do
setConcurrency (ConcurrencyCmdLine (Concurrent 1))
Annex.getState id
_ -> return st
return $ st'
-- each thread has its own repoqueue
{ Annex.repoqueue = Nothing
-- no errors from this thread yet
, Annex.errcounter = 0
}
{- Merges the passed AnnexState into the current Annex state.
- Also closes various handles in it. -}
mergeState :: AnnexState -> Annex ()
mergeState st = do
st' <- liftIO $ snd <$> run st stopCoProcesses
forM_ (M.toList $ Annex.cleanup st') $
uncurry addCleanup
Annex.Queue.mergeFrom st'
changeState $ \s -> s { errcounter = errcounter s + errcounter st' }
{- Runs an action and makes the current thread have the specified stage
- while doing so. If too many other threads are running in the specified
- stage, waits for one of them to become idle.
-
- Noop if the current thread already has the requested stage, or if the
- current thread is not in the worker pool, or if concurrency is not
- enabled.
-
- Also a noop if the stage is not one of the stages that the worker pool
- uses.
-}
enteringStage :: WorkerStage -> Annex a -> Annex a
enteringStage newstage a = Annex.getState Annex.workers >>= \case
Nothing -> a
Just tv -> do
mytid <- liftIO myThreadId
let set = changeStageTo mytid tv (const newstage)
let restore = maybe noop (void . changeStageTo mytid tv . const)
bracket set restore (const a)
{- Transition the current thread to the initial stage.
- This is done once the thread is ready to begin work.
-}
enteringInitialStage :: Annex ()
enteringInitialStage = Annex.getState Annex.workers >>= \case
Nothing -> noop
Just tv -> do
mytid <- liftIO myThreadId
void $ changeStageTo mytid tv initialStage
{- This needs to leave the WorkerPool with the same number of
- idle and active threads, and with the same number of threads for each
- WorkerStage. So, all it can do is swap the WorkerStage of our thread's
- ActiveWorker with an IdleWorker.
-
- Must avoid a deadlock if all worker threads end up here at the same
- time, or if there are no suitable IdleWorkers left. So if necessary
- we first replace our ActiveWorker with an IdleWorker in the pool, to allow
- some other thread to use it, before waiting for a suitable IdleWorker
- for us to use.
-
- Note that the spareVals in the WorkerPool does not get anything added to
- it when adding the IdleWorker, so there will for a while be more IdleWorkers
- in the pool than spareVals. That does not prevent other threads that call
- this from using them though, so it's fine.
-}
changeStageTo :: ThreadId -> TMVar (WorkerPool AnnexState) -> (UsedStages -> WorkerStage) -> Annex (Maybe WorkerStage)
changeStageTo mytid tv getnewstage = liftIO $
replaceidle >>= maybe
(return Nothing)
(either waitidle (return . Just))
where
replaceidle = atomically $ do
pool <- takeTMVar tv
let newstage = getnewstage (usedStages pool)
let notchanging = do
putTMVar tv pool
return Nothing
if memberStage newstage (usedStages pool)
then case removeThreadIdWorkerPool mytid pool of
Just ((myaid, oldstage), pool')
| oldstage /= newstage -> case getIdleWorkerSlot newstage pool' of
Nothing -> do
putTMVar tv $
addWorkerPool (IdleWorker oldstage) pool'
return $ Just $ Left (myaid, newstage, oldstage)
Just pool'' -> do
-- optimisation
putTMVar tv $
addWorkerPool (IdleWorker oldstage) $
addWorkerPool (ActiveWorker myaid newstage) pool''
return $ Just $ Right oldstage
| otherwise -> notchanging
_ -> notchanging
else notchanging
waitidle (myaid, newstage, oldstage) = atomically $ do
pool <- waitIdleWorkerSlot newstage =<< takeTMVar tv
putTMVar tv $ addWorkerPool (ActiveWorker myaid newstage) pool
return (Just oldstage)
-- | Waits until there's an idle StartStage worker in the worker pool,
-- removes it from the pool, and returns its state.
--
-- If the worker pool is not already allocated, returns Nothing.
waitStartWorkerSlot :: TMVar (WorkerPool Annex.AnnexState) -> STM (Maybe (Annex.AnnexState, WorkerStage))
waitStartWorkerSlot tv = do
pool <- takeTMVar tv
st <- go pool
return $ Just (st, StartStage)
where
go pool = case spareVals pool of
[] -> retry
(v:vs) -> do
let pool' = pool { spareVals = vs }
putTMVar tv =<< waitIdleWorkerSlot StartStage pool'
return v
waitIdleWorkerSlot :: WorkerStage -> WorkerPool Annex.AnnexState -> STM (WorkerPool Annex.AnnexState)
waitIdleWorkerSlot wantstage = maybe retry return . getIdleWorkerSlot wantstage
getIdleWorkerSlot :: WorkerStage -> WorkerPool Annex.AnnexState -> Maybe (WorkerPool Annex.AnnexState)
getIdleWorkerSlot wantstage pool = do
l <- findidle [] (workerList pool)
return $ pool { workerList = l }
where
findidle _ [] = Nothing
findidle c ((IdleWorker stage):rest)
| stage == wantstage = Just (c ++ rest)
findidle c (w:rest) = findidle (w:c) rest

31
Annex/Concurrent/Utility.hs Normal file
View file

@ -0,0 +1,31 @@
{- git-annex concurrency utilities
-
- Copyright 2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Concurrent.Utility where
import Annex
import Types.Concurrency
import GHC.Conc
getConcurrency :: Annex Concurrency
getConcurrency = getConcurrency' <$> getState concurrency
getConcurrency' :: ConcurrencySetting -> Concurrency
getConcurrency' (ConcurrencyCmdLine c) = c
getConcurrency' (ConcurrencyGitConfig c) = c
{- Honor the requested level of concurrency, but only up to the number of
- CPU cores. Useful for things that are known to be CPU bound. -}
concurrencyUpToCpus :: Concurrency -> IO Int
concurrencyUpToCpus c = do
let cn = case c of
Concurrent n -> n
NonConcurrent -> 1
ConcurrentPerCpu -> 1
pn <- getNumProcessors
return (min cn pn)
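{- Illustrative sketch, not part of the original source: sizing a
 - resource pool from the configured concurrency. With -J8 on a
 - 4 core machine this yields 4; with NonConcurrent it yields 1.
 -
 - > poolSize :: Annex Int
 - > poolSize = do
 - >     c <- getConcurrency
 - >     liftIO $ concurrencyUpToCpus c
 -}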

929
Annex/Content.hs Normal file
View file

@ -0,0 +1,929 @@
{- git-annex file content managing
-
- Copyright 2010-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.Content (
inAnnex,
inAnnex',
inAnnexSafe,
inAnnexCheck,
objectFileExists,
lockContentShared,
lockContentForRemoval,
ContentRemovalLock,
RetrievalSecurityPolicy(..),
getViaTmp,
getViaTmpFromDisk,
checkDiskSpaceToGet,
checkSecureHashes,
prepTmp,
withTmp,
checkDiskSpace,
needMoreDiskSpace,
moveAnnex,
populatePointerFile,
linkToAnnex,
linkFromAnnex,
LinkAnnexResult(..),
unlinkAnnex,
checkedCopyFile,
linkOrCopy,
linkOrCopy',
sendAnnex,
prepSendAnnex,
removeAnnex,
moveBad,
KeyLocation(..),
listKeys,
saveState,
downloadUrl,
preseedTmp,
dirKeys,
withObjectLoc,
staleKeysPrune,
pruneTmpWorkDirBefore,
isUnmodified,
isUnmodifiedCheap,
verifyKeyContent,
VerifyConfig(..),
Verification(..),
unVerified,
withTmpWorkDir,
) where
import System.IO.Unsafe (unsafeInterleaveIO)
import qualified Data.Set as S
import Annex.Common
import Logs.Location
import Types.Transfer
import Logs.Transfer
import qualified Git
import qualified Annex
import qualified Annex.Queue
import qualified Annex.Branch
import Utility.FileMode
import qualified Annex.Url as Url
import Utility.CopyFile
import Utility.Metered
import Git.FilePath
import Annex.Perms
import Annex.Link
import Annex.LockPool
import Annex.WorkerPool
import Messages.Progress
import Types.Remote (unVerified, Verification(..), RetrievalSecurityPolicy(..))
import qualified Types.Remote
import qualified Types.Backend
import qualified Backend
import qualified Database.Keys
import Types.NumCopies
import Types.Key
import Annex.UUID
import Annex.InodeSentinal
import Utility.InodeCache
import Annex.Content.LowLevel
import Annex.Content.PointerFile
import Types.WorkerPool
import qualified Utility.RawFilePath as R
import qualified System.FilePath.ByteString as P
{- Checks if a given key's content is currently present. -}
inAnnex :: Key -> Annex Bool
inAnnex key = inAnnexCheck key $ liftIO . R.doesPathExist
{- Runs an arbitrary check on a key's content. -}
inAnnexCheck :: Key -> (RawFilePath -> Annex Bool) -> Annex Bool
inAnnexCheck key check = inAnnex' id False check key
{- inAnnex that performs an arbitrary check of the key's content. -}
inAnnex' :: (a -> Bool) -> a -> (RawFilePath -> Annex a) -> Key -> Annex a
inAnnex' isgood bad check key = withObjectLoc key $ \loc -> do
r <- check loc
if isgood r
then ifM (annexThin <$> Annex.getGitConfig)
-- When annex.thin is set, the object file
-- could be modified; make sure it's not.
-- (Suppress any messages about
-- checksumming, to avoid them cluttering
-- the display.)
( ifM (doQuietAction $ isUnmodified key loc)
( return r
, return bad
)
, return r
)
else return bad
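{- Illustrative sketch, not part of the original source: using
 - inAnnexCheck to run a custom predicate against the object file,
 - here a hypothetical minimum-size check.
 -
 - > bigEnough :: Key -> Annex Bool
 - > bigEnough key = inAnnexCheck key $ \loc -> liftIO $
 - >     (>= 1024) <$> getFileSize (fromRawFilePath loc)
 -}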
{- Like inAnnex, checks if the object file for a key exists,
- but there are no guarantees it has the right content. -}
objectFileExists :: Key -> Annex Bool
objectFileExists key =
calcRepo (gitAnnexLocation key)
>>= liftIO . R.doesPathExist
{- A safer check; the key's content must not only be present, but
- is not in the process of being removed. -}
inAnnexSafe :: Key -> Annex (Maybe Bool)
inAnnexSafe key =
inAnnex' (fromMaybe True) (Just False) (go . fromRawFilePath) key
where
is_locked = Nothing
is_unlocked = Just True
is_missing = Just False
go contentfile = flip checklock contentfile =<< contentLockFile key
#ifndef mingw32_HOST_OS
checklock Nothing contentfile = checkOr is_missing contentfile
{- The content file must exist, but the lock file generally
- won't exist unless a removal is in process. -}
checklock (Just lockfile) contentfile =
ifM (liftIO $ doesFileExist contentfile)
( checkOr is_unlocked lockfile
, return is_missing
)
checkOr d lockfile = checkLocked lockfile >>= return . \case
Nothing -> d
Just True -> is_locked
Just False -> is_unlocked
#else
checklock Nothing contentfile = liftIO $ ifM (doesFileExist contentfile)
( lockShared contentfile >>= \case
Nothing -> return is_locked
Just lockhandle -> do
dropLock lockhandle
return is_unlocked
, return is_missing
)
{- In Windows, see if we can take a shared lock. If so,
- remove the lock file to clean up after ourselves. -}
checklock (Just lockfile) contentfile =
ifM (liftIO $ doesFileExist contentfile)
( modifyContent lockfile $ liftIO $
lockShared lockfile >>= \case
Nothing -> return is_locked
Just lockhandle -> do
dropLock lockhandle
void $ tryIO $ nukeFile lockfile
return is_unlocked
, return is_missing
)
#endif
{- Windows has to use a separate lock file from the content, since
- locking the actual content file would interfere with the user's
- use of it. -}
contentLockFile :: Key -> Annex (Maybe FilePath)
#ifndef mingw32_HOST_OS
contentLockFile _ = pure Nothing
#else
contentLockFile key = Just <$> calcRepo (gitAnnexContentLock key)
#endif
{- Prevents the content from being removed while the action is running.
- Uses a shared lock.
-
- If locking fails, or the content is not present, throws an exception
- rather than running the action.
-}
lockContentShared :: Key -> (VerifiedCopy -> Annex a) -> Annex a
lockContentShared key a = lockContentUsing lock key notpresent $
ifM (inAnnex key)
( do
u <- getUUID
withVerifiedCopy LockedCopy u (return True) a
, notpresent
)
where
notpresent = giveup $ "failed to lock content: not present"
#ifndef mingw32_HOST_OS
lock contentfile Nothing = tryLockShared Nothing contentfile
lock _ (Just lockfile) = posixLocker tryLockShared lockfile
#else
lock = winLocker lockShared
#endif
{- Exclusively locks content, while performing an action that
- might remove it.
-
- If locking fails, throws an exception rather than running the action.
-
- But, if locking fails because the content is not present, runs the
- fallback action instead.
-}
lockContentForRemoval :: Key -> Annex a -> (ContentRemovalLock -> Annex a) -> Annex a
lockContentForRemoval key fallback a = lockContentUsing lock key fallback $
a (ContentRemovalLock key)
where
#ifndef mingw32_HOST_OS
{- Since content files are stored with the write bit disabled, have
- to fiddle with permissions to open for an exclusive lock. -}
lock contentfile Nothing = bracket_
(thawContent contentfile)
(freezeContent contentfile)
(tryLockExclusive Nothing contentfile)
lock _ (Just lockfile) = posixLocker tryLockExclusive lockfile
#else
lock = winLocker lockExclusive
#endif
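{- Illustrative sketch, not part of the original source: how a drop
 - operation is expected to combine lockContentForRemoval with
 - removeAnnex below. 'recordGone' is a hypothetical fallback for
 - when the content is already absent.
 -
 - > dropLocal :: Key -> Annex ()
 - > dropLocal key = lockContentForRemoval key recordGone $ \lck ->
 - >     removeAnnex lck
 -}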
{- Passed the object content file, and maybe a separate lock file to use,
- when the content file itself should not be locked. -}
type ContentLocker = FilePath -> Maybe LockFile -> Annex (Maybe LockHandle)
#ifndef mingw32_HOST_OS
posixLocker :: (Maybe FileMode -> LockFile -> Annex (Maybe LockHandle)) -> LockFile -> Annex (Maybe LockHandle)
posixLocker takelock lockfile = do
mode <- annexFileMode
modifyContent lockfile $
takelock (Just mode) lockfile
#else
winLocker :: (LockFile -> IO (Maybe LockHandle)) -> ContentLocker
winLocker takelock _ (Just lockfile) = do
modifyContent lockfile $
void $ liftIO $ tryIO $
writeFile lockfile ""
liftIO $ takelock lockfile
-- never reached; windows always uses a separate lock file
winLocker _ _ Nothing = return Nothing
#endif
{- The fallback action is run if the ContentLocker throws an IO exception
- and the content is not present. It's not guaranteed to always run when
- the content is not present, because the content file is not always
- the file that is locked; eg, on Windows a different file is locked. -}
lockContentUsing :: ContentLocker -> Key -> Annex a -> Annex a -> Annex a
lockContentUsing locker key fallback a = do
contentfile <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
lockfile <- contentLockFile key
bracket
(lock contentfile lockfile)
(either (const noop) (unlock lockfile))
go
where
alreadylocked = giveup "content is locked"
failedtolock e = giveup $ "failed to lock content: " ++ show e
lock contentfile lockfile = tryIO $
maybe alreadylocked return
=<< locker contentfile lockfile
go (Right _) = a
go (Left e) = ifM (inAnnex key)
( failedtolock e
, fallback
)
#ifndef mingw32_HOST_OS
unlock mlockfile lck = do
maybe noop cleanuplockfile mlockfile
liftIO $ dropLock lck
#else
unlock mlockfile lck = do
-- Can't delete a locked file on Windows
liftIO $ dropLock lck
maybe noop cleanuplockfile mlockfile
#endif
cleanuplockfile lockfile = modifyContent lockfile $
void $ liftIO $ tryIO $
nukeFile lockfile
{- Runs an action, passing it the temp file to get,
- and if the action succeeds, verifies the file matches
- the key and moves the file into the annex as a key's content. -}
getViaTmp :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> (FilePath -> Annex (Bool, Verification)) -> Annex Bool
getViaTmp rsp v key action = checkDiskSpaceToGet key False $
getViaTmpFromDisk rsp v key action
{- Like getViaTmp, but does not check that there is enough disk space
- for the incoming key. For use when the key content is already on disk
- and not being copied into place. -}
getViaTmpFromDisk :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> (FilePath -> Annex (Bool, Verification)) -> Annex Bool
getViaTmpFromDisk rsp v key action = checkallowed $ do
tmpfile <- prepTmp key
resuming <- liftIO $ doesFileExist tmpfile
(ok, verification) <- action tmpfile
-- When the temp file already had content, we don't know if
-- that content is good or not, so only trust it if the action
-- Verified it in passing. Otherwise, force verification even
-- if the VerifyConfig normally disables it.
let verification' = if resuming
then case verification of
Verified -> Verified
_ -> MustVerify
else verification
if ok
then ifM (verifyKeyContent rsp v verification' key tmpfile)
( ifM (pruneTmpWorkDirBefore tmpfile (moveAnnex key))
( do
logStatus key InfoPresent
return True
, return False
)
, do
warning "verification of content failed"
-- The bad content is not retained, because
-- a retry should not try to resume from it
-- since it's apparently corrupted.
-- Also, the bad content could be any data,
-- including perhaps the content of a file
-- other than the one that was requested,
-- and so it's best not to keep it on disk.
pruneTmpWorkDirBefore tmpfile (liftIO . nukeFile)
return False
)
-- On transfer failure, the tmp file is left behind, in case
-- caller wants to resume its transfer
else return False
where
-- Avoid running the action to get the content when the
-- RetrievalSecurityPolicy would cause verification to always fail.
checkallowed a = case rsp of
RetrievalAllKeysSecure -> a
RetrievalVerifiableKeysSecure -> ifM (Backend.isVerifiable key)
( a
, ifM (annexAllowUnverifiedDownloads <$> Annex.getGitConfig)
( a
, warnUnverifiableInsecure key >> return False
)
)
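{- Illustrative sketch, not part of the original source: a download
 - wrapped in getViaTmp. 'fetchFromRemote' is hypothetical; it
 - downloads into the temp file (resuming if it exists) and does not
 - verify, so verification falls to verifyKeyContent.
 -
 - > getKey :: Key -> Annex Bool
 - > getKey key = getViaTmp RetrievalAllKeysSecure DefaultVerify key $ \tmp -> do
 - >     ok <- fetchFromRemote key tmp
 - >     return (ok, UnVerified)
 -}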
{- Verifies that a file is the expected content of a key.
-
- Configuration can prevent verification, for either a
- particular remote or always, unless the RetrievalSecurityPolicy
- requires verification.
-
- Most keys have a known size, and if so, the file size is checked.
-
- When the key's backend allows verifying the content (via checksum),
- it is checked.
-
- If the RetrievalSecurityPolicy requires verification and the key's
- backend doesn't support it, the verification will fail.
-}
verifyKeyContent :: RetrievalSecurityPolicy -> VerifyConfig -> Verification -> Key -> FilePath -> Annex Bool
verifyKeyContent rsp v verification k f = case (rsp, verification) of
(_, Verified) -> return True
(RetrievalVerifiableKeysSecure, _) -> ifM (Backend.isVerifiable k)
( verify
, ifM (annexAllowUnverifiedDownloads <$> Annex.getGitConfig)
( verify
, warnUnverifiableInsecure k >> return False
)
)
(_, UnVerified) -> ifM (shouldVerify v)
( verify
, return True
)
(_, MustVerify) -> verify
where
verify = enteringStage VerifyStage $ verifysize <&&> verifycontent
verifysize = case fromKey keySize k of
Nothing -> return True
Just size -> do
size' <- liftIO $ catchDefaultIO 0 $ getFileSize f
return (size' == size)
verifycontent = Backend.maybeLookupBackendVariety (fromKey keyVariety k) >>= \case
Nothing -> return True
Just b -> case Types.Backend.verifyKeyContent b of
Nothing -> return True
Just verifier -> verifier k f
warnUnverifiableInsecure :: Key -> Annex ()
warnUnverifiableInsecure k = warning $ unwords
[ "Getting " ++ kv ++ " keys with this remote is not secure;"
, "the content cannot be verified to be correct."
, "(Use annex.security.allow-unverified-downloads to bypass"
, "this safety check.)"
]
where
kv = decodeBS (formatKeyVariety (fromKey keyVariety k))
data VerifyConfig = AlwaysVerify | NoVerify | RemoteVerify Remote | DefaultVerify
shouldVerify :: VerifyConfig -> Annex Bool
shouldVerify AlwaysVerify = return True
shouldVerify NoVerify = return False
shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig
shouldVerify (RemoteVerify r) =
(shouldVerify DefaultVerify
<&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r)))
-- Export remotes are not key/value stores, so always verify
-- content from them even when verification is disabled.
<||> Types.Remote.isExportSupported r
{- Checks if there is enough free disk space to download a key
- to its temp file.
-
- When the temp file already exists, count the space it is using as
- free, since the download will overwrite it or resume.
-
- When there's enough free space, runs the download action.
-}
checkDiskSpaceToGet :: Key -> a -> Annex a -> Annex a
checkDiskSpaceToGet key unabletoget getkey = do
tmp <- fromRepo $ gitAnnexTmpObjectLocation key
e <- liftIO $ doesFileExist tmp
alreadythere <- liftIO $ if e
then getFileSize tmp
else return 0
ifM (checkDiskSpace Nothing key alreadythere True)
( do
-- The tmp file may not have been left writable
when e $ thawContent tmp
getkey
, return unabletoget
)
prepTmp :: Key -> Annex FilePath
prepTmp key = do
tmp <- fromRepo $ gitAnnexTmpObjectLocation key
createAnnexDirectory (parentDir tmp)
return tmp
{- Prepares a temp file for a key, runs an action on it, and cleans up
- the temp file. If the action throws an exception, the temp file is
- left behind, which allows for resuming.
-}
withTmp :: Key -> (FilePath -> Annex a) -> Annex a
withTmp key action = do
tmp <- prepTmp key
res <- action tmp
pruneTmpWorkDirBefore tmp (liftIO . nukeFile)
return res
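{- Illustrative sketch, not part of the original source: using withTmp
 - to stage content at a known temp location, eg for a hypothetical
 - 'encryptTo' pass before upload.
 -
 - > uploadEncrypted :: Key -> Annex Bool
 - > uploadEncrypted key = withTmp key $ \tmp -> encryptTo key tmp
 -}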
{- Moves a key's content into .git/annex/objects/
-
- When a key has associated pointer files, the object is hard
- linked (or copied) to the files, and the object file is left thawed.
-
- What if the key there already has content? This could happen for
- various reasons; perhaps the same content is being annexed again.
- Perhaps there has been a hash collision generating the keys.
-
- The current strategy is to assume that in this case it's safe to delete
- one of the two copies of the content; and the one already in the annex
- is left there, assuming it's the original, canonical copy.
-
- I considered being more paranoid, and checking that both files had
- the same content. Decided against it because A) users explicitly choose
- a backend based on its hashing properties and so if they're dealing
- with colliding files it's their own fault and B) adding such a check
- would not catch all cases of colliding keys. For example, perhaps
- a remote has a key; if it's then added again with different content then
- the overall system now has two different pieces of content for that
- key, and one of them will probably get deleted later. So, adding the
- check here would only raise expectations that git-annex cannot truly
- meet.
-
- May return False when a particular variety of key is not being
- accepted into the repository. Will display a warning message in this
- case. May also throw exceptions in some cases.
-}
moveAnnex :: Key -> FilePath -> Annex Bool
moveAnnex key src = ifM (checkSecureHashes' key)
( do
withObjectLoc key storeobject
return True
, return False
)
where
storeobject dest = ifM (liftIO $ R.doesPathExist dest)
( alreadyhave
, modifyContent dest' $ do
freezeContent src
liftIO $ moveFile src dest'
g <- Annex.gitRepo
fs <- map (`fromTopFilePath` g)
<$> Database.Keys.getAssociatedFiles key
unless (null fs) $ do
ics <- mapM (populatePointerFile (Restage True) key dest) fs
Database.Keys.storeInodeCaches' key [dest] (catMaybes ics)
)
where
dest' = fromRawFilePath dest
alreadyhave = liftIO $ removeFile src
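{- Illustrative sketch, not part of the original source: the typical
 - ingestion step, moving a staged file into the annex and logging
 - presence, mirroring what getViaTmpFromDisk does above.
 -
 - > ingest :: Key -> FilePath -> Annex ()
 - > ingest key src = whenM (moveAnnex key src) $
 - >     logStatus key InfoPresent
 -}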
checkSecureHashes :: Key -> Annex (Maybe String)
checkSecureHashes key = ifM (Backend.isCryptographicallySecure key)
( return Nothing
, ifM (annexSecureHashesOnly <$> Annex.getGitConfig)
( return $ Just $ "annex.securehashesonly blocked adding " ++ decodeBS (formatKeyVariety (fromKey keyVariety key)) ++ " key"
, return Nothing
)
)
checkSecureHashes' :: Key -> Annex Bool
checkSecureHashes' key = checkSecureHashes key >>= \case
Nothing -> return True
Just msg -> do
warning $ msg ++ " to annex objects"
return False
data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop
{- Populates the annex object file by hard linking or copying a source
- file to it. -}
linkToAnnex :: Key -> FilePath -> Maybe InodeCache -> Annex LinkAnnexResult
linkToAnnex key src srcic = ifM (checkSecureHashes' key)
( do
dest <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
modifyContent dest $ linkAnnex To key src srcic dest Nothing
, return LinkAnnexFailed
)
{- Makes a destination file be a link or copy from the annex object. -}
linkFromAnnex :: Key -> FilePath -> Maybe FileMode -> Annex LinkAnnexResult
linkFromAnnex key dest destmode = do
src <- calcRepo (gitAnnexLocation key)
srcic <- withTSDelta (liftIO . genInodeCache src)
linkAnnex From key (fromRawFilePath src) srcic dest destmode
data FromTo = From | To
{- Hard links or copies from or to the annex object location.
- Updates inode cache.
-
- Freezes or thaws the destination appropriately.
-
- When a hard link is made, the annex object necessarily has to be thawed
- too. So, adding an object to the annex with a hard link can prevent
- losing the content if the source file is deleted, but does not
- guard against modifications.
-
- Nothing is done if the destination file already exists.
-}
linkAnnex :: FromTo -> Key -> FilePath -> Maybe InodeCache -> FilePath -> Maybe FileMode -> Annex LinkAnnexResult
linkAnnex _ _ _ Nothing _ _ = return LinkAnnexFailed
linkAnnex fromto key src (Just srcic) dest destmode =
withTSDelta (liftIO . genInodeCache dest') >>= \case
Just destic -> do
cs <- Database.Keys.getInodeCaches key
if null cs
then Database.Keys.addInodeCaches key [srcic, destic]
else Database.Keys.addInodeCaches key [srcic]
return LinkAnnexNoop
Nothing -> linkOrCopy key src dest destmode >>= \case
Nothing -> failed
Just r -> do
case fromto of
From -> thawContent dest
To -> case r of
Copied -> freezeContent dest
Linked -> noop
checksrcunchanged
where
dest' = toRawFilePath dest
failed = do
Database.Keys.addInodeCaches key [srcic]
return LinkAnnexFailed
checksrcunchanged = withTSDelta (liftIO . genInodeCache (toRawFilePath src)) >>= \case
Just srcic' | compareStrong srcic srcic' -> do
destic <- withTSDelta (liftIO . genInodeCache dest')
Database.Keys.addInodeCaches key $
catMaybes [destic, Just srcic]
return LinkAnnexOk
_ -> do
liftIO $ nukeFile dest
failed
{- Removes the annex object file for a key. Lowlevel. -}
unlinkAnnex :: Key -> Annex ()
unlinkAnnex key = do
obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
modifyContent obj $ do
secureErase obj
liftIO $ nukeFile obj
{- Runs an action to transfer an object's content.
-
- In some cases, it's possible for the file to change as it's being sent.
- If this happens, runs the rollback action and throws an exception.
- The rollback action should remove the data that was transferred.
-}
sendAnnex :: Key -> Annex () -> (FilePath -> Annex a) -> Annex a
sendAnnex key rollback sendobject = go =<< prepSendAnnex key
where
go (Just (f, checksuccess)) = do
r <- sendobject f
unlessM checksuccess $ do
rollback
giveup "content changed while it was being sent"
return r
go Nothing = giveup "content not available to send"
{- Returns a file that contains an object's content,
- and a check to run after the transfer is complete.
-
- When a file is unlocked, it's possible for its content to
- change as it's being sent. The check detects this case
- and returns False.
-
- Note that the returned check action is, in some cases, run in the
- Annex monad of the remote that is receiving the object, rather than
- the sender. So it cannot rely on Annex state.
-}
prepSendAnnex :: Key -> Annex (Maybe (FilePath, Annex Bool))
prepSendAnnex key = withObjectLoc key $ \f -> do
cache <- Database.Keys.getInodeCaches key
cache' <- if null cache
-- Since no inode cache is in the database, this
-- object is not currently unlocked. But that could
-- change while the transfer is in progress, so
-- generate an inode cache for the starting
-- content.
then maybeToList <$>
withTSDelta (liftIO . genInodeCache f)
else pure cache
return $ if null cache'
then Nothing
else Just (fromRawFilePath f, sameInodeCache f cache')
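{- Illustrative sketch, not part of the original source: an upload
 - built on sendAnnex. 'uploadFile' and 'deleteFromRemote' are
 - hypothetical; the rollback removes the partial upload when the
 - content changed mid-transfer.
 -
 - > upload :: Key -> Annex Bool
 - > upload key = sendAnnex key (deleteFromRemote key) $ \f ->
 - >     uploadFile key f
 -}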
{- Performs an action, passing it the location to use for a key's content. -}
withObjectLoc :: Key -> (RawFilePath -> Annex a) -> Annex a
withObjectLoc key a = a =<< calcRepo (gitAnnexLocation key)
cleanObjectLoc :: Key -> Annex () -> Annex ()
cleanObjectLoc key cleaner = do
file <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
void $ tryIO $ thawContentDir file
cleaner
liftIO $ removeparents file (3 :: Int)
where
removeparents _ 0 = noop
removeparents file n = do
let dir = parentDir file
maybe noop (const $ removeparents dir (n-1))
<=< catchMaybeIO $ removeDirectory dir
{- Removes a key's file from .git/annex/objects/
-}
removeAnnex :: ContentRemovalLock -> Annex ()
removeAnnex (ContentRemovalLock key) = withObjectLoc key $ \file ->
cleanObjectLoc key $ do
let file' = fromRawFilePath file
secureErase file'
liftIO $ nukeFile file'
g <- Annex.gitRepo
mapM_ (\f -> void $ tryIO $ resetpointer $ fromTopFilePath f g)
=<< Database.Keys.getAssociatedFiles key
Database.Keys.removeInodeCaches key
where
-- Check associated pointer file for modifications, and reset if
-- it's unmodified.
resetpointer file = ifM (isUnmodified key file)
( depopulatePointerFile key file
-- Modified file, so leave it alone.
-- If it was a hard link to the annex object,
-- that object might have been frozen as part of the
-- removal process, so thaw it.
, void $ tryIO $ thawContent $ fromRawFilePath file
)
{- Check if a file contains the unmodified content of the key.
-
- The expensive way to tell is to do a verification of its content.
- The cheaper way is to see if the InodeCache for the key matches the
- file. -}
isUnmodified :: Key -> RawFilePath -> Annex Bool
isUnmodified key f = go =<< geti
where
go Nothing = return False
go (Just fc) = isUnmodifiedCheap' key fc <||> expensivecheck fc
expensivecheck fc = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key (fromRawFilePath f))
( do
-- The file could have been modified while it was
-- being verified. Detect that.
ifM (geti >>= maybe (return False) (compareInodeCaches fc))
( do
-- Update the InodeCache to avoid
-- performing this expensive check again.
Database.Keys.addInodeCaches key [fc]
return True
, return False
)
, return False
)
geti = withTSDelta (liftIO . genInodeCache f)
{- Cheap check if a file contains the unmodified content of the key,
- only checking the InodeCache of the key.
-
- Note that, on systems not supporting high-resolution mtimes,
- this may report a false positive when repeated edits are made to a file
- within a small time window (eg 1 second).
-}
isUnmodifiedCheap :: Key -> RawFilePath -> Annex Bool
isUnmodifiedCheap key f = maybe (return False) (isUnmodifiedCheap' key)
=<< withTSDelta (liftIO . genInodeCache f)
isUnmodifiedCheap' :: Key -> InodeCache -> Annex Bool
isUnmodifiedCheap' key fc =
anyM (compareInodeCaches fc) =<< Database.Keys.getInodeCaches key
{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and
- returns the file it was moved to. -}
moveBad :: Key -> Annex FilePath
moveBad key = do
src <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
bad <- fromRepo gitAnnexBadDir
let dest = bad </> takeFileName src
createAnnexDirectory (parentDir dest)
cleanObjectLoc key $
liftIO $ moveFile src dest
logStatus key InfoMissing
return dest
data KeyLocation = InAnnex | InAnywhere
{- InAnnex only lists keys with content in .git/annex/objects.
- InAnywhere lists all keys that have directories in
- .git/annex/objects, whether or not the content is present.
-}
listKeys :: KeyLocation -> Annex [Key]
listKeys keyloc = do
dir <- fromRepo gitAnnexObjectDir
{- In order to run Annex monad actions within unsafeInterleaveIO,
- the current state is taken and reused. No changes made to this
- state will be preserved.
-}
s <- Annex.getState id
depth <- gitAnnexLocationDepth <$> Annex.getGitConfig
liftIO $ walk s depth dir
where
walk s depth dir = do
contents <- catchDefaultIO [] (dirContents dir)
if depth < 2
then do
contents' <- filterM (present s) contents
let keys = mapMaybe (fileKey . P.takeFileName . toRawFilePath) contents'
continue keys []
else do
let deeper = walk s (depth - 1)
continue [] (map deeper contents)
continue keys [] = return keys
continue keys (a:as) = do
{- Force lazy traversal with unsafeInterleaveIO. -}
morekeys <- unsafeInterleaveIO a
continue (morekeys++keys) as
inanywhere = case keyloc of
InAnywhere -> True
_ -> False
present _ _ | inanywhere = pure True
present _ d = presentInAnnex d
presentInAnnex = doesFileExist . contentfile
contentfile d = d </> takeFileName d
{- Things to do to record changes to content when shutting down.
-
- It's acceptable to avoid committing changes to the branch,
- especially if performing a short-lived action.
-}
saveState :: Bool -> Annex ()
saveState nocommit = doSideAction $ do
Annex.Queue.flush
Database.Keys.closeDb
unless nocommit $
whenM (annexAlwaysCommit <$> Annex.getGitConfig) $
Annex.Branch.commit =<< Annex.Branch.commitMessage
{- Downloads content from any of a list of urls, displaying a progress
- meter. -}
downloadUrl :: Key -> MeterUpdate -> [Url.URLString] -> FilePath -> Url.UrlOptions -> Annex Bool
downloadUrl k p urls file uo =
-- Poll the file to handle configurations where an external
-- download command is used.
meteredFile file (Just p) k (go urls Nothing)
where
-- Display only one error message, if all the urls fail to
-- download.
go [] (Just err) = warning err >> return False
go [] Nothing = return False
go (u:us) _ = Url.download' p u file uo >>= \case
Right () -> return True
Left err -> go us (Just err)
{- Copies a key's content, when present, to a temp file.
- This is used to speed up some rsyncs. -}
preseedTmp :: Key -> FilePath -> Annex Bool
preseedTmp key file = go =<< inAnnex key
where
go False = return False
go True = do
ok <- copy
when ok $ thawContent file
return ok
copy = ifM (liftIO $ doesFileExist file)
( return True
, do
s <- fromRawFilePath <$> (calcRepo $ gitAnnexLocation key)
liftIO $ ifM (doesFileExist s)
( copyFileExternal CopyTimeStamps s file
, return False
)
)
{- Finds files directly inside a directory like gitAnnexBadDir
- (not in subdirectories) and returns the corresponding keys. -}
dirKeys :: (Git.Repo -> FilePath) -> Annex [Key]
dirKeys dirspec = do
dir <- fromRepo dirspec
ifM (liftIO $ doesDirectoryExist dir)
( do
contents <- liftIO $ getDirectoryContents dir
files <- liftIO $ filterM doesFileExist $
map (dir </>) contents
return $ mapMaybe (fileKey . P.takeFileName . toRawFilePath) files
, return []
)
{- Looks in the specified directory for bad/tmp keys, and returns a list
- of those that might still have value, or might be stale and removable.
-
- Also, stale keys that can be proven to have no value
- (ie, their content is already present) are deleted.
-}
staleKeysPrune :: (Git.Repo -> FilePath) -> Bool -> Annex [Key]
staleKeysPrune dirspec nottransferred = do
contents <- dirKeys dirspec
dups <- filterM inAnnex contents
let stale = contents `exclude` dups
dir <- fromRepo dirspec
forM_ dups $ \k ->
pruneTmpWorkDirBefore (dir </> fromRawFilePath (keyFile k))
(liftIO . removeFile)
if nottransferred
then do
inprogress <- S.fromList . map (transferKey . fst)
<$> getTransfers
return $ filter (`S.notMember` inprogress) stale
else return stale
{- Prune the work dir associated with the specified content file,
- before performing an action that deletes the file, or moves it away.
-
- This preserves the invariant that the workdir never exists without
- the content file.
-}
pruneTmpWorkDirBefore :: FilePath -> (FilePath -> Annex a) -> Annex a
pruneTmpWorkDirBefore f action = do
let workdir = gitAnnexTmpWorkDir f
liftIO $ whenM (doesDirectoryExist workdir) $
removeDirectoryRecursive workdir
action f
{- Runs an action, passing it a temporary work directory where
- it can write files while receiving the content of a key.
-
- Preserves the invariant that the workdir never exists without the
- content file, by creating an empty content file first.
-
- On exception, or when the action returns Nothing,
- the temporary work directory is retained (unless
- empty), so anything in it can be used on resume.
-}
withTmpWorkDir :: Key -> (FilePath -> Annex (Maybe a)) -> Annex (Maybe a)
withTmpWorkDir key action = do
-- Create the object file if it does not exist. This way,
-- staleKeysPrune only has to look for object files, and can
-- clean up gitAnnexTmpWorkDir for those it finds.
obj <- prepTmp key
unlessM (liftIO $ doesFileExist obj) $ do
liftIO $ writeFile obj ""
setAnnexFilePerm obj
let tmpdir = gitAnnexTmpWorkDir obj
createAnnexDirectory tmpdir
res <- action tmpdir
case res of
Just _ -> liftIO $ removeDirectoryRecursive tmpdir
Nothing -> liftIO $ void $ tryIO $ removeDirectory tmpdir
return res
{- Finds items in the first, smaller list, that are not
- present in the second, larger list.
-
- Constructing a single set, of the list that tends to be
- smaller, appears more efficient in both memory and CPU
- than constructing and taking the S.difference of two sets. -}
exclude :: Ord a => [a] -> [a] -> [a]
exclude [] _ = [] -- optimisation
exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
where
remove a b = foldl (flip S.delete) b a
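{- Illustrative example, not part of the original source, matching the
 - use in staleKeysPrune above where dups is a subset of contents:
 -
 - > exclude [1,2,3] [2,3,4,5] == [1]
 -}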

137
Annex/Content/LowLevel.hs Normal file
View file

@ -0,0 +1,137 @@
{- git-annex low-level content functions
-
- Copyright 2010-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.Content.LowLevel where
import System.PosixCompat.Files
import Annex.Common
import Logs.Transfer
import qualified Annex
import Utility.DiskFree
import Utility.FileMode
import Utility.DataUnits
import Utility.CopyFile
{- Runs the secure erase command if set, otherwise does nothing.
- File may or may not be deleted at the end; caller is responsible for
- making sure it's deleted. -}
secureErase :: FilePath -> Annex ()
secureErase file = maybe noop go =<< annexSecureEraseCommand <$> Annex.getGitConfig
where
go basecmd = void $ liftIO $
boolSystem "sh" [Param "-c", Param $ gencmd basecmd]
gencmd = massReplace [ ("%file", shellEscape file) ]
data LinkedOrCopied = Linked | Copied
{- Hard links or copies src to dest, which must not already exist.
-
- Only uses a hard link when annex.thin is enabled and when src is
- not already hardlinked to elsewhere.
-
- Checks disk reserve before copying against the size of the key,
- and will fail if not enough space, or if the dest file already exists.
-
- The FileMode, if provided, influences the mode of the dest file.
- In particular, if it has an execute bit set, the dest file's
- execute bit will be set. The mode is not fully copied over because
- git doesn't support file modes beyond execute.
-}
linkOrCopy :: Key -> FilePath -> FilePath -> Maybe FileMode -> Annex (Maybe LinkedOrCopied)
linkOrCopy = linkOrCopy' (annexThin <$> Annex.getGitConfig)
linkOrCopy' :: Annex Bool -> Key -> FilePath -> FilePath -> Maybe FileMode -> Annex (Maybe LinkedOrCopied)
linkOrCopy' canhardlink key src dest destmode = catchDefaultIO Nothing $
ifM canhardlink
( hardlink
, copy =<< getstat
)
where
hardlink = do
s <- getstat
if linkCount s > 1
then copy s
else liftIO (createLink src dest >> preserveGitMode dest destmode >> return (Just Linked))
`catchIO` const (copy s)
copy s = ifM (checkedCopyFile' key src dest destmode s)
( return (Just Copied)
, return Nothing
)
getstat = liftIO $ getFileStatus src
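{- Illustrative sketch, not part of the original source: forcing the
 - copy path regardless of annex.thin by passing a constant False for
 - the canhardlink check.
 -
 - > copyOnly :: Key -> FilePath -> FilePath -> Annex (Maybe LinkedOrCopied)
 - > copyOnly key src dest = linkOrCopy' (pure False) key src dest Nothing
 -}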
{- Checks disk space before copying. -}
checkedCopyFile :: Key -> FilePath -> FilePath -> Maybe FileMode -> Annex Bool
checkedCopyFile key src dest destmode = catchBoolIO $
checkedCopyFile' key src dest destmode
=<< liftIO (getFileStatus src)
checkedCopyFile' :: Key -> FilePath -> FilePath -> Maybe FileMode -> FileStatus -> Annex Bool
checkedCopyFile' key src dest destmode s = catchBoolIO $
ifM (checkDiskSpace' (fromIntegral $ fileSize s) (Just $ takeDirectory dest) key 0 True)
( liftIO $
copyFileExternal CopyAllMetaData src dest
<&&> preserveGitMode dest destmode
, return False
)
preserveGitMode :: FilePath -> Maybe FileMode -> IO Bool
preserveGitMode f (Just mode)
| isExecutable mode = catchBoolIO $ do
modifyFileMode f $ addModes executeModes
return True
| otherwise = catchBoolIO $ do
modifyFileMode f $ removeModes executeModes
return True
preserveGitMode _ _ = return True
{- Checks that there is disk space available to store a given key,
- in a destination directory (or the annex) printing a warning if not.
-
- If the destination is on the same filesystem as the annex,
- checks for any other running downloads, removing the amount of data still
- to be downloaded from the free space. This way, we avoid overcommitting
- when doing concurrent downloads.
-}
checkDiskSpace :: Maybe FilePath -> Key -> Integer -> Bool -> Annex Bool
checkDiskSpace destdir key = checkDiskSpace' (fromMaybe 1 (fromKey keySize key)) destdir key
{- Allows specifying the size of the key, if it's known, which is useful
- as not all keys know their size. -}
checkDiskSpace' :: Integer -> Maybe FilePath -> Key -> Integer -> Bool -> Annex Bool
checkDiskSpace' need destdir key alreadythere samefilesystem = ifM (Annex.getState Annex.force)
( return True
, do
-- We can't get inprogress and free at the same
-- time, and both can be changing, so there's a
-- small race here. Err on the side of caution
-- by getting inprogress first, so if it takes
-- a while, we'll see any decrease in the free
-- disk space.
inprogress <- if samefilesystem
then sizeOfDownloadsInProgress (/= key)
else pure 0
dir >>= liftIO . getDiskFree >>= \case
Just have -> do
reserve <- annexDiskReserve <$> Annex.getGitConfig
let delta = need + reserve - have - alreadythere + inprogress
let ok = delta <= 0
unless ok $
warning $ needMoreDiskSpace delta
return ok
_ -> return True
)
where
dir = maybe (fromRawFilePath <$> fromRepo gitAnnexDir) return destdir
needMoreDiskSpace :: Integer -> String
needMoreDiskSpace n = "not enough free space, need " ++
roughSize storageUnits True n ++ " more" ++ forcemsg
where
forcemsg = " (use --force to override this check or adjust annex.diskreserve)"
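{- Illustrative sketch, not part of the original source: guarding a
 - download with checkDiskSpace, as done before fetching key content.
 - Nothing checks the annex directory's filesystem, 0 bytes are
 - already present, and True counts concurrent downloads against the
 - free space.
 -
 - > roomFor :: Key -> Annex Bool
 - > roomFor key = checkDiskSpace Nothing key 0 True
 -}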

75
Annex/Content/PointerFile.hs Normal file
View file

@ -0,0 +1,75 @@
{- git-annex pointer files
-
- Copyright 2010-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.Content.PointerFile where
#if ! defined(mingw32_HOST_OS)
import System.Posix.Files
#else
import System.PosixCompat.Files
#endif
import Annex.Common
import Annex.Perms
import Annex.Link
import Annex.ReplaceFile
import Annex.InodeSentinal
import Annex.Content.LowLevel
import Utility.InodeCache
#if ! defined(mingw32_HOST_OS)
import Utility.Touch
#endif
{- Populates a pointer file with the content of a key.
-
- If the file already has some other content, it is not modified.
-
- Returns an InodeCache if it populated the pointer file.
-}
populatePointerFile :: Restage -> Key -> RawFilePath -> RawFilePath -> Annex (Maybe InodeCache)
populatePointerFile restage k obj f = go =<< liftIO (isPointerFile f)
where
go (Just k') | k == k' = do
let f' = fromRawFilePath f
destmode <- liftIO $ catchMaybeIO $ fileMode <$> getFileStatus f'
liftIO $ nukeFile f'
(ic, populated) <- replaceWorkTreeFile f' $ \tmp -> do
let tmp' = toRawFilePath tmp
ok <- linkOrCopy k (fromRawFilePath obj) tmp destmode >>= \case
Just _ -> thawContent tmp >> return True
Nothing -> liftIO (writePointerFile tmp' k destmode) >> return False
ic <- withTSDelta (liftIO . genInodeCache tmp')
return (ic, ok)
maybe noop (restagePointerFile restage f) ic
if populated
then return ic
else return Nothing
go _ = return Nothing
{- Removes the content from a pointer file, replacing it with a pointer.
-
- Does not check if the pointer file is modified. -}
depopulatePointerFile :: Key -> RawFilePath -> Annex ()
depopulatePointerFile key file = do
let file' = fromRawFilePath file
st <- liftIO $ catchMaybeIO $ getFileStatus file'
let mode = fmap fileMode st
secureErase file'
liftIO $ nukeFile file'
ic <- replaceWorkTreeFile file' $ \tmp -> do
liftIO $ writePointerFile (toRawFilePath tmp) key mode
#if ! defined(mingw32_HOST_OS)
-- Don't advance mtime; this avoids unnecessary re-smudging
-- by git in some cases.
liftIO $ maybe noop
(\t -> touch tmp t False)
(fmap modificationTimeHiRes st)
#endif
withTSDelta (liftIO . genInodeCache (toRawFilePath tmp))
maybe noop (restagePointerFile (Restage True) file) ic

41
Annex/CurrentBranch.hs Normal file
View file

@ -0,0 +1,41 @@
{- currently checked out branch
-
- Copyright 2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.CurrentBranch where
import Annex.Common
import Types.AdjustedBranch
import Annex.AdjustedBranch.Name
import qualified Annex
import qualified Git
import qualified Git.Branch
type CurrBranch = (Maybe Git.Branch, Maybe Adjustment)
{- Gets the currently checked out branch.
- When on an adjusted branch, gets the original branch, and the adjustment.
-
- Cached for speed.
-
- Until a commit is made in a new repository, no branch is checked out.
- Since git-annex may make the first commit, this does not cache
- the absence of a branch.
-}
getCurrentBranch :: Annex CurrBranch
getCurrentBranch = maybe cache return
=<< Annex.getState Annex.cachedcurrentbranch
where
cache = inRepo Git.Branch.current >>= \case
Just b -> do
let v = case adjustedToOriginal b of
Nothing -> (Just b, Nothing)
Just (adj, origbranch) ->
(Just origbranch, Just adj)
Annex.changeState $ \s ->
s { Annex.cachedcurrentbranch = Just v }
return v
Nothing -> return (Nothing, Nothing)

58
Annex/Difference.hs Normal file
View file

@ -0,0 +1,58 @@
{- git-annex repository differences
-
- Copyright 2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Difference (
module Types.Difference,
setDifferences,
) where
import Annex.Common
import Types.Difference
import Logs.Difference
import Config
import Annex.UUID
import Logs.UUID
import Annex.Version
import qualified Annex
import qualified Data.Map as M
-- Differences are only allowed to be tweaked when initializing a
-- repository for the first time, and then only if there is not another
-- known uuid. If the repository was cloned from elsewhere, it inherits
-- the existing settings.
--
-- Must be called before setVersion, so it can check if this is the first
-- time the repository is being initialized.
setDifferences :: Annex ()
setDifferences = do
u <- getUUID
otherds <- allDifferences <$> recordedDifferences
ds <- mappend otherds . annexDifferences <$> Annex.getGitConfig
when (ds /= mempty) $ do
ds' <- ifM (isJust <$> getVersion)
( do
oldds <- recordedDifferencesFor u
when (ds /= oldds) $
warning "Cannot change tunable parameters in already initialized repository."
return oldds
, if otherds == mempty
then ifM (any (/= u) . M.keys <$> uuidDescMap)
( do
warning "Cannot change tunable parameters in a clone of an existing repository."
return mempty
, return ds
)
else if otherds /= ds
then do
warning "The specified tunable parameters differ from values being used in other clones of this repository."
return otherds
else return ds
)
forM_ (listDifferences ds') $ \d ->
setConfig (differenceConfigKey d) (differenceConfigVal d)
recordDifferences ds' u

90
Annex/DirHashes.hs Normal file
View file

@ -0,0 +1,90 @@
{- git-annex file locations
-
- Copyright 2010-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.DirHashes (
Hasher,
HashLevels(..),
objectHashLevels,
branchHashLevels,
branchHashDir,
dirHashes,
hashDirMixed,
hashDirLower,
display_32bits_as_dir
) where
import Data.Default
import Data.Bits
import qualified Data.ByteArray as BA
import qualified Data.ByteArray.Encoding as BA
import qualified Data.ByteString as S
import qualified System.FilePath.ByteString as P
import Common
import Key
import Types.GitConfig
import Types.Difference
import Utility.Hash
import Utility.MD5
type Hasher = Key -> RawFilePath
-- Number of hash levels to use. 2 is the default.
newtype HashLevels = HashLevels Int
instance Default HashLevels where
def = HashLevels 2
objectHashLevels :: GitConfig -> HashLevels
objectHashLevels = configHashLevels OneLevelObjectHash
branchHashLevels :: GitConfig -> HashLevels
branchHashLevels = configHashLevels OneLevelBranchHash
configHashLevels :: Difference -> GitConfig -> HashLevels
configHashLevels d config
| hasDifference d (annexDifferences config) = HashLevels 1
| otherwise = def
branchHashDir :: GitConfig -> Key -> S.ByteString
branchHashDir = hashDirLower . branchHashLevels
{- Two different directory hashes may be used. The mixed case hash
- came first, and is fine, except for the problem of case-strict
- filesystems such as Linux VFAT (mounted with shortname=mixed),
- which do not allow using a directory "XX" when "xx" already exists.
- To support that, some git-annex repositories use the lower case-hash.
- All special remotes use the lower-case hash for new data, but old data
- may still use the mixed case hash. -}
dirHashes :: [HashLevels -> Hasher]
dirHashes = [hashDirLower, hashDirMixed]
hashDirs :: HashLevels -> Int -> S.ByteString -> RawFilePath
hashDirs (HashLevels 1) sz s = P.addTrailingPathSeparator $ S.take sz s
hashDirs _ sz s = P.addTrailingPathSeparator $ h P.</> t
where
(h, t) = S.splitAt sz s
hashDirLower :: HashLevels -> Hasher
hashDirLower n k = hashDirs n 3 $ S.pack $ take 6 $ conv $
md5s $ serializeKey' $ nonChunkKey k
where
conv v = BA.unpack $
(BA.convertToBase BA.Base16 v :: BA.Bytes)
{- This was originally using Data.Hash.MD5 from MissingH. This new version
- is faster, but ugly as it has to replicate the 4 Word32's that the
- old md5 implementation produced. -}
hashDirMixed :: HashLevels -> Hasher
hashDirMixed n k = hashDirs n 2 $ S.pack $ take 4 $
concatMap display_32bits_as_dir $
encodeWord32 $ map fromIntegral $ BA.unpack $
Utility.Hash.md5s $ serializeKey' $ nonChunkKey k
where
encodeWord32 (b1:b2:b3:b4:rest) =
(shiftL b4 24 .|. shiftL b3 16 .|. shiftL b2 8 .|. b1)
: encodeWord32 rest
encodeWord32 _ = []
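{- Illustrative sketch, not part of the original source: picking the
 - default hash directory for a key. With two hash levels this yields
 - paths shaped like "f87/4b3/" (lower) or "Gx/qV/" (mixed); the
 - exact values depend on the key's md5.
 -
 - > keyHashDir :: GitConfig -> Key -> RawFilePath
 - > keyHashDir config k = hashDirLower (objectHashLevels config) k
 -}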

133
Annex/Drop.hs Normal file
View file

@ -0,0 +1,133 @@
{- dropping of unwanted content
-
- Copyright 2012-2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Drop where
import Annex.Common
import qualified Annex
import Logs.Trust
import Annex.NumCopies
import Types.Remote (uuid, appendonly, config)
import qualified Remote
import qualified Command.Drop
import Command
import Annex.Wanted
import Annex.Content
import Annex.SpecialRemote.Config
import qualified Database.Keys
import Git.FilePath
import qualified Data.Set as S
import System.Log.Logger (debugM)
type Reason = String
{- Drop a key from local and/or remote when allowed by the preferred content
- and numcopies settings.
-
- Skips trying to drop from remotes that are appendonly, since those drops
- would presumably fail. Also skips dropping from exporttree remotes,
- which don't allow dropping individual keys.
-
- The UUIDs are ones where the content is believed to be present.
- The Remote list can include other remotes that do not have the content;
- only ones that match the UUIDs will be dropped from.
-
- If allowed to drop fromhere, that drop will be done last. This is done
- because local drops do not need any LockedCopy evidence, and so dropping
- from local last allows the content to be removed from more remotes.
-
- A VerifiedCopy can be provided as an optimisation when eg, a key
- has just been uploaded to a remote.
-
- The runner is used to run CommandStart sequentially, it's typically
- callCommandAction.
-}
handleDropsFrom :: [UUID] -> [Remote] -> Reason -> Bool -> Key -> AssociatedFile -> SeekInput -> [VerifiedCopy] -> (CommandStart -> CommandCleanup) -> Annex ()
handleDropsFrom locs rs reason fromhere key afile si preverified runner = do
g <- Annex.gitRepo
l <- map (`fromTopFilePath` g)
<$> Database.Keys.getAssociatedFiles key
let fs = case afile of
AssociatedFile (Just f) -> nub (f : l)
AssociatedFile Nothing -> l
n <- getcopies fs
void $ if fromhere && checkcopies n Nothing
then go fs rs n >>= dropl fs
else go fs rs n
where
getcopies fs = do
(untrusted, have) <- trustPartition UnTrusted locs
numcopies <- if null fs
then getNumCopies
else maximum <$> mapM (getFileNumCopies . fromRawFilePath) fs
return (NumCopies (length have), numcopies, S.fromList untrusted)
{- Check that we have enough copies still to drop the content.
- When the remote being dropped from is untrusted, it was not
- counted as a copy, so having only numcopies suffices. Otherwise,
- we need more than numcopies to safely drop. -}
checkcopies (have, numcopies, _untrusted) Nothing = have > numcopies
checkcopies (have, numcopies, untrusted) (Just u)
| S.member u untrusted = have >= numcopies
| otherwise = have > numcopies
decrcopies (have, numcopies, untrusted) Nothing =
(NumCopies (fromNumCopies have - 1), numcopies, untrusted)
decrcopies v@(_have, _numcopies, untrusted) (Just u)
| S.member u untrusted = v
| otherwise = decrcopies v Nothing
go _ [] n = pure n
go fs (r:rest) n
| uuid r `S.notMember` slocs = go fs rest n
| appendonly r = go fs rest n
| exportTree (config r) = go fs rest n
| checkcopies n (Just $ Remote.uuid r) =
dropr fs r n >>= go fs rest
| otherwise = pure n
checkdrop fs n u a
| null fs = check $ -- no associated files; unused content
wantDrop True u (Just key) (AssociatedFile Nothing)
| otherwise = check $
allM (wantDrop True u (Just key) . AssociatedFile . Just) fs
where
check c = ifM c
( dodrop n u a
, return n
)
dodrop n@(have, numcopies, _untrusted) u a =
ifM (safely $ runner $ a numcopies)
( do
liftIO $ debugM "drop" $ unwords
[ "dropped"
, case afile of
AssociatedFile Nothing -> serializeKey key
AssociatedFile (Just af) -> fromRawFilePath af
, "(from " ++ maybe "here" show u ++ ")"
, "(copies now " ++ show (fromNumCopies have - 1) ++ ")"
, ": " ++ reason
]
return $ decrcopies n u
, return n
)
dropl fs n = checkdrop fs n Nothing $ \numcopies ->
stopUnless (inAnnex key) $
Command.Drop.startLocal afile ai si numcopies key preverified
dropr fs r n = checkdrop fs n (Just $ Remote.uuid r) $ \numcopies ->
Command.Drop.startRemote afile ai si numcopies key r
ai = mkActionItem (key, afile)
slocs = S.fromList locs
safely a = either (const False) id <$> tryNonAsync a

52
Annex/Environment.hs Normal file
View file

@ -0,0 +1,52 @@
{- git-annex environment
-
- Copyright 2012, 2013 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Environment where
import Annex.Common
import Utility.UserInfo
import qualified Git.Config
import Config
import Utility.Env.Set
{- Checks that the system's environment allows git to function.
- Git requires a GECOS username, or suitable git configuration, or
- environment variables.
-
- Git also requires the system have a hostname containing a dot.
- Otherwise, it tries various methods to find a FQDN, and will fail if it
- does not. To avoid replicating that code here, which would break if its
- methods change, this function does not check the hostname is valid.
- Instead, code that commits can use ensureCommit.
-}
checkEnvironment :: Annex ()
checkEnvironment = do
gitusername <- fromRepo $ Git.Config.getMaybe "user.name"
when (isNothing gitusername || gitusername == Just "") $
liftIO checkEnvironmentIO
checkEnvironmentIO :: IO ()
checkEnvironmentIO = whenM (isNothing <$> myUserGecos) $ do
username <- either (const "unknown") id <$> myUserName
ensureEnv "GIT_AUTHOR_NAME" username
ensureEnv "GIT_COMMITTER_NAME" username
where
-- existing environment is not overwritten
ensureEnv var val = setEnv var val False
{- Runs an action that commits to the repository, and if it fails,
- sets user.email and user.name to a dummy value and tries the action again. -}
ensureCommit :: Annex a -> Annex a
ensureCommit a = either retry return =<< tryNonAsync a
where
retry _ = do
name <- liftIO $ either (const "unknown") id <$> myUserName
setConfig "user.name" name
setConfig "user.email" name
a

52
Annex/Export.hs Normal file
View file

@ -0,0 +1,52 @@
{- git-annex exports
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Export where
import Annex
import Annex.CatFile
import Types
import Types.Key
import qualified Git
import qualified Types.Remote as Remote
import Messages
import Control.Applicative
import Data.Maybe
import Prelude
-- An export includes both annexed files and files stored in git.
-- For the latter, a SHA1 key is synthesized.
data ExportKey = AnnexKey Key | GitKey Key
deriving (Show, Eq, Ord)
asKey :: ExportKey -> Key
asKey (AnnexKey k) = k
asKey (GitKey k) = k
exportKey :: Git.Sha -> Annex ExportKey
exportKey sha = mk <$> catKey sha
where
mk (Just k) = AnnexKey k
mk Nothing = GitKey $ mkKey $ \k -> k
{ keyName = Git.fromRef' sha
, keyVariety = SHA1Key (HasExt False)
, keySize = Nothing
, keyMtime = Nothing
, keyChunkSize = Nothing
, keyChunkNum = Nothing
}
warnExportImportConflict :: Remote -> Annex ()
warnExportImportConflict r = do
ops <- Remote.isImportSupported r >>= return . \case
True -> "exported to and/or imported from"
False -> "exported to"
toplevelWarning True $
"Conflict detected. Different trees have been " ++ ops ++
" " ++ Remote.name r ++
". Use git-annex export to resolve this conflict."

93
Annex/ExternalAddonProcess.hs Normal file
View file

@ -0,0 +1,93 @@
{- External addon processes for special remotes and backends.
-
- Copyright 2013-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.ExternalAddonProcess where
import qualified Annex
import Annex.Common
import Git.Env
import Utility.Shell
import Messages.Progress
import Control.Concurrent.Async
import System.Log.Logger (debugM)
data ExternalAddonProcess = ExternalAddonProcess
{ externalSend :: Handle
, externalReceive :: Handle
-- Shut down the process. With True, it's forced to stop
-- immediately.
, externalShutdown :: Bool -> IO ()
, externalPid :: ExternalAddonPID
, externalProgram :: String
}
type ExternalAddonPID = Int
data ExternalAddonStartError
= ProgramNotInstalled String
| ProgramFailure String
startExternalAddonProcess :: String -> ExternalAddonPID -> Annex (Either ExternalAddonStartError ExternalAddonProcess)
startExternalAddonProcess basecmd pid = do
errrelayer <- mkStderrRelayer
g <- Annex.gitRepo
cmdpath <- liftIO $ searchPath basecmd
liftIO $ start errrelayer g cmdpath
where
start errrelayer g cmdpath = do
(cmd, ps) <- maybe (pure (basecmd, [])) findShellCommand cmdpath
let basep = (proc cmd (toCommand ps))
{ std_in = CreatePipe
, std_out = CreatePipe
, std_err = CreatePipe
}
p <- propgit g basep
tryNonAsync (createProcess p) >>= \case
Right v -> (Right <$> started cmd errrelayer v)
`catchNonAsync` const (runerr cmdpath)
Left _ -> runerr cmdpath
started cmd errrelayer pall@(Just hin, Just hout, Just herr, ph) = do
stderrelay <- async $ errrelayer herr
let shutdown forcestop = do
cancel stderrelay
if forcestop
then cleanupProcess pall
else flip onException (cleanupProcess pall) $ do
hClose herr
hClose hin
hClose hout
void $ waitForProcess ph
return $ ExternalAddonProcess
{ externalSend = hin
, externalReceive = hout
, externalPid = pid
, externalShutdown = shutdown
, externalProgram = cmd
}
started _ _ _ = giveup "internal"
propgit g p = do
environ <- propGitEnv g
return $ p { env = Just environ }
runerr (Just cmd) =
return $ Left $ ProgramFailure $
"Cannot run " ++ cmd ++ " -- Make sure it's executable and that its dependencies are installed."
runerr Nothing = do
path <- intercalate ":" <$> getSearchPath
return $ Left $ ProgramNotInstalled $
"Cannot run " ++ basecmd ++ " -- It is not installed in PATH (" ++ path ++ ")"
protocolDebug :: ExternalAddonProcess -> Bool -> String -> IO ()
protocolDebug external sendto line = debugM "external" $ unwords
[ externalProgram external ++
"[" ++ show (externalPid external) ++ "]"
, if sendto then "<--" else "-->"
, line
]

275
Annex/FileMatcher.hs Normal file
View file

@ -0,0 +1,275 @@
{- git-annex file matching
-
- Copyright 2012-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.FileMatcher (
GetFileMatcher,
checkFileMatcher,
checkFileMatcher',
checkMatcher,
checkMatcher',
matchAll,
PreferredContentData(..),
preferredContentTokens,
preferredContentKeylessTokens,
preferredContentParser,
ParseToken,
parsedToMatcher,
mkMatchExpressionParser,
largeFilesMatcher,
AddUnlockedMatcher,
addUnlockedMatcher,
checkAddUnlockedMatcher,
module Types.FileMatcher
) where
import qualified Data.Map as M
import Annex.Common
import Limit
import Utility.Matcher
import Types.Group
import Types.FileMatcher
import Types.GitConfig
import Config.GitConfig
import Annex.SpecialRemote.Config (preferreddirField)
import Git.FilePath
import Types.Remote (RemoteConfig)
import Types.ProposedAccepted
import Annex.CheckAttr
import Git.CheckAttr (unspecifiedAttr)
import qualified Git.Config
#ifdef WITH_MAGICMIME
import Annex.Magic
#endif
import Data.Either
import qualified Data.Set as S
type GetFileMatcher = FilePath -> Annex (FileMatcher Annex)
checkFileMatcher :: GetFileMatcher -> FilePath -> Annex Bool
checkFileMatcher getmatcher file = checkFileMatcher' getmatcher file (return True)
-- | Allows running an action when no matcher is configured for the file.
checkFileMatcher' :: GetFileMatcher -> FilePath -> Annex Bool -> Annex Bool
checkFileMatcher' getmatcher file notconfigured = do
matcher <- getmatcher file
checkMatcher matcher Nothing afile S.empty notconfigured d
where
afile = AssociatedFile (Just (toRawFilePath file))
-- checkMatcher will never use this, because afile is provided.
d = return True
checkMatcher :: FileMatcher Annex -> Maybe Key -> AssociatedFile -> AssumeNotPresent -> Annex Bool -> Annex Bool -> Annex Bool
checkMatcher matcher mkey afile notpresent notconfigured d
| isEmpty matcher = notconfigured
| otherwise = case (mkey, afile) of
(_, AssociatedFile (Just file)) -> go =<< fileMatchInfo file
(Just key, _) -> go (MatchingKey key afile)
_ -> d
where
go mi = checkMatcher' matcher mi notpresent
checkMatcher' :: FileMatcher Annex -> MatchInfo -> AssumeNotPresent -> Annex Bool
checkMatcher' matcher mi notpresent =
matchMrun matcher $ \o -> matchAction o notpresent mi
fileMatchInfo :: RawFilePath -> Annex MatchInfo
fileMatchInfo file = do
matchfile <- getTopFilePath <$> inRepo (toTopFilePath file)
return $ MatchingFile FileInfo
{ matchFile = matchfile
, contentFile = Just file
}
matchAll :: FileMatcher Annex
matchAll = generate []
parsedToMatcher :: [ParseResult (MatchFiles Annex)] -> Either String (FileMatcher Annex)
parsedToMatcher parsed = case partitionEithers parsed of
([], vs) -> Right $ generate vs
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
data ParseToken t
= SimpleToken String (ParseResult t)
| ValueToken String (String -> ParseResult t)
type ParseResult t = Either String (Token t)
parseToken :: [ParseToken t] -> String -> ParseResult t
parseToken l t = case syntaxToken t of
Right st -> Right st
Left _ -> go l
where
go [] = Left $ "near " ++ show t
go (SimpleToken s r : _) | s == t = r
go (ValueToken s mkr : _) | s == k = mkr v
go (_ : ps) = go ps
(k, v) = separate (== '=') t
{- This is really dumb tokenization; there's no support for quoted values.
- Open and close parens are always treated as standalone tokens;
- otherwise tokens must be separated by whitespace. -}
tokenizeMatcher :: String -> [String]
tokenizeMatcher = filter (not . null) . concatMap splitparens . words
where
splitparens = segmentDelim (`elem` "()")
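-- For example (illustrative input):
--   tokenizeMatcher "include=*.mp3 and (largerthan=100mb)"
--     == ["include=*.mp3","and","(","largerthan=100mb",")"]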
commonKeylessTokens :: LimitBy -> [ParseToken (MatchFiles Annex)]
commonKeylessTokens lb =
[ SimpleToken "anything" (simply limitAnything)
, SimpleToken "nothing" (simply limitNothing)
, ValueToken "include" (usev limitInclude)
, ValueToken "exclude" (usev limitExclude)
, ValueToken "largerthan" (usev $ limitSize lb (>))
, ValueToken "smallerthan" (usev $ limitSize lb (<))
]
commonKeyedTokens :: [ParseToken (MatchFiles Annex)]
commonKeyedTokens =
[ SimpleToken "unused" (simply limitUnused)
]
data PreferredContentData = PCD
{ matchStandard :: Either String (FileMatcher Annex)
, matchGroupWanted :: Either String (FileMatcher Annex)
, getGroupMap :: Annex GroupMap
, configMap :: M.Map UUID RemoteConfig
, repoUUID :: Maybe UUID
}
-- Tokens of preferred content expressions that do not need a Key to be
-- known.
--
-- When importing from a special remote, this is used to match
-- some preferred content expressions before the content is downloaded,
-- so the Key is not known.
preferredContentKeylessTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
preferredContentKeylessTokens pcd =
[ SimpleToken "standard" (call $ matchStandard pcd)
, SimpleToken "groupwanted" (call $ matchGroupWanted pcd)
, SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir)
] ++ commonKeylessTokens LimitAnnexFiles
where
preferreddir = maybe "public" fromProposedAccepted $
M.lookup preferreddirField =<< (`M.lookup` configMap pcd) =<< repoUUID pcd
preferredContentKeyedTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
preferredContentKeyedTokens pcd =
[ SimpleToken "present" (simply $ limitPresent $ repoUUID pcd)
, SimpleToken "securehash" (simply limitSecureHash)
, ValueToken "copies" (usev limitCopies)
, ValueToken "lackingcopies" (usev $ limitLackingCopies False)
, ValueToken "approxlackingcopies" (usev $ limitLackingCopies True)
, ValueToken "inbacked" (usev limitInBackend)
, ValueToken "metadata" (usev limitMetaData)
, ValueToken "inallgroup" (usev $ limitInAllGroup $ getGroupMap pcd)
] ++ commonKeyedTokens
preferredContentTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
preferredContentTokens pcd = concat
[ preferredContentKeylessTokens pcd
, preferredContentKeyedTokens pcd
]
preferredContentParser :: [ParseToken (MatchFiles Annex)] -> String -> [ParseResult (MatchFiles Annex)]
preferredContentParser tokens = map (parseToken tokens) . tokenizeMatcher
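-- Usage sketch (assumed expression): parse a preferred content
-- expression using only the keyless tokens, as is done when importing:
--   parsedToMatcher $ preferredContentParser
--       (preferredContentKeylessTokens pcd)
--       "include=*.jpg and smallerthan=1mb"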
mkMatchExpressionParser :: Annex (String -> [ParseResult (MatchFiles Annex)])
mkMatchExpressionParser = do
#ifdef WITH_MAGICMIME
magicmime <- liftIO initMagicMime
let mimer n f = ValueToken n (usev $ f magicmime)
#else
let mimer n = ValueToken n $
const $ Left $ "\""++n++"\" not supported; not built with MagicMime support"
#endif
let parse = parseToken $
commonKeyedTokens ++
commonKeylessTokens LimitDiskFiles ++
#ifdef WITH_MAGICMIME
[ mimer "mimetype" $
matchMagic "mimetype" getMagicMimeType providedMimeType userProvidedMimeType
, mimer "mimeencoding" $
matchMagic "mimeencoding" getMagicMimeEncoding providedMimeEncoding userProvidedMimeEncoding
]
#else
[ mimer "mimetype"
, mimer "mimeencoding"
]
#endif
return $ map parse . tokenizeMatcher
{- Generates a matcher for files large enough (or meeting other criteria)
- to be added to the annex, rather than directly to git.
-
- annex.largefiles is configured in git config, or git attributes,
- or global git-annex config, in that order.
-}
largeFilesMatcher :: Annex GetFileMatcher
largeFilesMatcher = go =<< getGitConfigVal' annexLargeFiles
where
go (HasGitConfig (Just expr)) = do
matcher <- mkmatcher expr "git config"
return $ const $ return matcher
go v = return $ \file -> do
expr <- checkAttr "annex.largefiles" file
if null expr || expr == unspecifiedAttr
then case v of
HasGlobalConfig (Just expr') ->
mkmatcher expr' "git-annex config"
_ -> return matchAll
else mkmatcher expr "gitattributes"
mkmatcher expr cfgfrom = do
parser <- mkMatchExpressionParser
either (badexpr cfgfrom) return $ parsedToMatcher $ parser expr
badexpr cfgfrom e = giveup $ "bad annex.largefiles configuration in " ++ cfgfrom ++ ": " ++ e
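-- Illustrative configuration: with the setting below, only files over
-- 100 kb are annexed, and smaller files go directly into git:
--   git config annex.largefiles "largerthan=100kb"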
newtype AddUnlockedMatcher = AddUnlockedMatcher (FileMatcher Annex)
addUnlockedMatcher :: Annex AddUnlockedMatcher
addUnlockedMatcher = AddUnlockedMatcher <$>
(go =<< getGitConfigVal' annexAddUnlocked)
where
go (HasGitConfig (Just expr)) = mkmatcher expr "git config"
go (HasGlobalConfig (Just expr)) = mkmatcher expr "git annex config"
go _ = matchalways False
mkmatcher :: String -> String -> Annex (FileMatcher Annex)
mkmatcher expr cfgfrom = case Git.Config.isTrueFalse expr of
Just b -> matchalways b
Nothing -> do
parser <- mkMatchExpressionParser
either (badexpr cfgfrom) return $ parsedToMatcher $ parser expr
badexpr cfgfrom e = giveup $ "bad annex.addunlocked configuration in " ++ cfgfrom ++ ": " ++ e
matchalways True = return $ MOp limitAnything
matchalways False = return $ MOp limitNothing
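-- Illustrative configuration: this would add text files unlocked and
-- everything else locked:
--   git config annex.addunlocked "include=*.txt"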
checkAddUnlockedMatcher :: AddUnlockedMatcher -> MatchInfo -> Annex Bool
checkAddUnlockedMatcher (AddUnlockedMatcher matcher) mi =
checkMatcher' matcher mi S.empty
simply :: MatchFiles Annex -> ParseResult (MatchFiles Annex)
simply = Right . Operation
usev :: MkLimit Annex -> String -> ParseResult (MatchFiles Annex)
usev a v = Operation <$> a v
call :: Either String (FileMatcher Annex) -> ParseResult (MatchFiles Annex)
call (Right sub) = Right $ Operation $ MatchFiles
{ matchAction = \notpresent mi ->
matchMrun sub $ \o -> matchAction o notpresent mi
, matchNeedsFileName = any matchNeedsFileName sub
, matchNeedsFileContent = any matchNeedsFileContent sub
, matchNeedsKey = any matchNeedsKey sub
, matchNeedsLocationLog = any matchNeedsLocationLog sub
}
call (Left err) = Left err

156
Annex/Fixup.hs Normal file
View file

@ -0,0 +1,156 @@
{- git-annex repository fixups
-
- Copyright 2013-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Fixup where
import Git.Types
import Git.Config
import Types.GitConfig
import Utility.Path
import Utility.SafeCommand
import Utility.Directory
import Utility.Exception
import Utility.Monad
import Utility.FileSystemEncoding
import Utility.PartialPrelude
import System.IO
import System.FilePath
import System.PosixCompat.Files
import Data.List
import Data.Maybe
import Control.Monad
import Control.Monad.IfElse
import qualified Data.Map as M
import qualified System.FilePath.ByteString as P
import qualified Data.ByteString as S
import Control.Applicative
import Prelude
fixupRepo :: Repo -> GitConfig -> IO Repo
fixupRepo r c = do
let r' = disableWildcardExpansion r
r'' <- fixupUnusualRepos r' c
if annexDirect c
then return (fixupDirect r'')
else return r''
{- Disable git's built-in wildcard expansion, which is not wanted
- when using it as plumbing by git-annex. -}
disableWildcardExpansion :: Repo -> Repo
disableWildcardExpansion r = r
{ gitGlobalOpts = gitGlobalOpts r ++ [Param "--literal-pathspecs"] }
{- Direct mode repos have core.bare=true, but are not really bare.
- Fix up the Repo to be a non-bare repo, and arrange for git commands
- run by git-annex to be passed parameters that override this setting. -}
fixupDirect :: Repo -> Repo
fixupDirect r@(Repo { location = l@(Local { gitdir = d, worktree = Nothing }) }) = do
r
{ location = l { worktree = Just (toRawFilePath (parentDir (fromRawFilePath d))) }
, gitGlobalOpts = gitGlobalOpts r ++
[ Param "-c"
, Param $ fromConfigKey coreBare ++ "=" ++ boolConfig False
]
}
fixupDirect r = r
{- Submodules have their gitdir containing ".git/modules/", and
- have core.worktree set, and also have a .git file in the top
- of the repo. We need to unset core.worktree, and change the .git
- file into a symlink to the git directory. This way, annex symlinks will be
- of the usual .git/annex/object form, and will consistently work
- whether a repo is used as a submodule or not, and wherever the
- submodule is mounted.
-
- git-worktree directories have a .git file.
- That needs to be converted to a symlink, and .git/annex made a symlink
- to the main repository's git-annex directory.
- The worktree shares git config with the main repository, so the same
- annex uuid and other configuration will be used in the worktree as in
- the main repository.
-
- git clone or init with --separate-git-dir similarly makes a .git file,
- which in that case points to a different git directory. It's
- also converted to a symlink so links to .git/annex will work.
-
- When the filesystem doesn't support symlinks, we cannot make .git
- into a symlink. But we don't need to, since the repo will use adjusted
- unlocked branches.
-
- Don't do any of this if the repo has not been initialized for git-annex
- use yet.
-}
fixupUnusualRepos :: Repo -> GitConfig -> IO Repo
fixupUnusualRepos r@(Repo { location = l@(Local { worktree = Just w, gitdir = d }) }) c
| isNothing (annexVersion c) = return r
| needsSubmoduleFixup r = do
when (coreSymlinks c) $
(replacedotgit >> unsetcoreworktree)
`catchNonAsync` \_e -> hPutStrLn stderr
"warning: unable to convert submodule to form that will work with git-annex"
return $ r'
{ config = M.delete "core.worktree" (config r)
}
| otherwise = ifM (needsGitLinkFixup r)
( do
when (coreSymlinks c) $
(replacedotgit >> worktreefixup)
`catchNonAsync` \_e -> hPutStrLn stderr
"warning: unable to convert .git file to symlink that will work with git-annex"
return r'
, return r
)
where
dotgit = w P.</> ".git"
dotgit' = fromRawFilePath dotgit
replacedotgit = whenM (doesFileExist dotgit') $ do
linktarget <- relPathDirToFile (fromRawFilePath w) (fromRawFilePath d)
nukeFile dotgit'
createSymbolicLink linktarget dotgit'
unsetcoreworktree =
maybe (error "unset core.worktree failed") (\_ -> return ())
=<< Git.Config.unset "core.worktree" r
worktreefixup =
-- git-worktree sets up a "commondir" file that contains
-- the path to the main git directory.
-- Using --separate-git-dir does not.
catchDefaultIO Nothing (headMaybe . lines <$> readFile (fromRawFilePath (d P.</> "commondir"))) >>= \case
Just gd -> do
-- Make the worktree's git directory
-- contain an annex symlink to the main
-- repository's annex directory.
let linktarget = gd </> "annex"
createSymbolicLink linktarget (dotgit' </> "annex")
Nothing -> return ()
-- Repo adjusted, so that symlinks to objects that get checked
-- in will have the usual path, rather than pointing off to the
-- real .git directory.
r'
| coreSymlinks c = r { location = l { gitdir = dotgit } }
| otherwise = r
fixupUnusualRepos r _ = return r
needsSubmoduleFixup :: Repo -> Bool
needsSubmoduleFixup (Repo { location = (Local { worktree = Just _, gitdir = d }) }) =
(".git" P.</> "modules") `S.isInfixOf` d
needsSubmoduleFixup _ = False
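-- E.g. (illustrative): a submodule checkout whose gitdir is
-- "../.git/modules/sub" contains ".git/modules", so it needs the fixup;
-- an ordinary repo with gitdir ".git" does not.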
needsGitLinkFixup :: Repo -> IO Bool
needsGitLinkFixup (Repo { location = (Local { worktree = Just wt, gitdir = d }) })
-- Optimization: Avoid statting .git in the common case; only
-- when the gitdir is not in the usual place inside the worktree
-- might .git be a file.
| wt P.</> ".git" == d = return False
| otherwise = doesFileExist (fromRawFilePath (wt P.</> ".git"))
needsGitLinkFixup _ = return False

127
Annex/GitOverlay.hs Normal file
View file

@ -0,0 +1,127 @@
{- Temporarily changing how git-annex runs git commands.
-
- Copyright 2014-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.GitOverlay (
module Annex.GitOverlay,
AltIndexFile(..),
) where
import qualified Control.Exception as E
import Annex.Common
import Types.IndexFiles
import Git
import Git.Types
import Git.Index
import Git.Env
import qualified Annex
import qualified Annex.Queue
{- Runs an action using a different git index file. -}
withIndexFile :: AltIndexFile -> (FilePath -> Annex a) -> Annex a
withIndexFile i = withAltRepo usecachedgitenv restoregitenv
where
-- This is an optimisation. Since withIndexFile is run repeatedly,
-- typically with the same file, and addGitEnv uses the slow
-- getEnvironment when gitEnv is Nothing, and has to do a
-- nontrivial amount of work, we cache the modified environment
-- the first time, and reuse it in subsequent calls for the same
-- index file.
--
-- (This could be done at another level; eg when creating the
-- Git object in the first place, but it's more efficient to let
-- the environment be inherited in all calls to git where it
-- does not need to be modified.)
--
-- Also, the use of AltIndexFile avoids needing to construct
-- the FilePath each time, which saves enough time to be worth the
-- added complication.
usecachedgitenv g = case gitEnv g of
Nothing -> Annex.withState $ \s -> case Annex.cachedgitenv s of
Just (cachedi, cachedf, cachede) | i == cachedi ->
return (s, (g { gitEnv = Just cachede }, cachedf))
_ -> do
r@(g', f) <- addindex g
let cache = (,,)
<$> Just i
<*> Just f
<*> gitEnv g'
return (s { Annex.cachedgitenv = cache }, r)
Just _ -> liftIO $ addindex g
addindex g = do
f <- indexEnvVal $ case i of
AnnexIndexFile -> gitAnnexIndex g
ViewIndexFile -> gitAnnexViewIndex g
g' <- addGitEnv g indexEnv f
return (g', f)
restoregitenv g g' = g' { gitEnv = gitEnv g }
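-- Usage sketch (hypothetical action): run queued git commands against
-- the git-annex branch's own index instead of the repository index:
--   withIndexFile AnnexIndexFile $ \_indexpath -> Annex.Queue.flush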
{- Runs an action using a different git work tree.
-
- Smudge and clean filters are disabled in this work tree. -}
withWorkTree :: FilePath -> Annex a -> Annex a
withWorkTree d a = withAltRepo
(\g -> return $ (g { location = modlocation (location g), gitGlobalOpts = gitGlobalOpts g ++ disableSmudgeConfig }, ()))
(\g g' -> g' { location = location g, gitGlobalOpts = gitGlobalOpts g })
(const a)
where
modlocation l@(Local {}) = l { worktree = Just (toRawFilePath d) }
modlocation _ = error "withWorkTree of non-local git repo"
disableSmudgeConfig = map Param
[ "-c", "filter.annex.smudge="
, "-c", "filter.annex.clean="
]
{- Runs an action with the git index file and HEAD, and a few other
- files that are related to the work tree coming from an overlay
- directory other than the usual. This is done by pointing
- GIT_COMMON_DIR at the regular git directory, and GIT_DIR at the
- overlay directory.
-
- Needs git 2.2.0 or newer.
-}
withWorkTreeRelated :: FilePath -> Annex a -> Annex a
withWorkTreeRelated d a = withAltRepo modrepo unmodrepo (const a)
where
modrepo g = liftIO $ do
g' <- addGitEnv g "GIT_COMMON_DIR"
=<< absPath (fromRawFilePath (localGitDir g))
g'' <- addGitEnv g' "GIT_DIR" d
return (g'' { gitEnvOverridesGitDir = True }, ())
unmodrepo g g' = g'
{ gitEnv = gitEnv g
, gitEnvOverridesGitDir = gitEnvOverridesGitDir g
}
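-- In effect (sketch of the resulting environment): inside the action,
-- git runs with
--   GIT_COMMON_DIR=<regular git directory>  GIT_DIR=<overlay directory>
-- so the index, HEAD, and other worktree-related files come from the
-- overlay while objects and config come from the regular git directory.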
withAltRepo
:: (Repo -> Annex (Repo, t))
-- ^ modify Repo
-> (Repo -> Repo -> Repo)
-- ^ undo modifications; first Repo is the original and second
-- is the one after running the action.
-> (t -> Annex a)
-> Annex a
withAltRepo modrepo unmodrepo a = do
g <- gitRepo
(g', t) <- modrepo g
q <- Annex.Queue.get
v <- tryNonAsync $ do
Annex.changeState $ \s -> s
{ Annex.repo = g'
-- Start a separate queue for any changes made
-- with the modified repo.
, Annex.repoqueue = Nothing
}
a t
void $ tryNonAsync Annex.Queue.flush
Annex.changeState $ \s -> s
{ Annex.repo = unmodrepo g (Annex.repo s)
, Annex.repoqueue = Just q
}
either E.throw return v

47
Annex/HashObject.hs Normal file
View file

@ -0,0 +1,47 @@
{- git hash-object interface, with handle automatically stored in the Annex monad
-
- Copyright 2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.HashObject (
hashFile,
hashBlob,
hashObjectHandle,
hashObjectStop,
) where
import Annex.Common
import qualified Git.HashObject
import qualified Annex
import Git.Types
hashObjectHandle :: Annex Git.HashObject.HashObjectHandle
hashObjectHandle = maybe startup return =<< Annex.getState Annex.hashobjecthandle
where
startup = do
h <- inRepo $ Git.HashObject.hashObjectStart True
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Just h }
return h
hashObjectStop :: Annex ()
hashObjectStop = maybe noop stop =<< Annex.getState Annex.hashobjecthandle
where
stop h = do
liftIO $ Git.HashObject.hashObjectStop h
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Nothing }
return ()
hashFile :: FilePath -> Annex Sha
hashFile f = do
h <- hashObjectHandle
liftIO $ Git.HashObject.hashFile h f
{- Note that the content will be written to a temp file.
- So it may be faster to use Git.HashObject.hashObject for large
- blob contents. -}
hashBlob :: Git.HashObject.HashableBlob b => b -> Annex Sha
hashBlob content = do
h <- hashObjectHandle
liftIO $ Git.HashObject.hashBlob h content
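-- Usage sketch (a strict ByteString is a HashableBlob; encodeBS is
-- assumed to be in scope via Annex.Common):
--   sha <- hashBlob (encodeBS "annex link target")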

97
Annex/Hook.hs Normal file
View file

@ -0,0 +1,97 @@
{- git-annex git hooks
-
- Note that it's important that the content of scripts installed by
- git-annex not change, otherwise removing old hooks using an old
- version of the script would fail.
-
- Copyright 2013-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Hook where
import Annex.Common
import qualified Git.Hook as Git
import qualified Annex
import Utility.Shell
import qualified Data.Map as M
-- Remove all hooks.
unHook :: Annex ()
unHook = do
hookUnWrite preCommitHook
hookUnWrite postReceiveHook
hookUnWrite postCheckoutHook
hookUnWrite postMergeHook
hookUnWrite preCommitAnnexHook
hookUnWrite postUpdateAnnexHook
preCommitHook :: Git.Hook
preCommitHook = Git.Hook "pre-commit" (mkHookScript "git annex pre-commit .") []
postReceiveHook :: Git.Hook
postReceiveHook = Git.Hook "post-receive"
-- Only run git-annex post-receive when git-annex supports it,
-- to avoid failing if the repository with this hook is used
-- with an older version of git-annex.
(mkHookScript "if git annex post-receive --help >/dev/null 2>&1; then git annex post-receive; fi")
-- This is an old version of the hook script.
[ mkHookScript "git annex post-receive"
]
postCheckoutHook :: Git.Hook
postCheckoutHook = Git.Hook "post-checkout" smudgeHook []
postMergeHook :: Git.Hook
postMergeHook = Git.Hook "post-merge" smudgeHook []
-- Older versions of git-annex didn't support this command, but neither did
-- they support v7 repositories.
smudgeHook :: String
smudgeHook = mkHookScript "git annex smudge --update"
preCommitAnnexHook :: Git.Hook
preCommitAnnexHook = Git.Hook "pre-commit-annex" "" []
postUpdateAnnexHook :: Git.Hook
postUpdateAnnexHook = Git.Hook "post-update-annex" "" []
mkHookScript :: String -> String
mkHookScript s = unlines
[ shebang
, "# automatically configured by git-annex"
, s
]
hookWrite :: Git.Hook -> Annex ()
hookWrite h = unlessM (inRepo $ Git.hookWrite h) $
hookWarning h "already exists, not configuring"
hookUnWrite :: Git.Hook -> Annex ()
hookUnWrite h = unlessM (inRepo $ Git.hookUnWrite h) $
hookWarning h "contents modified; not deleting. Edit it to remove call to git annex."
hookWarning :: Git.Hook -> String -> Annex ()
hookWarning h msg = do
r <- gitRepo
warning $ Git.hookName h ++ " hook (" ++ Git.hookFile h r ++ ") " ++ msg
{- Runs a hook. To avoid checking if the hook exists every time,
- the existing hooks are cached. -}
runAnnexHook :: Git.Hook -> Annex ()
runAnnexHook hook = do
m <- Annex.getState Annex.existinghooks
case M.lookup hook m of
Just True -> run
Just False -> noop
Nothing -> do
exists <- inRepo $ Git.hookExists hook
Annex.changeState $ \s -> s
{ Annex.existinghooks = M.insert hook exists m }
when exists run
where
run = unlessM (inRepo $ Git.runHook hook) $ do
h <- fromRepo $ Git.hookFile hook
warning $ h ++ " failed"
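-- Usage sketch: a command that wants the user's pre-commit-annex hook
-- to run simply does:
--   runAnnexHook preCommitAnnexHook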

678
Annex/Import.hs Normal file
View file

@ -0,0 +1,678 @@
{- git-annex import from remotes
-
- Copyright 2019-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Import (
ImportTreeConfig(..),
ImportCommitConfig(..),
buildImportCommit,
buildImportTrees,
canImportKeys,
importKeys,
makeImportMatcher,
getImportableContents,
) where
import Annex.Common
import Types.Import
import qualified Types.Remote as Remote
import Git.Types
import Git.Tree
import Git.Sha
import Git.FilePath
import Git.History
import qualified Git.Ref
import qualified Git.Branch
import qualified Annex
import Annex.Link
import Annex.LockFile
import Annex.Content
import Annex.Export
import Annex.RemoteTrackingBranch
import Annex.HashObject
import Annex.Transfer
import Annex.CheckIgnore
import Command
import Backend
import Types.Key
import Types.KeySource
import Messages.Progress
import Utility.DataUnits
import Utility.Metered
import Logs.Export
import Logs.Location
import Logs.PreferredContent
import Types.FileMatcher
import Annex.FileMatcher
import qualified Utility.Matcher
import qualified Database.Export as Export
import qualified Database.ContentIdentifier as CIDDb
import qualified Logs.ContentIdentifier as CIDLog
import Backend.Utilities
import Control.Concurrent.STM
import qualified Data.Map.Strict as M
import qualified Data.Set as S
import qualified System.FilePath.Posix.ByteString as Posix
import qualified System.FilePath.ByteString as P
{- Configures how to build an import tree. -}
data ImportTreeConfig
= ImportTree
-- ^ Import the tree as-is from the remote.
| ImportSubTree TopFilePath Sha
-- ^ Import a tree from the remote and graft it into a subdirectory
-- of the existing tree whose Sha is provided, replacing anything
-- that was there before.
deriving (Show)
{- Configures how to build an import commit. -}
data ImportCommitConfig = ImportCommitConfig
{ importCommitTracking :: Maybe Sha
-- ^ Current commit on the remote tracking branch.
, importCommitMode :: Git.Branch.CommitMode
, importCommitMessage :: String
}
{- Builds a commit for an import from a special remote.
-
- When there are no changes to make (importCommitTracking
- already matches what was imported), returns Nothing.
-
- After importing from a remote, exporting the same thing back to the
- remote should be a no-op. So, the export log and database are
- updated to reflect the imported tree.
-
- This does not download any content from a remote. But since it needs the
- Key of imported files to be known, its caller will have to first download
- new files in order to generate keys for them.
-}
buildImportCommit
:: Remote
-> ImportTreeConfig
-> ImportCommitConfig
-> ImportableContents (Either Sha Key)
-> Annex (Maybe Ref)
buildImportCommit remote importtreeconfig importcommitconfig importable =
case importCommitTracking importcommitconfig of
Nothing -> go Nothing
Just trackingcommit -> inRepo (Git.Ref.tree trackingcommit) >>= \case
Nothing -> go Nothing
Just _ -> go (Just trackingcommit)
where
basetree = case importtreeconfig of
ImportTree -> emptyTree
ImportSubTree _ sha -> sha
subdir = case importtreeconfig of
ImportTree -> Nothing
ImportSubTree dir _ -> Just dir
go trackingcommit = do
imported@(History finaltree _) <-
buildImportTrees basetree subdir importable
buildImportCommit' remote importcommitconfig trackingcommit imported >>= \case
Just finalcommit -> do
updatestate finaltree
return (Just finalcommit)
Nothing -> return Nothing
updatestate committedtree = do
importedtree <- case subdir of
Nothing -> pure committedtree
Just dir ->
let subtreeref = Ref $
fromRef' committedtree
<> ":"
<> getTopFilePath dir
in fromMaybe emptyTree
<$> inRepo (Git.Ref.tree subtreeref)
updateexportdb importedtree
oldexport <- updateexportlog importedtree
updatelocationlog oldexport importedtree
updateexportdb importedtree = do
db <- Export.openDb (Remote.uuid remote)
Export.writeLockDbWhile db $ do
prevtree <- liftIO $ fromMaybe emptyTree
<$> Export.getExportTreeCurrent db
when (importedtree /= prevtree) $ do
Export.updateExportDb db prevtree importedtree
liftIO $ Export.recordExportTreeCurrent db importedtree
Export.closeDb db
updateexportlog importedtree = do
oldexport <- getExport (Remote.uuid remote)
recordExport (Remote.uuid remote) $ ExportChange
{ oldTreeish = exportedTreeishes oldexport
, newTreeish = importedtree
}
return oldexport
-- downloadImport takes care of updating the location log
-- for the local repo when keys are downloaded, and also updates
-- the location log for the remote for keys that are present in it.
-- That leaves updating the location log for the remote for keys
-- that have had the last copy of their content removed from it.
--
-- This must run after the export database has been updated
-- and flushed to disk, so it can query it.
updatelocationlog oldexport finaltree = do
let stillpresent db k = liftIO $ not . null
<$> Export.getExportedLocation db k
let updater db oldkey _newkey _ = case oldkey of
Just (AnnexKey k) -> unlessM (stillpresent db k) $
logChange k (Remote.uuid remote) InfoMissing
Just (GitKey _) -> noop
Nothing -> noop
db <- Export.openDb (Remote.uuid remote)
forM_ (exportedTreeishes oldexport) $ \oldtree ->
Export.runExportDiffUpdater updater db oldtree finaltree
Export.closeDb db
buildImportCommit' :: Remote -> ImportCommitConfig -> Maybe Sha -> History Sha -> Annex (Maybe Sha)
buildImportCommit' remote importcommitconfig mtrackingcommit imported@(History ti _) =
case mtrackingcommit of
Nothing -> Just <$> mkcommitsunconnected imported
Just trackingcommit -> do
-- Get history of tracking branch to at most
-- one more level deep than what was imported,
-- so we'll have enough history to compare,
-- but not spend too much time getting it.
let maxdepth = succ importeddepth
inRepo (getHistoryToDepth maxdepth trackingcommit)
>>= go trackingcommit
where
go _ Nothing = Just <$> mkcommitsunconnected imported
go trackingcommit (Just h)
-- If the tracking branch head is a merge commit
-- and one side of the merge matches the history,
-- nothing new needs to be committed.
| t == ti && any sametodepth (S.toList s) = return Nothing
-- If the tracking branch matches the history,
-- nothing new needs to be committed.
-- (This is unlikely to happen.)
| sametodepth h' = return Nothing
| otherwise = do
importedcommit <- case getRemoteTrackingBranchImportHistory h of
Nothing -> mkcommitsunconnected imported
Just oldimported@(History oldhc _)
| importeddepth == 1 ->
mkcommitconnected imported oldimported
| otherwise -> do
let oldimportedtrees = mapHistory historyCommitTree oldimported
mknewcommits oldhc oldimportedtrees imported
ti' <- addBackExportExcluded remote ti
Just <$> makeRemoteTrackingBranchMergeCommit'
trackingcommit importedcommit ti'
where
h'@(History t s) = mapHistory historyCommitTree h
importeddepth = historyDepth imported
sametodepth b = imported == truncateHistoryToDepth importeddepth b
mkcommit parents tree = inRepo $ Git.Branch.commitTree
(importCommitMode importcommitconfig)
(importCommitMessage importcommitconfig)
parents
tree
-- Start a new history of import commits, not connected to any
-- prior import commits.
mkcommitsunconnected (History importedtree hs) = do
parents <- mapM mkcommitsunconnected (S.toList hs)
mkcommit parents importedtree
-- Commit the new history connected with the old history.
-- Used when the import is not versioned, so the history depth is 1.
mkcommitconnected (History importedtree _) (History oldhc _) = do
let parents = [historyCommit oldhc]
mkcommit parents importedtree
-- Reuse the commits from the old imported History when possible.
mknewcommits oldhc old new@(History importedtree hs)
| old == new = return $ historyCommit oldhc
| otherwise = do
parents <- mapM (mknewcommits oldhc old) (S.toList hs)
mkcommit parents importedtree
{- Builds a history of git trees reflecting the ImportableContents.
-
- When a subdir is provided, the imported tree is grafted into the basetree at
- that location, replacing any object that was there.
-}
buildImportTrees
:: Ref
-> Maybe TopFilePath
-> ImportableContents (Either Sha Key)
-> Annex (History Sha)
buildImportTrees basetree msubdir importable = History
<$> (buildtree (importableContents importable) =<< Annex.gitRepo)
<*> buildhistory
where
buildhistory = S.fromList
<$> mapM (buildImportTrees basetree msubdir)
(importableHistory importable)
buildtree ls repo = withMkTreeHandle repo $ \hdl -> do
importtree <- liftIO . recordTree' hdl
. treeItemsToTree
=<< mapM mktreeitem ls
case msubdir of
Nothing -> return importtree
Just subdir -> liftIO $
graftTree' importtree subdir basetree repo hdl
mktreeitem (loc, v) = case v of
Right k -> do
relf <- fromRepo $ fromTopFilePath topf
symlink <- calcRepo $ gitAnnexLink (fromRawFilePath relf) k
linksha <- hashSymlink symlink
return $ TreeItem treepath (fromTreeItemType TreeSymlink) linksha
Left sha ->
return $ TreeItem treepath (fromTreeItemType TreeFile) sha
where
lf = fromImportLocation loc
treepath = asTopFilePath lf
topf = asTopFilePath $
maybe lf (\sd -> getTopFilePath sd P.</> lf) msubdir
canImportKeys :: Remote -> Bool -> Bool
canImportKeys remote importcontent =
importcontent || isJust (Remote.importKey ia)
where
ia = Remote.importActions remote
{- Downloads all new ContentIdentifiers, or when importcontent is False,
- generates Keys without downloading.
-
- Generates either a Key or a git Sha, depending on annex.largefiles.
- But when importcontent is False, it cannot match on annex.largefiles
- (or generate a git Sha), so always generates Keys.
-
- Supports concurrency when enabled.
-
- If it fails on any file, the whole thing fails with Nothing,
- but it will resume where it left off.
-
- Note that, when a ContentIdentifier has been imported before,
- generates the same thing that was imported before, so annex.largefiles
- is not reapplied.
-}
importKeys
:: Remote
-> ImportTreeConfig
-> Bool
-> ImportableContents (ContentIdentifier, ByteSize)
-> Annex (Maybe (ImportableContents (Either Sha Key)))
importKeys remote importtreeconfig importcontent importablecontents = do
unless (canImportKeys remote importcontent) $
giveup "This remote does not support importing without downloading content."
-- This map is used to remember content identifiers that
-- were just imported, before they have necessarily been
-- stored in the database. This way, if the same content
-- identifier appears multiple times in the
-- importablecontents (eg when it has a history),
-- they will only be imported once.
cidmap <- liftIO $ newTVarIO M.empty
-- When concurrency is enabled, this set is needed to
-- avoid two threads both importing the same content identifier.
importing <- liftIO $ newTVarIO S.empty
withExclusiveLock gitAnnexContentIdentifierLock $
bracket CIDDb.openDb CIDDb.closeDb $ \db -> do
CIDDb.needsUpdateFromLog db
>>= maybe noop (CIDDb.updateFromLog db)
go False cidmap importing importablecontents db
where
go oldversion cidmap importing (ImportableContents l h) db = do
largematcher <- largeFilesMatcher
jobs <- forM l $ \i ->
startimport cidmap importing db i oldversion largematcher
l' <- liftIO $ forM jobs $
either pure (atomically . takeTMVar)
if any isNothing l'
then return Nothing
else do
h' <- mapM (\ic -> go True cidmap importing ic db) h
if any isNothing h'
then return Nothing
else return $ Just $
ImportableContents
(catMaybes l')
(catMaybes h')
waitstart importing cid = liftIO $ atomically $ do
s <- readTVar importing
if S.member cid s
then retry
else writeTVar importing $ S.insert cid s
signaldone importing cid = liftIO $ atomically $ do
s <- readTVar importing
writeTVar importing $ S.delete cid s
startimport cidmap importing db i@(loc, (cid, _sz)) oldversion largematcher = getcidkey cidmap db cid >>= \case
(k:ks) ->
-- If the same content was imported before
-- yielding multiple different keys, it's not clear
-- which is best to use this time, so pick the
-- first in the list. But, if any of them is a
-- git sha, use it, because the content must
-- be included in the git repo then.
let v = case mapMaybe keyGitSha (k:ks) of
(sha:_) -> Left sha
[] -> Right k
in return $ Left $ Just (loc, v)
[] -> do
job <- liftIO $ newEmptyTMVarIO
let ai = ActionItemOther (Just (fromRawFilePath (fromImportLocation loc)))
let si = SeekInput []
let importaction = starting ("import " ++ Remote.name remote) ai si $ do
when oldversion $
showNote "old version"
tryNonAsync (importordownload cidmap db i largematcher) >>= \case
Left e -> next $ do
warning (show e)
liftIO $ atomically $
putTMVar job Nothing
return False
Right r -> next $ do
liftIO $ atomically $
putTMVar job r
return True
commandAction $ bracket_
(waitstart importing cid)
(signaldone importing cid)
importaction
return (Right job)
importordownload cidmap db (loc, (cid, sz)) largematcher = do
f <- locworktreefile loc
matcher <- largematcher (fromRawFilePath f)
-- When importing a key is supported, always use it rather
-- than downloading and retrieving a key, to avoid
-- generating trees with different keys for the same content.
let act = if importcontent
then case Remote.importKey ia of
Nothing -> dodownload
Just _ -> if Utility.Matcher.introspect matchNeedsFileContent matcher
then dodownload
else doimport
else doimport
act cidmap db (loc, (cid, sz)) f matcher
doimport cidmap db (loc, (cid, sz)) f matcher =
case Remote.importKey ia of
Nothing -> error "internal" -- checked earlier
Just importkey -> do
when (Utility.Matcher.introspect matchNeedsFileContent matcher) $
giveup "annex.largefiles configuration examines file contents, so cannot import without content."
let mi = MatchingInfo ProvidedInfo
{ providedFilePath = f
, providedKey = Nothing
, providedFileSize = sz
, providedMimeType = Nothing
, providedMimeEncoding = Nothing
}
islargefile <- checkMatcher' matcher mi mempty
metered Nothing sz $ const $ if islargefile
then doimportlarge importkey cidmap db loc cid sz f
else doimportsmall cidmap db loc cid sz
doimportlarge importkey cidmap db loc cid sz f p =
tryNonAsync importer >>= \case
Right (Just (k, True)) -> return $ Just (loc, Right k)
Right _ -> return Nothing
Left e -> do
warning (show e)
return Nothing
where
importer = do
unsizedk <- importkey loc cid
-- Don't display progress when generating
-- key, if the content will later be
-- downloaded, which is a more expensive
-- operation generally.
(if importcontent then nullMeterUpdate else p)
-- This avoids every remote needing
-- to add the size.
let k = alterKey unsizedk $ \kd -> kd
{ keySize = keySize kd <|> Just sz }
checkSecureHashes k >>= \case
Nothing -> do
recordcidkey cidmap db cid k
logChange k (Remote.uuid remote) InfoPresent
if importcontent
then getcontent k
else return (Just (k, True))
Just msg -> giveup (msg ++ " to import")
getcontent :: Key -> Annex (Maybe (Key, Bool))
getcontent k = do
let af = AssociatedFile (Just f)
let downloader p' tmpfile = do
k' <- Remote.retrieveExportWithContentIdentifier
ia loc cid tmpfile
(pure k)
(combineMeterUpdate p' p)
ok <- moveAnnex k' tmpfile
when ok $
logStatus k InfoPresent
return (Just (k', ok))
checkDiskSpaceToGet k Nothing $
notifyTransfer Download af $
download (Remote.uuid remote) k af stdRetry $ \p' ->
withTmp k $ downloader p'
-- The file is small, so is added to git, so while importing
-- without content does not retrieve annexed files, it does
-- need to retrieve this file.
doimportsmall cidmap db loc cid sz p = do
let downloader tmpfile = do
k <- Remote.retrieveExportWithContentIdentifier
ia loc cid tmpfile
(mkkey tmpfile)
p
case keyGitSha k of
Just sha -> do
recordcidkey cidmap db cid k
return sha
Nothing -> error "internal"
checkDiskSpaceToGet tmpkey Nothing $
withTmp tmpkey $ \tmpfile ->
tryNonAsync (downloader tmpfile) >>= \case
Right sha -> return $ Just (loc, Left sha)
Left e -> do
warning (show e)
return Nothing
where
tmpkey = importKey cid sz
mkkey tmpfile = gitShaKey <$> hashFile tmpfile
dodownload cidmap db (loc, (cid, sz)) f matcher = do
let af = AssociatedFile (Just f)
let downloader tmpfile p = do
k <- Remote.retrieveExportWithContentIdentifier
ia loc cid tmpfile
(mkkey tmpfile)
p
case keyGitSha k of
Nothing -> do
ok <- moveAnnex k tmpfile
when ok $ do
recordcidkey cidmap db cid k
logStatus k InfoPresent
logChange k (Remote.uuid remote) InfoPresent
return (Right k, ok)
Just sha -> do
recordcidkey cidmap db cid k
return (Left sha, True)
let rundownload tmpfile p = tryNonAsync (downloader tmpfile p) >>= \case
Right (v, True) -> return $ Just (loc, v)
Right (_, False) -> return Nothing
Left e -> do
warning (show e)
return Nothing
checkDiskSpaceToGet tmpkey Nothing $
notifyTransfer Download af $
download (Remote.uuid remote) tmpkey af stdRetry $ \p ->
withTmp tmpkey $ \tmpfile ->
metered (Just p) tmpkey $
const (rundownload tmpfile)
where
tmpkey = importKey cid sz
mkkey tmpfile = do
let mi = MatchingFile FileInfo
{ matchFile = f
, contentFile = Just (toRawFilePath tmpfile)
}
islargefile <- checkMatcher' matcher mi mempty
if islargefile
then do
backend <- chooseBackend (fromRawFilePath f)
let ks = KeySource
{ keyFilename = f
, contentLocation = toRawFilePath tmpfile
, inodeCache = Nothing
}
fst <$> genKey ks nullMeterUpdate backend
else gitShaKey <$> hashFile tmpfile
ia = Remote.importActions remote
locworktreefile loc = fromRepo $ fromTopFilePath $ asTopFilePath $
case importtreeconfig of
ImportTree -> fromImportLocation loc
ImportSubTree subdir _ ->
getTopFilePath subdir P.</> fromImportLocation loc
getcidkey cidmap db cid = liftIO $
CIDDb.getContentIdentifierKeys db rs cid >>= \case
[] -> atomically $
maybeToList . M.lookup cid <$> readTVar cidmap
l -> return l
recordcidkey cidmap db cid k = do
liftIO $ atomically $ modifyTVar' cidmap $
M.insert cid k
liftIO $ CIDDb.recordContentIdentifier db rs cid k
CIDLog.recordContentIdentifier rs cid k
rs = Remote.remoteStateHandle remote
{- Temporary key used for import of a ContentIdentifier while downloading
- content, before generating its real key. -}
importKey :: ContentIdentifier -> Integer -> Key
importKey (ContentIdentifier cid) size = mkKey $ \k -> k
{ keyName = genKeyName (decodeBS cid)
, keyVariety = OtherKey "CID"
, keySize = Just size
}
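-- For example (illustrative values):
--   importKey (ContentIdentifier "v1:abc123") 42
-- produces a temporary key with keyVariety OtherKey "CID" and
-- keySize Just 42, used only until the real key can be generated.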
{-- Export omits non-preferred content from the tree stored on the
-- remote. So the import will normally have that content
-- omitted (unless something else added files with the same names to the
-- special remote).
--
-- That presents a problem: Merging the imported tree would result
-- in deletion of the files that were excluded from export.
-- To avoid that happening, this adds them back to the imported tree.
--}
addBackExportExcluded :: Remote -> Sha -> Annex Sha
addBackExportExcluded remote importtree =
getExportExcluded (Remote.uuid remote) >>= \case
[] -> return importtree
excludedlist -> inRepo $
adjustTree
-- don't remove any
(pure . Just)
excludedlist
-- if something was imported with the same
-- name as a file that was previously
-- excluded from import, use what was imported
(\imported _excluded -> imported)
[]
importtree
{- Match the preferred content of the remote at import time.
-
- Only keyless tokens are supported, because the keys are not known
- until an imported file is downloaded, which is too late to bother
- excluding it from an import.
-}
makeImportMatcher :: Remote -> Annex (Either String (FileMatcher Annex))
makeImportMatcher r = load preferredContentKeylessTokens >>= \case
Nothing -> return $ Right matchAll
Just (Right v) -> return $ Right v
Just (Left err) -> load preferredContentTokens >>= \case
Just (Left err') -> return $ Left err'
_ -> return $ Left $
"The preferred content expression contains terms that cannot be checked when importing: " ++ err
where
load t = M.lookup (Remote.uuid r) . fst <$> preferredRequiredMapsLoad' t
{- Gets the ImportableContents from the remote.
-
- Filters out any paths that include a ".git" component, because git does
- not allow storing ".git" in a git repository. While it is possible to
- write a git tree that contains that, git will complain and refuse to
- check it out.
-
- Filters out new things not matching the FileMatcher or that are
- gitignored. However, files that are already in git get imported
- regardless. (Similar to how git add behaves on gitignored files.)
- This avoids creating a remote tracking branch that, when merged,
- would delete the files.
-}
getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
getImportableContents r importtreeconfig ci matcher =
Remote.listImportableContents (Remote.importActions r) >>= \case
Nothing -> return Nothing
Just importable -> do
dbhandle <- Export.openDb (Remote.uuid r)
Just <$> filterunwanted dbhandle importable
where
filterunwanted dbhandle ic = ImportableContents
<$> filterM (wanted dbhandle) (importableContents ic)
<*> mapM (filterunwanted dbhandle) (importableHistory ic)
wanted dbhandle (loc, (_cid, sz))
| ingitdir = pure False
| otherwise =
isknown <||> (matches <&&> notignored)
where
-- Checks, from least to most expensive.
ingitdir = ".git" `elem` Posix.splitDirectories (fromImportLocation loc)
matches = matchesImportLocation matcher loc sz
isknown = isKnownImportLocation dbhandle loc
notignored = notIgnoredImportLocation importtreeconfig ci loc
isKnownImportLocation :: Export.ExportHandle -> ImportLocation -> Annex Bool
isKnownImportLocation dbhandle loc = liftIO $
not . null <$> Export.getExportTreeKey dbhandle loc
matchesImportLocation :: FileMatcher Annex -> ImportLocation -> Integer -> Annex Bool
matchesImportLocation matcher loc sz = checkMatcher' matcher mi mempty
where
mi = MatchingInfo $ ProvidedInfo
{ providedFilePath = fromImportLocation loc
, providedKey = Nothing
, providedFileSize = sz
, providedMimeType = Nothing
, providedMimeEncoding = Nothing
}
notIgnoredImportLocation :: ImportTreeConfig -> CheckGitIgnore -> ImportLocation -> Annex Bool
notIgnoredImportLocation importtreeconfig ci loc = not <$> checkIgnored ci f
where
f = fromRawFilePath $ case importtreeconfig of
ImportSubTree dir _ ->
getTopFilePath dir P.</> fromImportLocation loc
ImportTree ->
fromImportLocation loc

389
Annex/Ingest.hs Normal file
View file

@ -0,0 +1,389 @@
{- git-annex content ingestion
-
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Ingest (
LockedDown(..),
LockDownConfig(..),
lockDown,
ingestAdd,
ingestAdd',
ingest,
ingest',
finishIngestUnlocked,
cleanOldKeys,
addLink,
makeLink,
addUnlocked,
CheckGitIgnore(..),
gitAddParams,
addAnnexedFile,
) where
import Annex.Common
import Types.KeySource
import Types.FileMatcher
import Backend
import Annex.Content
import Annex.Perms
import Annex.Link
import Annex.MetaData
import Annex.CurrentBranch
import Annex.CheckIgnore
import Logs.Location
import qualified Annex
import qualified Annex.Queue
import qualified Database.Keys
import Config
import Utility.InodeCache
import Annex.ReplaceFile
import Utility.Tmp
import Utility.CopyFile
import Utility.Touch
import Utility.Metered
import Git.FilePath
import Annex.InodeSentinal
import Annex.AdjustedBranch
import Annex.FileMatcher
import Control.Exception (IOException)
import qualified Utility.RawFilePath as R
data LockedDown = LockedDown
{ lockDownConfig :: LockDownConfig
, keySource :: KeySource
}
deriving (Show)
data LockDownConfig = LockDownConfig
{ lockingFile :: Bool
-- ^ write bit removed during lock down
, hardlinkFileTmpDir :: Maybe FilePath
-- ^ hard link to temp directory
}
deriving (Show)
{- The file that's being ingested is locked down before a key is generated,
- to prevent it from being modified in between. This lock down is not
- perfect at best (and pretty weak at worst). For example, it does not
- guard against files that are already opened for write by another process.
- So, the InodeCache can be used to detect any changes that might be made
- to the file after it was locked down.
-
- When possible, the file is hard linked to a temp directory. This guards
- against some changes, like deletion or overwrite of the file, and
- allows lsof checks to be done more efficiently when adding a lot of files.
-
- Lockdown can fail if a file gets deleted, and Nothing will be returned.
-}
lockDown :: LockDownConfig -> FilePath -> Annex (Maybe LockedDown)
lockDown cfg file = either
(\e -> warning (show e) >> return Nothing)
(return . Just)
=<< lockDown' cfg file
lockDown' :: LockDownConfig -> FilePath -> Annex (Either IOException LockedDown)
lockDown' cfg file = tryIO $ ifM crippledFileSystem
( nohardlink
, case hardlinkFileTmpDir cfg of
Nothing -> nohardlink
Just tmpdir -> withhardlink tmpdir
)
where
file' = toRawFilePath file
nohardlink = withTSDelta $ liftIO . nohardlink'
nohardlink' delta = do
cache <- genInodeCache (toRawFilePath file) delta
return $ LockedDown cfg $ KeySource
{ keyFilename = file'
, contentLocation = file'
, inodeCache = cache
}
withhardlink tmpdir = do
when (lockingFile cfg) $
freezeContent file
withTSDelta $ \delta -> liftIO $ do
(tmpfile, h) <- openTempFile tmpdir $
relatedTemplate $ "ingest-" ++ takeFileName file
hClose h
nukeFile tmpfile
withhardlink' delta tmpfile
`catchIO` const (nohardlink' delta)
withhardlink' delta tmpfile = do
createLink file tmpfile
cache <- genInodeCache (toRawFilePath tmpfile) delta
return $ LockedDown cfg $ KeySource
{ keyFilename = file'
, contentLocation = toRawFilePath tmpfile
, inodeCache = cache
}
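-- Usage sketch (hypothetical file and temp dir): lock down a file
-- before generating its key, hard linking it into the temp directory:
--   lockDown (LockDownConfig { lockingFile = True, hardlinkFileTmpDir = Just tmpdir }) "foo.iso"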
{- Ingests a locked down file into the annex. Updates the work tree and
- index. -}
ingestAdd :: CheckGitIgnore -> MeterUpdate -> Maybe LockedDown -> Annex (Maybe Key)
ingestAdd ci meterupdate ld = ingestAdd' ci meterupdate ld Nothing
ingestAdd' :: CheckGitIgnore -> MeterUpdate -> Maybe LockedDown -> Maybe Key -> Annex (Maybe Key)
ingestAdd' _ _ Nothing _ = return Nothing
ingestAdd' ci meterupdate ld@(Just (LockedDown cfg source)) mk = do
(mk', mic) <- ingest meterupdate ld mk
case mk' of
Nothing -> return Nothing
Just k -> do
let f = keyFilename source
if lockingFile cfg
then addLink ci (fromRawFilePath f) k mic
else do
mode <- liftIO $ catchMaybeIO $
fileMode <$> R.getFileStatus (contentLocation source)
stagePointerFile f mode =<< hashPointerFile k
return (Just k)
{- Ingests a locked down file into the annex. Does not update the working
- tree or the index. -}
ingest :: MeterUpdate -> Maybe LockedDown -> Maybe Key -> Annex (Maybe Key, Maybe InodeCache)
ingest meterupdate ld mk = ingest' Nothing meterupdate ld mk (Restage True)
ingest' :: Maybe Backend -> MeterUpdate -> Maybe LockedDown -> Maybe Key -> Restage -> Annex (Maybe Key, Maybe InodeCache)
ingest' _ _ Nothing _ _ = return (Nothing, Nothing)
ingest' preferredbackend meterupdate (Just (LockedDown cfg source)) mk restage = withTSDelta $ \delta -> do
k <- case mk of
Nothing -> do
backend <- maybe
(chooseBackend $ fromRawFilePath $ keyFilename source)
(return . Just)
preferredbackend
fst <$> genKey source meterupdate backend
Just k -> return k
let src = contentLocation source
ms <- liftIO $ catchMaybeIO $ R.getFileStatus src
mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta (fromRawFilePath src)) ms
case (mcache, inodeCache source) of
(_, Nothing) -> go k mcache ms
(Just newc, Just c) | compareStrong c newc -> go k mcache ms
_ -> failure "changed while it was being added"
where
go key mcache (Just s)
| lockingFile cfg = golocked key mcache s
| otherwise = gounlocked key mcache s
go _ _ Nothing = failure "failed to generate a key"
golocked key mcache s =
tryNonAsync (moveAnnex key $ fromRawFilePath $ contentLocation source) >>= \case
Right True -> do
populateAssociatedFiles key source restage
success key mcache s
Right False -> giveup "failed to add content to annex"
Left e -> restoreFile (fromRawFilePath $ keyFilename source) key e
gounlocked key (Just cache) s = do
-- Remove temp directory hard link first because
-- linkToAnnex falls back to copying if a file
-- already has a hard link.
cleanCruft source
cleanOldKeys (keyFilename source) key
linkToAnnex key (fromRawFilePath $ keyFilename source) (Just cache) >>= \case
LinkAnnexFailed -> failure "failed to link to annex"
_ -> do
finishIngestUnlocked' key source restage
success key (Just cache) s
gounlocked _ _ _ = failure "failed statting file"
success k mcache s = do
genMetaData k (keyFilename source) s
return (Just k, mcache)
failure msg = do
warning $ fromRawFilePath (keyFilename source) ++ " " ++ msg
cleanCruft source
return (Nothing, Nothing)
finishIngestUnlocked :: Key -> KeySource -> Annex ()
finishIngestUnlocked key source = do
cleanCruft source
finishIngestUnlocked' key source (Restage True)
finishIngestUnlocked' :: Key -> KeySource -> Restage -> Annex ()
finishIngestUnlocked' key source restage = do
Database.Keys.addAssociatedFile key
=<< inRepo (toTopFilePath (keyFilename source))
populateAssociatedFiles key source restage
{- Copy to any other locations using the same key. -}
populateAssociatedFiles :: Key -> KeySource -> Restage -> Annex ()
populateAssociatedFiles key source restage = do
obj <- calcRepo (gitAnnexLocation key)
g <- Annex.gitRepo
ingestedf <- flip fromTopFilePath g
<$> inRepo (toTopFilePath (keyFilename source))
afs <- map (`fromTopFilePath` g) <$> Database.Keys.getAssociatedFiles key
forM_ (filter (/= ingestedf) afs) $
populatePointerFile restage key obj
cleanCruft :: KeySource -> Annex ()
cleanCruft source = when (contentLocation source /= keyFilename source) $
liftIO $ nukeFile $ fromRawFilePath $ contentLocation source
-- If a worktree file was hard linked to an annex object before,
-- modifying the file would have caused the object to have the wrong
-- content. Clean up from that.
cleanOldKeys :: RawFilePath -> Key -> Annex ()
cleanOldKeys file newkey = do
g <- Annex.gitRepo
topf <- inRepo (toTopFilePath file)
ingestedf <- fromRepo $ fromTopFilePath topf
oldkeys <- filter (/= newkey)
<$> Database.Keys.getAssociatedKey topf
forM_ oldkeys $ \key ->
unlessM (isUnmodified key =<< calcRepo (gitAnnexLocation key)) $ do
caches <- Database.Keys.getInodeCaches key
unlinkAnnex key
fs <- filter (/= ingestedf)
. map (`fromTopFilePath` g)
<$> Database.Keys.getAssociatedFiles key
filterM (`sameInodeCache` caches) fs >>= \case
-- If linkToAnnex fails, the associated
-- file with the content is still present,
-- so no need for any recovery.
(f:_) -> do
ic <- withTSDelta (liftIO . genInodeCache f)
void $ linkToAnnex key (fromRawFilePath f) ic
_ -> logStatus key InfoMissing
{- On error, put the file back so it doesn't seem to have vanished.
- This can be called before or after the symlink is in place. -}
restoreFile :: FilePath -> Key -> SomeException -> Annex a
restoreFile file key e = do
whenM (inAnnex key) $ do
liftIO $ nukeFile file
-- The key could be used by other files too, so leave the
-- content in the annex, and make a copy back to the file.
obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
unlessM (liftIO $ copyFileExternal CopyTimeStamps obj file) $
warning $ "Unable to restore content of " ++ file ++ "; it should be located in " ++ obj
thawContent file
throwM e
{- Creates the symlink to the annexed content, returns the link target. -}
makeLink :: FilePath -> Key -> Maybe InodeCache -> Annex String
makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do
l <- calcRepo $ gitAnnexLink file key
replaceWorkTreeFile file $ makeAnnexLink l . toRawFilePath
-- touch symlink to have same time as the original file,
-- as provided in the InodeCache
case mcache of
Just c -> liftIO $ touch file (inodeCacheToMtime c) False
Nothing -> noop
return l
{- Creates the symlink to the annexed content, and stages it in git.
-
- As long as the filesystem supports symlinks, we use
- git add, rather than directly staging the symlink to git.
- Using git add is best because it allows the queuing to work
- and is faster (staging the symlink runs hash-object commands each time).
- Also, using git add allows it to skip gitignored files, unless forced
- to include them.
-}
addLink :: CheckGitIgnore -> FilePath -> Key -> Maybe InodeCache -> Annex ()
addLink ci file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig)
( do
_ <- makeLink file key mcache
ps <- gitAddParams ci
Annex.Queue.addCommand "add" (ps++[Param "--"]) [file]
, do
l <- makeLink file key mcache
addAnnexLink l (toRawFilePath file)
)
{- Parameters to pass to git add, forcing addition of ignored files.
-
- Note that, when git add is being run on an ignored file that is already
- checked in, CheckGitIgnore True has no effect.
-}
gitAddParams :: CheckGitIgnore -> Annex [CommandParam]
gitAddParams (CheckGitIgnore True) = ifM (Annex.getState Annex.force)
( return [Param "-f"]
, return []
)
gitAddParams (CheckGitIgnore False) = return [Param "-f"]
{- Whether a file should be added unlocked or not. Default is to not,
- unless symlinks are not supported. annex.addunlocked can override that.
- Also, when in an adjusted unlocked branch, always add files unlocked.
-}
addUnlocked :: AddUnlockedMatcher -> MatchInfo -> Annex Bool
addUnlocked matcher mi =
((not . coreSymlinks <$> Annex.getGitConfig) <||>
(checkAddUnlockedMatcher matcher mi) <||>
(maybe False isadjustedunlocked . snd <$> getCurrentBranch)
)
where
isadjustedunlocked (LinkAdjustment UnlockAdjustment) = True
isadjustedunlocked (PresenceAdjustment _ (Just UnlockAdjustment)) = True
isadjustedunlocked _ = False
{- Adds a file to the work tree for the key, and stages it in the index.
- The content of the key may be provided in a temp file, which will be
- moved into place. If no content is provided, adds an annex link but does
- not ingest the content.
-
- When the content of the key is not accepted into the annex, returns False.
-}
addAnnexedFile :: CheckGitIgnore -> AddUnlockedMatcher -> FilePath -> Key -> Maybe FilePath -> Annex Bool
addAnnexedFile ci matcher file key mtmp = ifM (addUnlocked matcher mi)
( do
mode <- maybe
(pure Nothing)
(\tmp -> liftIO $ catchMaybeIO $ fileMode <$> getFileStatus tmp)
mtmp
stagePointerFile file' mode =<< hashPointerFile key
Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file')
case mtmp of
Just tmp -> ifM (moveAnnex key tmp)
( linkunlocked mode >> return True
, writepointer mode >> return False
)
Nothing -> ifM (inAnnex key)
( linkunlocked mode >> return True
, writepointer mode >> return True
)
, do
addLink ci file key Nothing
case mtmp of
Just tmp -> moveAnnex key tmp
Nothing -> return True
)
where
mi = case mtmp of
Just tmp -> MatchingFile $ FileInfo
{ contentFile = Just (toRawFilePath tmp)
, matchFile = file'
}
-- Provide as much info as we can without access to the
-- file's content.
Nothing -> MatchingInfo $ ProvidedInfo
{ providedFilePath = file'
, providedKey = Just key
, providedFileSize = fromMaybe 0 $
keySize `fromKey` key
, providedMimeType = Nothing
, providedMimeEncoding = Nothing
}
linkunlocked mode = linkFromAnnex key file mode >>= \case
LinkAnnexFailed -> liftIO $
writePointerFile file' key mode
_ -> return ()
writepointer mode = liftIO $ writePointerFile file' key mode
file' = toRawFilePath file

343
Annex/Init.hs Normal file
View file

@ -0,0 +1,343 @@
{- git-annex repository initialization
-
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Init (
ensureInitialized,
autoInitialize,
isInitialized,
initialize,
initialize',
uninitialize,
probeCrippledFileSystem,
probeCrippledFileSystem',
) where
import Annex.Common
import qualified Annex
import qualified Git
import qualified Git.Config
import qualified Git.Objects
import Git.Types (fromConfigValue)
import qualified Annex.Branch
import Logs.UUID
import Logs.Trust.Basic
import Logs.Config
import Types.TrustLevel
import Types.RepoVersion
import Annex.Version
import Annex.Difference
import Annex.UUID
import Annex.WorkTree
import Annex.Fixup
import Annex.Path
import Config
import Config.Files
import Config.Smudge
import qualified Upgrade.V5.Direct as Direct
import qualified Annex.AdjustedBranch as AdjustedBranch
import Remote.List.Util (remotesChanged)
import Annex.Environment
import Annex.Hook
import Annex.InodeSentinal
import Upgrade
import Annex.Tmp
import Utility.UserInfo
import Utility.ThreadScheduler
#ifndef mingw32_HOST_OS
import Annex.Perms
import Utility.FileMode
import System.Posix.User
import qualified Utility.LockFile.Posix as Posix
import Data.Either
#endif
import qualified Data.Map as M
import Control.Concurrent.Async
checkCanInitialize :: Annex a -> Annex a
checkCanInitialize a = canInitialize' >>= \case
Nothing -> a
Just noannexmsg -> do
warning "Initialization prevented by .noannex file (remove the file to override)"
unless (null noannexmsg) $
warning noannexmsg
giveup "Not initialized."
canInitialize :: Annex Bool
canInitialize = isNothing <$> canInitialize'
canInitialize' :: Annex (Maybe String)
canInitialize' = inRepo (noAnnexFileContent . fmap fromRawFilePath . Git.repoWorkTree)
genDescription :: Maybe String -> Annex UUIDDesc
genDescription (Just d) = return $ UUIDDesc $ encodeBS d
genDescription Nothing = do
reldir <- liftIO . relHome
=<< liftIO . absPath . fromRawFilePath
=<< fromRepo Git.repoPath
hostname <- fromMaybe "" <$> liftIO getHostname
let at = if null hostname then "" else "@"
v <- liftIO myUserName
return $ UUIDDesc $ encodeBS $ concat $ case v of
Right username -> [username, at, hostname, ":", reldir]
Left _ -> [hostname, ":", reldir]
initialize :: Maybe String -> Maybe RepoVersion -> Annex ()
initialize mdescription mversion = checkCanInitialize $ do
{- Has to come before any commits are made as the shared
- clone heuristic expects no local objects. -}
sharedclone <- checkSharedClone
{- This will make the first commit to git, so ensure git is set up
- properly to allow commits when running it. -}
ensureCommit $ Annex.Branch.create
prepUUID
initialize' mversion
initSharedClone sharedclone
u <- getUUID
{- Avoid overwriting existing description with a default
- description. -}
whenM (pure (isJust mdescription) <||> not . M.member u <$> uuidDescMapRaw) $
describeUUID u =<< genDescription mdescription
-- Everything except for uuid setup, shared clone setup, and initial
-- description.
initialize' :: Maybe RepoVersion -> Annex ()
initialize' mversion = checkCanInitialize $ do
checkLockSupport
checkFifoSupport
checkCrippledFileSystem
unlessM isBareRepo $ do
hookWrite preCommitHook
hookWrite postReceiveHook
setDifferences
unlessM (isJust <$> getVersion) $
setVersion (fromMaybe defaultVersion mversion)
configureSmudgeFilter
unlessM isBareRepo $ do
showSideAction "scanning for unlocked files"
scanUnlockedFiles
hookWrite postCheckoutHook
hookWrite postMergeHook
AdjustedBranch.checkAdjustedClone >>= \case
AdjustedBranch.InAdjustedClone -> return ()
AdjustedBranch.NotInAdjustedClone ->
ifM (crippledFileSystem <&&> (not <$> isBareRepo))
( AdjustedBranch.adjustToCrippledFileSystem
-- Handle case where this repo was cloned from a
-- direct mode repo
, unlessM isBareRepo
Direct.switchHEADBack
)
propigateSecureHashesOnly
createInodeSentinalFile False
fixupUnusualReposAfterInit
uninitialize :: Annex ()
uninitialize = do
unHook
deconfigureSmudgeFilter
removeRepoUUID
removeVersion
{- Will automatically initialize if there is already a git-annex
- branch from somewhere. Otherwise, require a manual init
- to avoid git-annex accidentally being run in git
- repos that did not intend to use it.
-
- Checks repository version and handles upgrades too.
-}
ensureInitialized :: Annex ()
ensureInitialized = getVersion >>= maybe needsinit checkUpgrade
where
needsinit = ifM Annex.Branch.hasSibling
( do
initialize Nothing Nothing
autoEnableSpecialRemotes
, giveup "First run: git-annex init"
)
{- Initialize if it can do so automatically.
-
- Checks repository version and handles upgrades too.
-}
autoInitialize :: Annex ()
autoInitialize = getVersion >>= maybe needsinit checkUpgrade
where
needsinit = whenM (canInitialize <&&> Annex.Branch.hasSibling) $ do
initialize Nothing Nothing
autoEnableSpecialRemotes
{- Checks if a repository is initialized. Does not check version for upgrade. -}
isInitialized :: Annex Bool
isInitialized = maybe Annex.Branch.hasSibling (const $ return True) =<< getVersion
{- A crippled filesystem is one that does not allow making symlinks,
- or removing write access from files. -}
probeCrippledFileSystem :: Annex Bool
probeCrippledFileSystem = withEventuallyCleanedOtherTmp $ \tmp -> do
(r, warnings) <- liftIO $ probeCrippledFileSystem' tmp
mapM_ warning warnings
return r
probeCrippledFileSystem' :: FilePath -> IO (Bool, [String])
#ifdef mingw32_HOST_OS
probeCrippledFileSystem' _ = return (True, [])
#else
probeCrippledFileSystem' tmp = do
let f = tmp </> "gaprobe"
writeFile f ""
r <- probe f
void $ tryIO $ allowWrite f
removeFile f
return r
where
probe f = catchDefaultIO (True, []) $ do
let f2 = f ++ "2"
nukeFile f2
createSymbolicLink f f2
nukeFile f2
preventWrite f
-- Should be unable to write to the file, unless
-- running as root, but some crippled
-- filesystems ignore write bit removals.
ifM ((== 0) <$> getRealUserID)
( return (False, [])
, do
r <- catchBoolIO $ do
writeFile f "2"
return True
if r
then return (True, ["Filesystem allows writing to files whose write bit is not set."])
else return (False, [])
)
#endif
checkCrippledFileSystem :: Annex ()
checkCrippledFileSystem = whenM probeCrippledFileSystem $ do
warning "Detected a crippled filesystem."
setCrippledFileSystem True
{- Normally git disables core.symlinks itself when the
- filesystem does not support them. But, even if symlinks are
- supported, we don't use them by default in a crippled
- filesystem. -}
whenM (coreSymlinks <$> Annex.getGitConfig) $ do
warning "Disabling core.symlinks."
setConfig "core.symlinks"
(Git.Config.boolConfig False)
probeLockSupport :: Annex Bool
#ifdef mingw32_HOST_OS
probeLockSupport = return True
#else
probeLockSupport = withEventuallyCleanedOtherTmp $ \tmp -> do
let f = tmp </> "lockprobe"
mode <- annexFileMode
liftIO $ withAsync warnstall (const (go f mode))
where
go f mode = do
nukeFile f
let locktest = bracket
(Posix.lockExclusive (Just mode) f)
Posix.dropLock
(const noop)
ok <- isRight <$> tryNonAsync locktest
nukeFile f
return ok
warnstall = do
threadDelaySeconds (Seconds 10)
warningIO "Probing the filesystem for POSIX fcntl lock support is taking a long time."
warningIO "(Setting annex.pidlock will avoid this probe.)"
#endif
probeFifoSupport :: Annex Bool
probeFifoSupport = do
#ifdef mingw32_HOST_OS
return False
#else
withEventuallyCleanedOtherTmp $ \tmp -> do
let f = tmp </> "gaprobe"
let f2 = tmp </> "gaprobe2"
liftIO $ do
nukeFile f
nukeFile f2
ms <- tryIO $ do
createNamedPipe f ownerReadMode
createLink f f2
getFileStatus f
nukeFile f
nukeFile f2
return $ either (const False) isNamedPipe ms
#endif
checkLockSupport :: Annex ()
checkLockSupport =
unlessM (annexPidLock <$> Annex.getGitConfig) $
unlessM probeLockSupport $ do
warning "Detected a filesystem without POSIX fcntl lock support."
warning "Enabling annex.pidlock."
setConfig (annexConfig "pidlock") (Git.Config.boolConfig True)
checkFifoSupport :: Annex ()
checkFifoSupport = unlessM probeFifoSupport $ do
warning "Detected a filesystem without fifo support."
warning "Disabling ssh connection caching."
setConfig (annexConfig "sshcaching") (Git.Config.boolConfig False)
checkSharedClone :: Annex Bool
checkSharedClone = inRepo Git.Objects.isSharedClone
initSharedClone :: Bool -> Annex ()
initSharedClone False = return ()
initSharedClone True = do
showLongNote "Repository was cloned with --shared; setting annex.hardlink=true and making repository untrusted."
u <- getUUID
trustSet u UnTrusted
setConfig (annexConfig "hardlink") (Git.Config.boolConfig True)
{- Propigate annex.securehashesonly from then global config to local
- config. This makes a clone inherit a parent's setting, but once
- a repository has a local setting, changes to the global config won't
- affect it. -}
propigateSecureHashesOnly :: Annex ()
propigateSecureHashesOnly =
maybe noop (setConfig "annex.securehashesonly" . fromConfigValue)
=<< getGlobalConfig "annex.securehashesonly"
fixupUnusualReposAfterInit :: Annex ()
fixupUnusualReposAfterInit = do
gc <- Annex.getGitConfig
void $ inRepo $ \r -> fixupUnusualRepos r gc
{- Try to enable any special remotes that are configured to do so.
-
- The enabling is done in a child process to avoid it using stdio.
-}
autoEnableSpecialRemotes :: Annex ()
autoEnableSpecialRemotes = do
rp <- fromRawFilePath <$> fromRepo Git.repoPath
withNullHandle $ \nullh -> gitAnnexChildProcess
[ "init"
, "--autoenable"
]
(\p -> p
{ std_out = UseHandle nullh
, std_err = UseHandle nullh
, std_in = UseHandle nullh
, cwd = Just rp
}
)
(\_ _ _ pid -> void $ waitForProcess pid)
remotesChanged

Annex/InodeSentinal.hs Normal file
@ -0,0 +1,96 @@
{- git-annex inode sentinal file
-
- Copyright 2012-2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.InodeSentinal where
import Annex.Common
import qualified Annex
import Utility.InodeCache
import Annex.Perms
{- If the sentinal shows the inodes have changed, only the size and mtime
- are compared. -}
compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool
compareInodeCaches x y
| compareStrong x y = return True
| otherwise = ifM inodesChanged
( return $ compareWeak x y
, return False
)
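{- For example, on a filesystem that generates new inodes on each
- mount, after a remount the sentinal notices the change, and two
- caches that differ only in inode number but agree on size and mtime
- still compare equal, via the weak comparison. -}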
compareInodeCachesWith :: Annex InodeComparisonType
compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly )
{- Checks if one of the provided old InodeCache matches the current
- version of a file. -}
sameInodeCache :: RawFilePath -> [InodeCache] -> Annex Bool
sameInodeCache _ [] = return False
sameInodeCache file old = go =<< withTSDelta (liftIO . genInodeCache file)
where
go Nothing = return False
go (Just curr) = elemInodeCaches curr old
elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool
elemInodeCaches _ [] = return False
elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l)
( return True
, elemInodeCaches c ls
)
{- Some filesystems get new inodes each time they are mounted.
- In order to work on such a filesystem, a sentinal file is used to detect
- when the inodes have changed.
-
- If the sentinal file does not exist, we have to assume that the
- inodes have changed.
-}
inodesChanged :: Annex Bool
inodesChanged = sentinalInodesChanged <$> sentinalStatus
withTSDelta :: (TSDelta -> Annex a) -> Annex a
withTSDelta a = a =<< getTSDelta
getTSDelta :: Annex TSDelta
#ifdef mingw32_HOST_OS
getTSDelta = sentinalTSDelta <$> sentinalStatus
#else
getTSDelta = pure noTSDelta -- optimisation
#endif
sentinalStatus :: Annex SentinalStatus
sentinalStatus = maybe check return =<< Annex.getState Annex.sentinalstatus
where
check = do
sc <- liftIO . checkSentinalFile =<< annexSentinalFile
Annex.changeState $ \s -> s { Annex.sentinalstatus = Just sc }
return sc
{- The sentinal file is only created when first initializing a repository.
- If there are any annexed objects in the repository already, creating
- the file would invalidate their inode caches. -}
createInodeSentinalFile :: Bool -> Annex ()
createInodeSentinalFile evenwithobjects =
unlessM (alreadyexists <||> hasobjects) $ do
s <- annexSentinalFile
createAnnexDirectory (parentDir (fromRawFilePath (sentinalFile s)))
liftIO $ writeSentinalFile s
where
alreadyexists = liftIO . sentinalFileExists =<< annexSentinalFile
hasobjects
| evenwithobjects = pure False
| otherwise = liftIO . doesDirectoryExist =<< fromRepo gitAnnexObjectDir
annexSentinalFile :: Annex SentinalFile
annexSentinalFile = do
sentinalfile <- fromRepo gitAnnexInodeSentinal
sentinalcachefile <- fromRepo gitAnnexInodeSentinalCache
return SentinalFile
{ sentinalFile = sentinalfile
, sentinalCacheFile = sentinalcachefile
}

Annex/Journal.hs Normal file
@ -0,0 +1,144 @@
{- management of the git-annex journal
-
- The journal is used to queue up changes before they are committed to the
- git-annex branch. Among other things, it ensures that if git-annex is
- interrupted, its recorded data is not lost.
-
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Journal where
import Annex.Common
import qualified Git
import Annex.Perms
import Annex.Tmp
import Annex.LockFile
import Utility.Directory.Stream
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as S
import qualified System.FilePath.ByteString as P
import Data.ByteString.Builder
import Data.Char
class Journalable t where
writeJournalHandle :: Handle -> t -> IO ()
journalableByteString :: t -> L.ByteString
instance Journalable L.ByteString where
writeJournalHandle = L.hPut
journalableByteString = id
-- This is more efficient than the ByteString instance.
instance Journalable Builder where
writeJournalHandle = hPutBuilder
journalableByteString = toLazyByteString
{- Records content for a file in the branch to the journal.
-
- Using the journal, rather than immediately staging content to the index
- avoids git needing to rewrite the index after every change.
-
- The file in the journal is updated atomically, which allows
- getJournalFileStale to always return a consistent journal file
- content, although possibly not the most current one.
-}
setJournalFile :: Journalable content => JournalLocked -> RawFilePath -> content -> Annex ()
setJournalFile _jl file content = withOtherTmp $ \tmp -> do
createAnnexDirectory =<< fromRepo gitAnnexJournalDir
-- journal file is written atomically
jfile <- fromRawFilePath <$> fromRepo (journalFile file)
let tmpfile = tmp </> takeFileName jfile
liftIO $ do
withFile tmpfile WriteMode $ \h -> writeJournalHandle h content
moveFile tmpfile jfile
{- Gets any journalled content for a file in the branch. -}
getJournalFile :: JournalLocked -> RawFilePath -> Annex (Maybe L.ByteString)
getJournalFile _jl = getJournalFileStale
{- Without locking, this is not guaranteed to be the most recent
- version of the file in the journal, so should not be used as a basis for
- changes.
-
- The file is read strictly so that its content can safely be fed into
- an operation that modifies the file. While setJournalFile doesn't
- write directly to journal files, and so probably avoids problems with
- writing to the same file that's being read, there could still be
- concurrency or other issues with a lazy read, and the minor loss of
- laziness doesn't matter much, as the files are not very large.
-}
getJournalFileStale :: RawFilePath -> Annex (Maybe L.ByteString)
getJournalFileStale file = inRepo $ \g -> catchMaybeIO $
L.fromStrict <$> S.readFile (fromRawFilePath $ journalFile file g)
{- List of existing journal files, but without locking, may miss new ones
- just being added, or may have false positives if the journal is staged
- as it is run. -}
getJournalledFilesStale :: Annex [FilePath]
getJournalledFilesStale = do
g <- gitRepo
fs <- liftIO $ catchDefaultIO [] $
getDirectoryContents $ gitAnnexJournalDir g
return $ filter (`notElem` [".", ".."]) $
map (fromRawFilePath . fileJournal . toRawFilePath) fs
withJournalHandle :: (DirectoryHandle -> IO a) -> Annex a
withJournalHandle a = do
d <- fromRepo gitAnnexJournalDir
bracketIO (openDirectory d) closeDirectory (liftIO . a)
{- Checks if there are changes in the journal. -}
journalDirty :: Annex Bool
journalDirty = do
d <- fromRepo gitAnnexJournalDir
liftIO $
(not <$> isDirectoryEmpty d)
`catchIO` (const $ doesDirectoryExist d)
{- Produces a filename to use in the journal for a file on the branch.
-
- The journal typically won't have a lot of files in it, so the hashing
- used in the branch is not necessary, and all the files are put directly
- in the journal directory.
-}
journalFile :: RawFilePath -> Git.Repo -> RawFilePath
journalFile file repo = gitAnnexJournalDir' repo P.</> S.concatMap mangle file
where
mangle c
| P.isPathSeparator c = S.singleton underscore
| c == underscore = S.pack [underscore, underscore]
| otherwise = S.singleton c
underscore = fromIntegral (ord '_')
{- Converts a journal file (relative to the journal dir) back to the
- filename on the branch. -}
fileJournal :: RawFilePath -> RawFilePath
fileJournal = go
where
go b =
let (h, t) = S.break (== underscore) b
in h <> case S.uncons t of
Nothing -> t
Just (_u, t') -> case S.uncons t' of
Nothing -> t'
Just (w, t'')
| w == underscore ->
S.cons underscore (go t'')
| otherwise ->
S.cons P.pathSeparator (go t')
underscore = fromIntegral (ord '_')
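-- A small sketch of the name mangling round trip (file names
-- hypothetical; assuming a POSIX path separator). journalFile applies
-- the inverse mangling, so branch file "foo/bar" is journalled as
-- "foo_bar":
--
-- >>> :set -XOverloadedStrings
-- >>> fileJournal "foo_bar"
-- "foo/bar"
-- >>> fileJournal "a__b"
-- "a_b"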
{- Sentinal value, only produced by lockJournal; required
- as a parameter by things that need to ensure the journal is
- locked. -}
data JournalLocked = ProduceJournalLocked
{- Runs an action that modifies the journal, using locking to avoid
- contention with other git-annex processes. -}
lockJournal :: (JournalLocked -> Annex a) -> Annex a
lockJournal a = withExclusiveLock gitAnnexJournalLock $ a ProduceJournalLocked

Annex/Link.hs Normal file
@ -0,0 +1,316 @@
{- git-annex links to content
-
- On file systems that support them, symlinks are used.
-
- On other filesystems, git instead stores the symlink target in a regular
- file.
-
- Pointer files are used instead of symlinks for unlocked files.
-
- Copyright 2013-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP, BangPatterns #-}
module Annex.Link where
import Annex.Common
import qualified Annex
import qualified Annex.Queue
import qualified Git.Queue
import qualified Git.UpdateIndex
import qualified Git.Index
import qualified Git.LockFile
import qualified Git.Env
import qualified Git
import Git.Types
import Git.FilePath
import Git.Config
import Annex.HashObject
import Annex.InodeSentinal
import Annex.PidLock
import Utility.FileMode
import Utility.InodeCache
import Utility.Tmp.Dir
import Utility.CopyFile
import qualified Utility.RawFilePath as R
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L
import qualified System.FilePath.ByteString as P
type LinkTarget = String
{- Checks if a file is a link to a key. -}
isAnnexLink :: RawFilePath -> Annex (Maybe Key)
isAnnexLink file = maybe Nothing parseLinkTargetOrPointer <$> getAnnexLinkTarget file
{- Gets the link target of a symlink.
-
- On a filesystem that does not support symlinks, fall back to getting the
- link target by looking inside the file.
-
- Returns Nothing if the file is not a symlink, or not a link to annex
- content.
-}
getAnnexLinkTarget :: RawFilePath -> Annex (Maybe S.ByteString)
getAnnexLinkTarget f = getAnnexLinkTarget' f
=<< (coreSymlinks <$> Annex.getGitConfig)
{- Pass False to force looking inside the file, for when git checks out
- symlinks as plain files. -}
getAnnexLinkTarget' :: RawFilePath -> Bool -> Annex (Maybe S.ByteString)
getAnnexLinkTarget' file coresymlinks = if coresymlinks
then check probesymlink $
return Nothing
else check probesymlink $
check probefilecontent $
return Nothing
where
check getlinktarget fallback =
liftIO (catchMaybeIO getlinktarget) >>= \case
Just l
| isLinkToAnnex l -> return (Just l)
| otherwise -> return Nothing
Nothing -> fallback
probesymlink = R.readSymbolicLink file
probefilecontent = withFile (fromRawFilePath file) ReadMode $ \h -> do
s <- S.hGet h unpaddedMaxPointerSz
-- If we got the full amount, the file is too large
-- to be a symlink target.
return $ if S.length s == unpaddedMaxPointerSz
then mempty
else
-- If there are any NUL or newline
-- characters, or whitespace, we
-- certainly don't have a symlink to a
-- git-annex key.
if any (`S8.elem` s) "\0\n\r \t"
then mempty
else s
makeAnnexLink :: LinkTarget -> RawFilePath -> Annex ()
makeAnnexLink = makeGitLink
{- Creates a link on disk.
-
- On a filesystem that does not support symlinks, writes the link target
- to a file. Note that git will only treat the file as a symlink if
- it's staged as such, so use addAnnexLink when adding a new file or
- modified link to git.
-}
makeGitLink :: LinkTarget -> RawFilePath -> Annex ()
makeGitLink linktarget file = ifM (coreSymlinks <$> Annex.getGitConfig)
( liftIO $ do
void $ tryIO $ removeFile (fromRawFilePath file)
createSymbolicLink linktarget (fromRawFilePath file)
, liftIO $ writeFile (fromRawFilePath file) linktarget
)
{- Creates a link on disk, and additionally stages it in git. -}
addAnnexLink :: LinkTarget -> RawFilePath -> Annex ()
addAnnexLink linktarget file = do
makeAnnexLink linktarget file
stageSymlink file =<< hashSymlink linktarget
{- Injects a symlink target into git, returning its Sha. -}
hashSymlink :: LinkTarget -> Annex Sha
hashSymlink = hashBlob . toInternalGitPath . toRawFilePath
{- Stages a symlink to an annexed object, using a Sha of its target. -}
stageSymlink :: RawFilePath -> Sha -> Annex ()
stageSymlink file sha =
Annex.Queue.addUpdateIndex =<<
inRepo (Git.UpdateIndex.stageSymlink (fromRawFilePath file) sha)
{- Injects a pointer file content into git, returning its Sha. -}
hashPointerFile :: Key -> Annex Sha
hashPointerFile key = hashBlob $ formatPointer key
{- Stages a pointer file, using a Sha of its content -}
stagePointerFile :: RawFilePath -> Maybe FileMode -> Sha -> Annex ()
stagePointerFile file mode sha =
Annex.Queue.addUpdateIndex =<<
inRepo (Git.UpdateIndex.stageFile sha treeitemtype $ fromRawFilePath file)
where
treeitemtype
| maybe False isExecutable mode = TreeExecutable
| otherwise = TreeFile
writePointerFile :: RawFilePath -> Key -> Maybe FileMode -> IO ()
writePointerFile file k mode = do
S.writeFile (fromRawFilePath file) (formatPointer k)
maybe noop (setFileMode $ fromRawFilePath file) mode
newtype Restage = Restage Bool
{- Restage pointer file. This is used after updating a worktree file
- when content is added/removed, to prevent git status from showing
- it as modified.
-
- Asks git to refresh its index information for the file.
- That in turn runs the clean filter on the file; when the clean
- filter produces the same pointer that was in the index before, git
- realizes that the file has not actually been modified.
-
- Note that, if the pointer file is staged for deletion, or has different
- content than the current worktree content staged, this won't change
- that. So it's safe to call at any time, in any situation.
-
- If the index is known to be locked (eg, git add has run git-annex),
- that would fail. Restage False will prevent the index from being
- updated, and will display a message to help the user understand why
- the file will appear to be modified.
-
- This uses the git queue, so the update is not performed immediately,
- and this can be run multiple times cheaply.
-
- The InodeCache is for the worktree file. It is used to detect when
- the worktree file is changed by something else before git update-index
- gets to look at it.
-}
restagePointerFile :: Restage -> RawFilePath -> InodeCache -> Annex ()
restagePointerFile (Restage False) f _ =
toplevelWarning True $ unableToRestage $ Just $ fromRawFilePath f
restagePointerFile (Restage True) f orig = withTSDelta $ \tsd ->
-- Avoid refreshing the index if run by the
-- smudge clean filter, because git uses that when
-- it's already refreshing the index, probably because
-- this very action is running. Running it again would likely
-- deadlock.
unlessM (Annex.getState Annex.insmudgecleanfilter) $ do
-- update-index is documented as picky about "./file" and it
-- fails on "../../repo/path/file" when cwd is not in the repo
-- being acted on. Avoid these problems with an absolute path.
absf <- liftIO $ absPath $ fromRawFilePath f
Annex.Queue.addInternalAction runner [(absf, isunmodified tsd)]
where
isunmodified tsd = genInodeCache f tsd >>= return . \case
Nothing -> False
Just new -> compareStrong orig new
-- Other changes to the files may have been staged before this
-- gets a chance to run. To avoid a race with any staging of
-- changes, first lock the index file. Then run git update-index
-- on all still-unmodified files, using a copy of the index file,
-- to bypass the lock. Then replace the old index file with the new
-- updated index file.
runner :: Git.Queue.InternalActionRunner Annex
runner = Git.Queue.InternalActionRunner "restagePointerFile" $ \r l -> do
realindex <- liftIO $ Git.Index.currentIndexFile r
let lock = Git.Index.indexFileLock realindex
lockindex = liftIO $ catchMaybeIO $ Git.LockFile.openLock' lock
unlockindex = liftIO . maybe noop Git.LockFile.closeLock
showwarning = warning $ unableToRestage Nothing
go Nothing = showwarning
go (Just _) = withTmpDirIn (fromRawFilePath $ Git.localGitDir r) "annexindex" $ \tmpdir -> do
let tmpindex = tmpdir </> "index"
let updatetmpindex = do
r' <- liftIO $ Git.Env.addGitEnv r Git.Index.indexEnv
=<< Git.Index.indexEnvVal tmpindex
-- Avoid git warning about CRLF munging.
let r'' = r' { gitGlobalOpts = gitGlobalOpts r' ++
[ Param "-c"
, Param $ "core.safecrlf=" ++ boolConfig False
] }
runsGitAnnexChildProcessViaGit' r'' $ \r''' ->
liftIO $ Git.UpdateIndex.refreshIndex r''' $ \feed ->
forM_ l $ \(f', checkunmodified) ->
whenM checkunmodified $
feed f'
let replaceindex = catchBoolIO $ do
moveFile tmpindex realindex
return True
ok <- liftIO (createLinkOrCopy realindex tmpindex)
<&&> updatetmpindex
<&&> liftIO replaceindex
unless ok showwarning
bracket lockindex unlockindex go
unableToRestage :: Maybe FilePath -> String
unableToRestage mf = unwords
[ "git status will show " ++ fromMaybe "some files" mf
, "to be modified, since content availability has changed"
, "and git-annex was unable to update the index."
, "This is only a cosmetic problem affecting git status; git add,"
, "git commit, etc won't be affected."
, "To fix the git status display, you can run:"
, "git update-index -q --refresh " ++ fromMaybe "<file>" mf
]
{- Parses a symlink target or a pointer file to a Key. -}
parseLinkTargetOrPointer :: S.ByteString -> Maybe Key
parseLinkTargetOrPointer = parseLinkTarget . S8.takeWhile (not . lineend)
where
lineend '\n' = True
lineend '\r' = True
lineend _ = False
{- Avoid looking at more of the lazy ByteString than necessary since it
- could be reading from a large file that is not a pointer file. -}
parseLinkTargetOrPointerLazy :: L.ByteString -> Maybe Key
parseLinkTargetOrPointerLazy b =
let b' = L.take (fromIntegral maxPointerSz) b
in parseLinkTargetOrPointer (L.toStrict b')
{- Parses a symlink target to a Key. -}
parseLinkTarget :: S.ByteString -> Maybe Key
parseLinkTarget l
| isLinkToAnnex l = fileKey $ snd $ S8.breakEnd pathsep l
| otherwise = Nothing
where
pathsep '/' = True
#ifdef mingw32_HOST_OS
pathsep '\\' = True
#endif
pathsep _ = False
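-- For example (hypothetical key and hash directories), a symlink
-- target such as
--   ../../.git/annex/objects/pX/ZJ/WORM-s3-m0--foo/WORM-s3-m0--foo
-- contains "/annex/objects/" and so is accepted by isLinkToAnnex
-- below; the part after the last path separator, "WORM-s3-m0--foo",
-- is then handed to fileKey.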
formatPointer :: Key -> S.ByteString
formatPointer k = prefix <> keyFile k <> nl
where
prefix = toInternalGitPath $ P.pathSeparator `S.cons` objectDir'
nl = S8.singleton '\n'
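-- For example (hypothetical key), the resulting pointer file is a
-- single line such as
--   /annex/objects/SHA256E-s1048576--9a2b...
-- that is, a leading path separator, the object directory, and the
-- key's filename, terminated by a newline.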
{- Maximum size of a file that could be a pointer to a key.
- Check to avoid buffering really big files in git into
- memory when reading files that may be pointers.
-
- 8192 bytes is plenty for a pointer to a key. This adds some additional
- padding to allow for any pointer files that might have
- lines after the key explaining what the file is used for. -}
maxPointerSz :: Integer
maxPointerSz = 81920
unpaddedMaxPointerSz :: Int
unpaddedMaxPointerSz = 8192
{- Checks if a worktree file is a pointer to a key.
-
- Unlocked files whose content is present are not detected by this. -}
isPointerFile :: RawFilePath -> IO (Maybe Key)
isPointerFile f = catchDefaultIO Nothing $ withFile (fromRawFilePath f) ReadMode $ \h ->
parseLinkTargetOrPointer <$> S.hGet h unpaddedMaxPointerSz
{- Checks a symlink target or pointer file first line to see if it
- appears to point to annexed content.
-
- We only look for paths inside the .git directory, and not at the .git
- directory itself, because GIT_DIR may cause a directory name other
- than .git to be used.
-}
isLinkToAnnex :: S.ByteString -> Bool
isLinkToAnnex s = p `S.isInfixOf` s
#ifdef mingw32_HOST_OS
-- '/' is used inside pointer files on Windows, not the native '\'
|| p' `S.isInfixOf` s
#endif
where
p = P.pathSeparator `S.cons` objectDir'
#ifdef mingw32_HOST_OS
p' = toInternalGitPath p
#endif

Annex/Locations.hs Normal file
@ -0,0 +1,649 @@
{- git-annex file locations
-
- Copyright 2010-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Locations (
keyFile,
fileKey,
keyPaths,
keyPath,
annexDir,
objectDir,
objectDir',
gitAnnexLocation,
gitAnnexLocationDepth,
gitAnnexLink,
gitAnnexLinkCanonical,
gitAnnexContentLock,
gitAnnexMapping,
gitAnnexInodeCache,
gitAnnexInodeSentinal,
gitAnnexInodeSentinalCache,
annexLocations,
gitAnnexDir,
gitAnnexObjectDir,
gitAnnexTmpOtherDir,
gitAnnexTmpOtherLock,
gitAnnexTmpOtherDirOld,
gitAnnexTmpWatcherDir,
gitAnnexTmpObjectDir,
gitAnnexTmpObjectLocation,
gitAnnexTmpWorkDir,
gitAnnexBadDir,
gitAnnexBadLocation,
gitAnnexUnusedLog,
gitAnnexKeysDb,
gitAnnexKeysDbLock,
gitAnnexKeysDbIndexCache,
gitAnnexFsckState,
gitAnnexFsckDbDir,
gitAnnexFsckDbDirOld,
gitAnnexFsckDbLock,
gitAnnexFsckResultsLog,
gitAnnexSmudgeLog,
gitAnnexSmudgeLock,
gitAnnexExportDir,
gitAnnexExportDbDir,
gitAnnexExportLock,
gitAnnexExportUpdateLock,
gitAnnexExportExcludeLog,
gitAnnexContentIdentifierDbDir,
gitAnnexContentIdentifierLock,
gitAnnexScheduleState,
gitAnnexTransferDir,
gitAnnexCredsDir,
gitAnnexWebCertificate,
gitAnnexWebPrivKey,
gitAnnexFeedStateDir,
gitAnnexFeedState,
gitAnnexMergeDir,
gitAnnexJournalDir,
gitAnnexJournalDir',
gitAnnexJournalLock,
gitAnnexGitQueueLock,
gitAnnexMergeLock,
gitAnnexIndex,
gitAnnexIndexStatus,
gitAnnexViewIndex,
gitAnnexViewLog,
gitAnnexMergedRefs,
gitAnnexIgnoredRefs,
gitAnnexPidFile,
gitAnnexPidLockFile,
gitAnnexDaemonStatusFile,
gitAnnexLogFile,
gitAnnexFuzzTestLogFile,
gitAnnexHtmlShim,
gitAnnexUrlFile,
gitAnnexTmpCfgFile,
gitAnnexSshDir,
gitAnnexRemotesDir,
gitAnnexAssistantDefaultDir,
HashLevels(..),
hashDirMixed,
hashDirLower,
preSanitizeKeyName,
reSanitizeKeyName,
) where
import Data.Char
import Data.Default
import qualified Data.ByteString.Char8 as S8
import qualified System.FilePath.ByteString as P
import Common
import Key
import Types.UUID
import Types.GitConfig
import Types.Difference
import qualified Git
import qualified Git.Types as Git
import Git.FilePath
import Annex.DirHashes
import Annex.Fixup
import qualified Utility.RawFilePath as R
{- Conventions:
-
- Functions ending in "Dir" should always return values ending with a
- trailing path separator. Most code does not rely on that, but a few
- things do.
-
- Everything else should not end in a trailing path separator.
-
- Only functions (with names starting with "git") that build a path
- based on a git repository should return full path relative to the git
- repository. Everything else returns path segments.
-}
{- The directory git annex uses for local state, relative to the .git
- directory -}
annexDir :: RawFilePath
annexDir = P.addTrailingPathSeparator "annex"
{- The directory git annex uses for locally available object content,
- relative to the .git directory -}
objectDir :: FilePath
objectDir = fromRawFilePath objectDir'
objectDir' :: RawFilePath
objectDir' = P.addTrailingPathSeparator $ annexDir P.</> "objects"
{- Annexed file's possible locations relative to the .git directory.
- There are two different possibilities, using different hashes.
-
- Also, some repositories have a Difference in hash directory depth.
-}
annexLocations :: GitConfig -> Key -> [RawFilePath]
annexLocations config key = map (annexLocation config key) dirHashes
annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> RawFilePath
annexLocation config key hasher = objectDir' P.</> keyPath key (hasher $ objectHashLevels config)
{- Number of subdirectories from the gitAnnexObjectDir
- to the gitAnnexLocation. -}
gitAnnexLocationDepth :: GitConfig -> Int
gitAnnexLocationDepth config = hashlevels + 1
where
HashLevels hashlevels = objectHashLevels config
{- Annexed object's location in a repository.
-
- When there are multiple possible locations, returns the one where the
- file is actually present.
-
- When the file is not present, returns the location where the file should
- be stored.
-
- This does not take direct mode into account, so in direct mode it is not
- the actual location of the file's content.
-}
gitAnnexLocation :: Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexLocation key r config = gitAnnexLocation' key r config
(annexCrippledFileSystem config)
(coreSymlinks config)
R.doesPathExist
(Git.localGitDir r)
gitAnnexLocation' :: Key -> Git.Repo -> GitConfig -> Bool -> Bool -> (RawFilePath -> IO Bool) -> RawFilePath -> IO RawFilePath
gitAnnexLocation' key r config crippled symlinkssupported checker gitdir
{- Bare repositories default to hashDirLower for new
- content, as it's more portable. But check all locations. -}
| Git.repoIsLocalBare r = checkall
| hasDifference ObjectHashLower (annexDifferences config) =
only hashDirLower
{- Repositories on crippled filesystems use hashDirLower
- for new content, unless symlinks are supported too.
- Then hashDirMixed is used. But, the content could be
- in either location so check both. -}
| crippled = if symlinkssupported
then check $ map inrepo $ reverse $ annexLocations config key
else checkall
{- Regular repositories only use hashDirMixed, so
- don't need to do any work to check if the file is
- present. -}
| otherwise = only hashDirMixed
where
only = return . inrepo . annexLocation config key
checkall = check $ map inrepo $ annexLocations config key
inrepo d = gitdir P.</> d
check locs@(l:_) = fromMaybe l <$> firstM checker locs
check [] = error "internal"
{- Calculates a symlink target to link a file to an annexed object. -}
gitAnnexLink :: FilePath -> Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexLink file key r config = do
currdir <- getCurrentDirectory
let absfile = absNormPathUnix currdir file
let gitdir = getgitdir currdir
loc <- gitAnnexLocation' key r config False False (\_ -> return True) gitdir
fromRawFilePath . toInternalGitPath . toRawFilePath
<$> relPathDirToFile (parentDir absfile) (fromRawFilePath loc)
where
getgitdir currdir
{- This special case is for git submodules on filesystems not
- supporting symlinks; generate a link target that will
- work portably. -}
| not (coreSymlinks config) && needsSubmoduleFixup r =
toRawFilePath $
absNormPathUnix currdir $ fromRawFilePath $
Git.repoPath r P.</> ".git"
| otherwise = Git.localGitDir r
absNormPathUnix d p = fromRawFilePath $ toInternalGitPath $ toRawFilePath $
absPathFrom
(fromRawFilePath $ toInternalGitPath $ toRawFilePath d)
(fromRawFilePath $ toInternalGitPath $ toRawFilePath p)
{- Calculates a symlink target as would be used in a typical git
- repository, with .git in the top of the work tree. -}
gitAnnexLinkCanonical :: FilePath -> Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexLinkCanonical file key r config = gitAnnexLink file key r' config'
where
r' = case r of
Git.Repo { Git.location = l@Git.Local { Git.worktree = Just wt } } ->
r { Git.location = l { Git.gitdir = wt P.</> ".git" } }
_ -> r
config' = config
{ annexCrippledFileSystem = False
, coreSymlinks = True
}
{- File used to lock a key's content. -}
gitAnnexContentLock :: Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexContentLock key r config = do
loc <- gitAnnexLocation key r config
return $ fromRawFilePath loc ++ ".lck"
{- File that maps from a key to the file(s) in the git repository.
- Used in direct mode. -}
gitAnnexMapping :: Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexMapping key r config = do
loc <- gitAnnexLocation key r config
return $ fromRawFilePath loc ++ ".map"
{- File that caches information about a key's content, used to determine
- if a file has changed.
- Used in direct mode. -}
gitAnnexInodeCache :: Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexInodeCache key r config = do
loc <- gitAnnexLocation key r config
return $ fromRawFilePath loc ++ ".cache"
gitAnnexInodeSentinal :: Git.Repo -> RawFilePath
gitAnnexInodeSentinal r = gitAnnexDir r P.</> "sentinal"
gitAnnexInodeSentinalCache :: Git.Repo -> RawFilePath
gitAnnexInodeSentinalCache r = gitAnnexInodeSentinal r <> ".cache"
{- The annex directory of a repository. -}
gitAnnexDir :: Git.Repo -> RawFilePath
gitAnnexDir r = P.addTrailingPathSeparator $ Git.localGitDir r P.</> annexDir
{- The part of the annex directory where file contents are stored. -}
gitAnnexObjectDir :: Git.Repo -> FilePath
gitAnnexObjectDir r = fromRawFilePath $
P.addTrailingPathSeparator $ Git.localGitDir r P.</> objectDir'
{- .git/annex/tmp/ is used for temp files for key's contents -}
gitAnnexTmpObjectDir :: Git.Repo -> FilePath
gitAnnexTmpObjectDir = fromRawFilePath . gitAnnexTmpObjectDir'
gitAnnexTmpObjectDir' :: Git.Repo -> RawFilePath
gitAnnexTmpObjectDir' r = P.addTrailingPathSeparator $ gitAnnexDir r P.</> "tmp"
{- .git/annex/othertmp/ is used for other temp files -}
gitAnnexTmpOtherDir :: Git.Repo -> FilePath
gitAnnexTmpOtherDir r = fromRawFilePath $
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "othertmp"
{- Lock file for gitAnnexTmpOtherDir. -}
gitAnnexTmpOtherLock :: Git.Repo -> FilePath
gitAnnexTmpOtherLock r = fromRawFilePath $ gitAnnexDir r P.</> "othertmp.lck"
{- .git/annex/misctmp/ was used by old versions of git-annex and is still
- used during initialization -}
gitAnnexTmpOtherDirOld :: Git.Repo -> FilePath
gitAnnexTmpOtherDirOld r = fromRawFilePath $
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "misctmp"
{- .git/annex/watchtmp/ is used by the watcher and assistant -}
gitAnnexTmpWatcherDir :: Git.Repo -> FilePath
gitAnnexTmpWatcherDir r = fromRawFilePath $
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "watchtmp"
{- The temp file to use for a given key's content. -}
gitAnnexTmpObjectLocation :: Key -> Git.Repo -> FilePath
gitAnnexTmpObjectLocation key r = fromRawFilePath $
gitAnnexTmpObjectDir' r P.</> keyFile key
{- Given a temp file such as gitAnnexTmpObjectLocation, makes a name for a
- subdirectory in the same location, that can be used as a work area
- when receiving the key's content.
-
- There are ordering requirements for creating these directories;
- use Annex.Content.withTmpWorkDir to set them up.
-}
gitAnnexTmpWorkDir :: FilePath -> FilePath
gitAnnexTmpWorkDir p =
let (dir, f) = splitFileName p
-- Using a prefix avoids name conflict with any other keys.
in dir </> "work." ++ f
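-- For example (hypothetical key name):
--
-- >>> gitAnnexTmpWorkDir ".git/annex/tmp/SHA256E-s42--f00"
-- ".git/annex/tmp/work.SHA256E-s42--f00"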
{- .git/annex/bad/ is used for bad files found during fsck -}
gitAnnexBadDir :: Git.Repo -> FilePath
gitAnnexBadDir r = fromRawFilePath $
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "bad"
{- The bad file to use for a given key. -}
gitAnnexBadLocation :: Key -> Git.Repo -> FilePath
gitAnnexBadLocation key r = gitAnnexBadDir r </> fromRawFilePath (keyFile key)
{- .git/annex/foounused is used to number possibly unused keys -}
gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath
gitAnnexUnusedLog prefix r =
fromRawFilePath (gitAnnexDir r) </> (prefix ++ "unused")
{- .git/annex/keysdb/ contains a database of information about keys. -}
gitAnnexKeysDb :: Git.Repo -> FilePath
gitAnnexKeysDb r = fromRawFilePath $ gitAnnexDir r P.</> "keysdb"
{- Lock file for the keys database. -}
gitAnnexKeysDbLock :: Git.Repo -> FilePath
gitAnnexKeysDbLock r = gitAnnexKeysDb r ++ ".lck"
{- Contains the stat of the last index file that was
- reconciled with the keys database. -}
gitAnnexKeysDbIndexCache :: Git.Repo -> FilePath
gitAnnexKeysDbIndexCache r = gitAnnexKeysDb r ++ ".cache"
{- .git/annex/fsck/uuid/ is used to store information about incremental
- fscks. -}
gitAnnexFsckDir :: UUID -> Git.Repo -> FilePath
gitAnnexFsckDir u r = fromRawFilePath $
gitAnnexDir r P.</> "fsck" P.</> fromUUID u
{- Used to store information about incremental fscks. -}
gitAnnexFsckState :: UUID -> Git.Repo -> FilePath
gitAnnexFsckState u r = gitAnnexFsckDir u r </> "state"
{- Directory containing database used to record fsck info. -}
gitAnnexFsckDbDir :: UUID -> Git.Repo -> FilePath
gitAnnexFsckDbDir u r = gitAnnexFsckDir u r </> "fsckdb"
{- Directory containing old database used to record fsck info. -}
gitAnnexFsckDbDirOld :: UUID -> Git.Repo -> FilePath
gitAnnexFsckDbDirOld u r = gitAnnexFsckDir u r </> "db"
{- Lock file for the fsck database. -}
gitAnnexFsckDbLock :: UUID -> Git.Repo -> FilePath
gitAnnexFsckDbLock u r = gitAnnexFsckDir u r </> "fsck.lck"
{- .git/annex/fsckresults/uuid is used to store results of git fscks -}
gitAnnexFsckResultsLog :: UUID -> Git.Repo -> FilePath
gitAnnexFsckResultsLog u r = fromRawFilePath $
gitAnnexDir r P.</> "fsckresults" P.</> fromUUID u
{- .git/annex/smudge.log is used to log smudged worktree files that need to
- be updated. -}
gitAnnexSmudgeLog :: Git.Repo -> FilePath
gitAnnexSmudgeLog r = fromRawFilePath $ gitAnnexDir r P.</> "smudge.log"
gitAnnexSmudgeLock :: Git.Repo -> FilePath
gitAnnexSmudgeLock r = fromRawFilePath $ gitAnnexDir r P.</> "smudge.lck"
{- .git/annex/export/ is used to store information about
- exports to special remotes. -}
gitAnnexExportDir :: Git.Repo -> FilePath
gitAnnexExportDir r = fromRawFilePath $ gitAnnexDir r P.</> "export"
{- Directory containing database used to record export info. -}
gitAnnexExportDbDir :: UUID -> Git.Repo -> FilePath
gitAnnexExportDbDir u r = gitAnnexExportDir r </> fromUUID u </> "exportdb"
{- Lock file for export state for a special remote. -}
gitAnnexExportLock :: UUID -> Git.Repo -> FilePath
gitAnnexExportLock u r = gitAnnexExportDbDir u r ++ ".lck"
{- Lock file for updating the export state for a special remote. -}
gitAnnexExportUpdateLock :: UUID -> Git.Repo -> FilePath
gitAnnexExportUpdateLock u r = gitAnnexExportDbDir u r ++ ".upl"
{- Log file used to keep track of files that were in the tree exported to a
- remote, but were excluded by its preferred content settings. -}
gitAnnexExportExcludeLog :: UUID -> Git.Repo -> FilePath
gitAnnexExportExcludeLog u r = fromRawFilePath $
gitAnnexDir r P.</> "export.ex" P.</> fromUUID u
{- Directory containing database used to record remote content ids.
-
- (This used to be "cid", but a problem with the database caused it to
- need to be rebuilt with a new name.)
-}
gitAnnexContentIdentifierDbDir :: Git.Repo -> FilePath
gitAnnexContentIdentifierDbDir r = fromRawFilePath $ gitAnnexDir r P.</> "cidsdb"
{- Lock file for writing to the content id database. -}
gitAnnexContentIdentifierLock :: Git.Repo -> FilePath
gitAnnexContentIdentifierLock r = gitAnnexContentIdentifierDbDir r ++ ".lck"
{- .git/annex/schedulestate is used to store information about when
- scheduled jobs were last run. -}
gitAnnexScheduleState :: Git.Repo -> FilePath
gitAnnexScheduleState r = fromRawFilePath $ gitAnnexDir r P.</> "schedulestate"
{- .git/annex/creds/ is used to store credentials to access some special
- remotes. -}
gitAnnexCredsDir :: Git.Repo -> FilePath
gitAnnexCredsDir r = fromRawFilePath $
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "creds"
{- .git/annex/certificate.pem and .git/annex/key.pem are used by the webapp
- when HTTPS is enabled -}
gitAnnexWebCertificate :: Git.Repo -> FilePath
gitAnnexWebCertificate r = fromRawFilePath $ gitAnnexDir r P.</> "certificate.pem"
gitAnnexWebPrivKey :: Git.Repo -> FilePath
gitAnnexWebPrivKey r = fromRawFilePath $ gitAnnexDir r P.</> "privkey.pem"
{- .git/annex/feeds/ is used to record per-key (url) state by importfeeds -}
gitAnnexFeedStateDir :: Git.Repo -> FilePath
gitAnnexFeedStateDir r = fromRawFilePath $
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "feedstate"
gitAnnexFeedState :: Key -> Git.Repo -> FilePath
gitAnnexFeedState k r = gitAnnexFeedStateDir r </> fromRawFilePath (keyFile k)
{- .git/annex/merge/ is used as an empty work tree for direct mode merges and
- merges in adjusted branches. -}
gitAnnexMergeDir :: Git.Repo -> FilePath
gitAnnexMergeDir r = fromRawFilePath $
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "merge"
{- .git/annex/transfer/ is used to record keys currently
- being transferred, and other transfer bookkeeping info. -}
gitAnnexTransferDir :: Git.Repo -> FilePath
gitAnnexTransferDir r = fromRawFilePath $
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "transfer"
{- .git/annex/journal/ is used to journal changes made to the git-annex
- branch -}
gitAnnexJournalDir :: Git.Repo -> FilePath
gitAnnexJournalDir r = fromRawFilePath $
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "journal"
gitAnnexJournalDir' :: Git.Repo -> RawFilePath
gitAnnexJournalDir' r = P.addTrailingPathSeparator $ gitAnnexDir r P.</> "journal"
{- Lock file for the journal. -}
gitAnnexJournalLock :: Git.Repo -> FilePath
gitAnnexJournalLock r = fromRawFilePath $ gitAnnexDir r P.</> "journal.lck"
{- Lock file for flushing a git queue that writes to the git index or
- other git state that should only have one writer at a time. -}
gitAnnexGitQueueLock :: Git.Repo -> FilePath
gitAnnexGitQueueLock r = fromRawFilePath $ gitAnnexDir r P.</> "gitqueue.lck"
{- Lock file for direct mode merge. -}
gitAnnexMergeLock :: Git.Repo -> FilePath
gitAnnexMergeLock r = fromRawFilePath $ gitAnnexDir r P.</> "merge.lck"
{- .git/annex/index is used to stage changes to the git-annex branch -}
gitAnnexIndex :: Git.Repo -> FilePath
gitAnnexIndex r = fromRawFilePath $ gitAnnexDir r P.</> "index"
{- Holds the ref of the git-annex branch that the index was last updated to.
-
- The .lck in the name is a historical accident; this is not used as a
- lock. -}
gitAnnexIndexStatus :: Git.Repo -> FilePath
gitAnnexIndexStatus r = fromRawFilePath $ gitAnnexDir r P.</> "index.lck"
{- The index file used to generate a filtered branch view. -}
gitAnnexViewIndex :: Git.Repo -> FilePath
gitAnnexViewIndex r = fromRawFilePath $ gitAnnexDir r P.</> "viewindex"
{- File containing a log of recently accessed views. -}
gitAnnexViewLog :: Git.Repo -> FilePath
gitAnnexViewLog r = fromRawFilePath $ gitAnnexDir r P.</> "viewlog"
{- List of refs that have already been merged into the git-annex branch. -}
gitAnnexMergedRefs :: Git.Repo -> FilePath
gitAnnexMergedRefs r = fromRawFilePath $ gitAnnexDir r P.</> "mergedrefs"
{- List of refs that should not be merged into the git-annex branch. -}
gitAnnexIgnoredRefs :: Git.Repo -> FilePath
gitAnnexIgnoredRefs r = fromRawFilePath $ gitAnnexDir r P.</> "ignoredrefs"
{- Pid file for daemon mode. -}
gitAnnexPidFile :: Git.Repo -> FilePath
gitAnnexPidFile r = fromRawFilePath $ gitAnnexDir r P.</> "daemon.pid"
{- Pid lock file for pidlock mode -}
gitAnnexPidLockFile :: Git.Repo -> FilePath
gitAnnexPidLockFile r = fromRawFilePath $ gitAnnexDir r P.</> "pidlock"
{- Status file for daemon mode. -}
gitAnnexDaemonStatusFile :: Git.Repo -> FilePath
gitAnnexDaemonStatusFile r = fromRawFilePath $
gitAnnexDir r P.</> "daemon.status"
{- Log file for daemon mode. -}
gitAnnexLogFile :: Git.Repo -> FilePath
gitAnnexLogFile r = fromRawFilePath $ gitAnnexDir r P.</> "daemon.log"
{- Log file for fuzz test. -}
gitAnnexFuzzTestLogFile :: Git.Repo -> FilePath
gitAnnexFuzzTestLogFile r = fromRawFilePath $
gitAnnexDir r P.</> "fuzztest.log"
{- Html shim file used to launch the webapp. -}
gitAnnexHtmlShim :: Git.Repo -> FilePath
gitAnnexHtmlShim r = fromRawFilePath $ gitAnnexDir r P.</> "webapp.html"
{- File containing the url to the webapp. -}
gitAnnexUrlFile :: Git.Repo -> FilePath
gitAnnexUrlFile r = fromRawFilePath $ gitAnnexDir r P.</> "url"
{- Temporary file used to edit configuration from the git-annex branch. -}
gitAnnexTmpCfgFile :: Git.Repo -> FilePath
gitAnnexTmpCfgFile r = fromRawFilePath $ gitAnnexDir r P.</> "config.tmp"
{- .git/annex/ssh/ is used for ssh connection caching -}
gitAnnexSshDir :: Git.Repo -> FilePath
gitAnnexSshDir r = fromRawFilePath $
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "ssh"
{- .git/annex/remotes/ is used for remote-specific state. -}
gitAnnexRemotesDir :: Git.Repo -> FilePath
gitAnnexRemotesDir r = fromRawFilePath $
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "remotes"
{- This is the base directory name used by the assistant when making
- repositories, by default. -}
gitAnnexAssistantDefaultDir :: FilePath
gitAnnexAssistantDefaultDir = "annex"
{- Sanitizes a String that will be used as part of a Key's keyName,
- dealing with characters that cause problems.
-
- This is used when a new Key is initially being generated, eg by genKey.
- Unlike keyFile and fileKey, it does not need to be a reversible
- escaping. Also, it's ok to change this to add more problematic
- characters later, unlike changing keyFile, which could result in the
- filenames used for existing keys changing and contents getting lost.
-
- It is, however, important that the input and output of this function
- have a 1:1 mapping, to avoid two different inputs mapping to the
- same key.
-}
preSanitizeKeyName :: String -> String
preSanitizeKeyName = preSanitizeKeyName' False
preSanitizeKeyName' :: Bool -> String -> String
preSanitizeKeyName' resanitize = concatMap escape
where
escape c
| isAsciiUpper c || isAsciiLower c || isDigit c = [c]
| c `elem` ['.', '-', '_'] = [c] -- common, assumed safe
| c `elem` ['/', '%', ':'] = [c] -- handled by keyFile
-- , is safe and uncommon, so will be used to escape
-- other characters. By itself, it is escaped to
-- doubled form.
| c == ',' = if not resanitize
then ",,"
else ","
| otherwise = ',' : show (ord c)
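-- For example, the space is escaped to its ordinal and the escape
-- character "," to its doubled form:
--
-- >>> preSanitizeKeyName "a b,c"
-- "a,32b,,c"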
{- Converts a keyName that has been sanitized with an old version of
- preSanitizeKeyName to be sanitized with the new version. -}
reSanitizeKeyName :: String -> String
reSanitizeKeyName = preSanitizeKeyName' True
{- Converts a key into a filename fragment without any directory.
-
- Escape "/" in the key name, to keep a flat tree of files and avoid
- issues with keys containing "/../" or ending with "/" etc.
-
- "/" is escaped to "%" because it's short and rarely used, and resembles
- a slash.
- "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping
- is one to one.
- ":" is escaped to "&c", because it seemed like a good idea at the time.
-
- Changing what this function escapes and how is not a good idea, as it
- can cause existing objects to get lost.
-}
keyFile :: Key -> RawFilePath
keyFile k =
let b = serializeKey' k
in if S8.any (`elem` ['&', '%', ':', '/']) b
then S8.concatMap esc b
else b
where
esc '&' = "&a"
esc '%' = "&s"
esc ':' = "&c"
esc '/' = "%"
esc c = S8.singleton c
{- Reverses keyFile, converting a filename fragment (ie, the basename of
- the symlink target) into a key. -}
fileKey :: RawFilePath -> Maybe Key
fileKey = deserializeKey' . S8.intercalate "/" . map go . S8.split '%'
where
go = S8.concat . unescafterfirst . S8.split '&'
unescafterfirst [] = []
unescafterfirst (b:bs) = b : map (unesc . S8.uncons) bs
unesc :: Maybe (Char, S8.ByteString) -> S8.ByteString
unesc Nothing = mempty
unesc (Just ('c', b)) = S8.cons ':' b
unesc (Just ('s', b)) = S8.cons '%' b
unesc (Just ('a', b)) = S8.cons '&' b
unesc (Just (c, b)) = S8.cons c b
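-- A sketch of the escaping round trip (hypothetical key name): a key
-- serializing to "WORM-s0-m0--a/b:c%d" is stored under the filename
-- fragment "WORM-s0-m0--a%b&cc&sd", and fileKey maps that fragment
-- back to the original key.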
{- A location to store a key on a special remote that uses a filesystem.
- A directory hash is used, to protect against filesystems that dislike
- having many items in a single directory.
-
- The file is put in a directory with the same name, this allows
- write-protecting the directory to avoid accidental deletion of the file.
-}
keyPath :: Key -> Hasher -> RawFilePath
keyPath key hasher = hasher key P.</> f P.</> f
where
f = keyFile key
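-- For example (hypothetical key and hash), with a two-level mixed-case
-- hash the location could be
--   Wx/2D/WORM-s0-m0--foo/WORM-s0-m0--foo
-- with the doubled final component being the write-protectable
-- directory described above.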
{- All possible locations to store a key in a special remote
- using different directory hashes.
-
- This is compatible with the annexLocations, for interoperability between
- special remotes and git-annex repos.
-}
keyPaths :: Key -> [RawFilePath]
keyPaths key = map (\h -> keyPath key (h def)) dirHashes

Annex/LockFile.hs Normal file
@ -0,0 +1,116 @@
{- git-annex lock files.
-
- Copyright 2012-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.LockFile (
lockFileCached,
unlockFile,
getLockCache,
fromLockCache,
withSharedLock,
withExclusiveLock,
takeExclusiveLock,
tryExclusiveLock,
) where
import Annex.Common
import Annex
import Types.LockCache
import qualified Git
import Annex.Perms
import Annex.LockPool
import qualified Data.Map as M
{- Creates a specified lock file, and takes a shared lock, which is retained
- in the cache. -}
lockFileCached :: FilePath -> Annex ()
lockFileCached file = go =<< fromLockCache file
where
go (Just _) = noop -- already locked
go Nothing = do
#ifndef mingw32_HOST_OS
mode <- annexFileMode
lockhandle <- noUmask mode $ lockShared (Just mode) file
#else
lockhandle <- liftIO $ waitToLock $ lockShared file
#endif
changeLockCache $ M.insert file lockhandle
unlockFile :: FilePath -> Annex ()
unlockFile file = maybe noop go =<< fromLockCache file
where
go lockhandle = do
liftIO $ dropLock lockhandle
changeLockCache $ M.delete file
getLockCache :: Annex LockCache
getLockCache = getState lockcache
fromLockCache :: FilePath -> Annex (Maybe LockHandle)
fromLockCache file = M.lookup file <$> getLockCache
changeLockCache :: (LockCache -> LockCache) -> Annex ()
changeLockCache a = do
m <- getLockCache
changeState $ \s -> s { lockcache = a m }
{- Runs an action with a shared lock held. If an exclusive lock is held,
- blocks until it becomes free. -}
withSharedLock :: (Git.Repo -> FilePath) -> Annex a -> Annex a
withSharedLock getlockfile a = debugLocks $ do
lockfile <- fromRepo getlockfile
createAnnexDirectory $ takeDirectory lockfile
mode <- annexFileMode
bracket (lock mode lockfile) (liftIO . dropLock) (const a)
where
#ifndef mingw32_HOST_OS
lock mode = noUmask mode . lockShared (Just mode)
#else
lock _mode = liftIO . waitToLock . lockShared
#endif
{- Runs an action with an exclusive lock held. If the lock is already
- held, blocks until it becomes free. -}
withExclusiveLock :: (Git.Repo -> FilePath) -> Annex a -> Annex a
withExclusiveLock getlockfile a = bracket
(takeExclusiveLock getlockfile)
(liftIO . dropLock)
(const a)
{- Takes an exclusive lock, blocking until it's free. -}
takeExclusiveLock :: (Git.Repo -> FilePath) -> Annex LockHandle
takeExclusiveLock getlockfile = debugLocks $ do
lockfile <- fromRepo getlockfile
createAnnexDirectory $ takeDirectory lockfile
mode <- annexFileMode
lock mode lockfile
where
#ifndef mingw32_HOST_OS
lock mode = noUmask mode . lockExclusive (Just mode)
#else
lock _mode = liftIO . waitToLock . lockExclusive
#endif
{- Tries to take an exclusive lock and run an action. If the lock is
- already held, returns Nothing. -}
tryExclusiveLock :: (Git.Repo -> FilePath) -> Annex a -> Annex (Maybe a)
tryExclusiveLock getlockfile a = debugLocks $ do
lockfile <- fromRepo getlockfile
createAnnexDirectory $ takeDirectory lockfile
mode <- annexFileMode
bracket (lock mode lockfile) (liftIO . unlock) go
where
#ifndef mingw32_HOST_OS
lock mode = noUmask mode . tryLockExclusive (Just mode)
#else
lock _mode = liftIO . lockExclusive
#endif
unlock = maybe noop dropLock
go Nothing = return Nothing
go (Just _) = Just <$> a

Annex/LockPool.hs Normal file
@ -0,0 +1,17 @@
{- Wraps Utility.LockPool, making pid locks be used when git-annex is so
- configured.
-
- Copyright 2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.LockPool (module X) where
#ifndef mingw32_HOST_OS
import Annex.LockPool.PosixOrPid as X
#else
import Utility.LockPool.Windows as X
#endif

Annex/LockPool/PosixOrPid.hs Normal file
@ -0,0 +1,91 @@
{- Wraps Utility.LockPool, making pid locks be used when git-annex is so
- configured.
-
- Copyright 2015-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.LockPool.PosixOrPid (
LockFile,
LockHandle,
lockShared,
lockExclusive,
tryLockShared,
tryLockExclusive,
dropLock,
checkLocked,
LockStatus(..),
getLockStatus,
checkSaneLock,
) where
import Common
import Types
import qualified Annex
import qualified Utility.LockPool.Posix as Posix
import qualified Utility.LockPool.PidLock as Pid
import qualified Utility.LockPool.LockHandle as H
import Utility.LockPool.LockHandle (LockHandle, dropLock)
import Utility.LockFile.Posix (openLockFile)
import Utility.LockPool.STM (LockFile)
import Utility.LockFile.LockStatus
import Config (pidLockFile)
import Messages (warning)
import System.Posix
lockShared :: Maybe FileMode -> LockFile -> Annex LockHandle
lockShared m f = pidLock m f $ Posix.lockShared m f
lockExclusive :: Maybe FileMode -> LockFile -> Annex LockHandle
lockExclusive m f = pidLock m f $ Posix.lockExclusive m f
tryLockShared :: Maybe FileMode -> LockFile -> Annex (Maybe LockHandle)
tryLockShared m f = tryPidLock m f $ Posix.tryLockShared m f
tryLockExclusive :: Maybe FileMode -> LockFile -> Annex (Maybe LockHandle)
tryLockExclusive m f = tryPidLock m f $ Posix.tryLockExclusive m f
checkLocked :: LockFile -> Annex (Maybe Bool)
checkLocked f = Posix.checkLocked f `pidLockCheck` checkpid
where
checkpid pidlock = Pid.checkLocked pidlock >>= \case
-- Only return true when the posix lock file exists.
Just _ -> Posix.checkLocked f
Nothing -> return Nothing
getLockStatus :: LockFile -> Annex LockStatus
getLockStatus f = Posix.getLockStatus f
`pidLockCheck` Pid.getLockStatus
checkSaneLock :: LockFile -> LockHandle -> Annex Bool
checkSaneLock f h = H.checkSaneLock f h
`pidLockCheck` flip Pid.checkSaneLock h
pidLockCheck :: IO a -> (LockFile -> IO a) -> Annex a
pidLockCheck posixcheck pidcheck = debugLocks $
liftIO . maybe posixcheck pidcheck =<< pidLockFile
pidLock :: Maybe FileMode -> LockFile -> IO LockHandle -> Annex LockHandle
pidLock m f posixlock = debugLocks $ go =<< pidLockFile
where
go Nothing = liftIO posixlock
go (Just pidlock) = do
timeout <- annexPidLockTimeout <$> Annex.getGitConfig
liftIO $ dummyPosixLock m f
Pid.waitLock timeout pidlock warning
tryPidLock :: Maybe FileMode -> LockFile -> IO (Maybe LockHandle) -> Annex (Maybe LockHandle)
tryPidLock m f posixlock = debugLocks $ liftIO . go =<< pidLockFile
where
go Nothing = posixlock
go (Just pidlock) = do
dummyPosixLock m f
Pid.tryLock pidlock
-- The posix lock file is created even when using pid locks, in order to
-- avoid complicating any code that might expect to be able to see that
-- lock file. But, it's not locked.
dummyPosixLock :: Maybe FileMode -> LockFile -> IO ()
dummyPosixLock m f = bracket (openLockFile ReadLock m f) closeFd (const noop)
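Every function in this module follows one dispatch shape: consult the configured pid-lock file, then route to either the posix implementation or the pid-lock one. A standalone sketch of that shape; getPidLockFile is a hypothetical stand-in for the Annex pidLockFile query:
-- Sketch of the pidLockCheck dispatch used above.
dispatchLockCheck :: IO (Maybe FilePath) -> IO a -> (FilePath -> IO a) -> IO a
dispatchLockCheck getPidLockFile posixcheck pidcheck =
    maybe posixcheck pidcheck =<< getPidLockFile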

Annex/Magic.hs Normal file

@@ -0,0 +1,74 @@
{- Interface to libmagic
-
- Copyright 2019-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.Magic (
Magic,
MimeType,
MimeEncoding,
initMagicMime,
getMagicMimeType,
getMagicMimeEncoding,
) where
import Types.Mime
import Control.Monad.IO.Class
#ifdef WITH_MAGICMIME
import Magic
import Utility.Env
import Control.Concurrent
import System.IO.Unsafe (unsafePerformIO)
import Common
#else
type Magic = ()
#endif
initMagicMime :: IO (Maybe Magic)
#ifdef WITH_MAGICMIME
initMagicMime = catchMaybeIO $ do
m <- magicOpen [MagicMime]
liftIO $ getEnv "GIT_ANNEX_DIR" >>= \case
Nothing -> magicLoadDefault m
Just d -> magicLoad m
(d </> "magic" </> "magic.mgc")
return m
#else
initMagicMime = return Nothing
#endif
getMagicMime :: Magic -> FilePath -> IO (Maybe (MimeType, MimeEncoding))
#ifdef WITH_MAGICMIME
getMagicMime m f = Just . parse <$> magicConcurrentSafe (magicFile m f)
where
parse s =
let (mimetype, rest) = separate (== ';') s
in case rest of
(' ':'c':'h':'a':'r':'s':'e':'t':'=':mimeencoding) ->
(mimetype, mimeencoding)
_ -> (mimetype, "")
#else
getMagicMime _ _ = return Nothing
#endif
getMagicMimeType :: MonadIO m => Magic -> FilePath -> m (Maybe MimeType)
getMagicMimeType m f = liftIO $ fmap fst <$> getMagicMime m f
getMagicMimeEncoding :: MonadIO m => Magic -> FilePath -> m (Maybe MimeEncoding)
getMagicMimeEncoding m f = liftIO $ fmap snd <$> getMagicMime m f
#ifdef WITH_MAGICMIME
{-# NOINLINE mutex #-}
mutex :: MVar ()
mutex = unsafePerformIO $ newMVar ()
-- Work around a bug, the library is not concurrency safe and will
-- sometimes access the wrong memory if multiple ones are called at the
-- same time.
magicConcurrentSafe :: IO a -> IO a
magicConcurrentSafe = bracket_ (takeMVar mutex) (putMVar mutex ())
#endif
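libmagic's MIME output has the form "text/plain; charset=us-ascii". A standalone sketch of the split performed by getMagicMime's parse, using break and stripPrefix from base instead of git-annex's separate; parseMime is a hypothetical name:
import Data.List (stripPrefix)
parseMime :: String -> (String, String)
parseMime s =
    let (mimetype, rest) = break (== ';') s
    in case stripPrefix "; charset=" rest of
        Just enc -> (mimetype, enc)   -- e.g. ("text/plain","us-ascii")
        Nothing  -> (mimetype, "")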

Annex/MetaData.hs Normal file

@@ -0,0 +1,115 @@
{- git-annex metadata
-
- Copyright 2014-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.MetaData (
genMetaData,
dateMetaData,
parseModMeta,
parseMetaDataMatcher,
module X
) where
import Annex.Common
import qualified Annex
import Types.MetaData as X
import Annex.MetaData.StandardFields as X
import Logs.MetaData
import Annex.CatFile
import Utility.Glob
import qualified Data.Set as S
import qualified Data.Text as T
import Data.Time.Calendar
import Data.Time.Clock
import Data.Time.Clock.POSIX
{- Adds metadata for a file that has just been ingested into the
- annex, but has not yet been committed to git.
-
- When the file has been modified, the metadata is copied over
- from the old key to the new key. Note that it looks at the old key as
- committed to HEAD -- the new key may or may not have already been staged
- in the index.
-
- Also, can generate new metadata, if configured to do so.
-}
genMetaData :: Key -> RawFilePath -> FileStatus -> Annex ()
genMetaData key file status = do
catKeyFileHEAD file >>= \case
Nothing -> noop
Just oldkey ->
-- Have to copy first, before adding any
-- more metadata, because copyMetaData does not
-- preserve any metadata already on key.
whenM (copyMetaData oldkey key <&&> (not <$> onlydatemeta oldkey)) $
warncopied
whenM (annexGenMetaData <$> Annex.getGitConfig) $ do
old <- getCurrentMetaData key
addMetaData key (dateMetaData mtime old)
where
mtime = posixSecondsToUTCTime $ realToFrac $ modificationTime status
warncopied = warning $
"Copied metadata from old version of " ++ fromRawFilePath file ++ " to new version. " ++
"If you don't want this copied metadata, run: git annex metadata --remove-all " ++ fromRawFilePath file
-- If the only fields copied were date metadata, and they'll
-- be overwritten with the current mtime, no need to warn about
-- copying.
onlydatemeta oldkey = ifM (annexGenMetaData <$> Annex.getGitConfig)
( null . filter (not . isDateMetaField . fst) . fromMetaData
<$> getCurrentMetaData oldkey
, return False
)
{- Generates metadata for a file's date stamp.
-
- Any date fields in the old metadata will be overwritten.
-
- Note that the returned MetaData does not contain all the input MetaData,
- only changes to add the date fields. -}
dateMetaData :: UTCTime -> MetaData -> MetaData
dateMetaData mtime old = modMeta old $
(SetMeta yearMetaField $ S.singleton $ toMetaValue $ encodeBS' $ show y)
`ComposeModMeta`
(SetMeta monthMetaField $ S.singleton $ toMetaValue $ encodeBS' $ show m)
`ComposeModMeta`
(SetMeta dayMetaField $ S.singleton $ toMetaValue $ encodeBS' $ show d)
where
(y, m, d) = toGregorian $ utctDay mtime
{- Parses field=value, field+=value, field-=value, field?=value -}
parseModMeta :: String -> Either String ModMeta
parseModMeta p = case lastMaybe f of
Just '+' -> AddMeta <$> mkMetaField (T.pack f') <*> v
Just '-' -> DelMeta <$> mkMetaField (T.pack f') <*> (Just <$> v)
Just '?' -> MaybeSetMeta <$> mkMetaField (T.pack f') <*> v
_ -> SetMeta <$> mkMetaField (T.pack f) <*> (S.singleton <$> v)
where
(f, sv) = separate (== '=') p
f' = beginning f
v = pure (toMetaValue (encodeBS sv))
{- Parses field=value, field<value, field<=value, field>value, field>=value -}
parseMetaDataMatcher :: String -> Either String (MetaField, MetaValue -> Bool)
parseMetaDataMatcher p = (,)
<$> mkMetaField (T.pack f)
<*> pure matcher
where
(f, op_v) = break (`elem` "=<>") p
matcher = case op_v of
('=':v) -> checkglob v
('<':'=':v) -> checkcmp (<=) v
('<':v) -> checkcmp (<) v
('>':'=':v) -> checkcmp (>=) v
('>':v) -> checkcmp (>) v
_ -> checkglob ""
checkglob v =
let cglob = compileGlob v CaseInsensative
in matchGlob cglob . decodeBS . fromMetaValue
checkcmp cmp v v' = case (doubleval v, doubleval (decodeBS (fromMetaValue v'))) of
(Just d, Just d') -> d' `cmp` d
_ -> False
doubleval v = readish v :: Maybe Double
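Some illustrative inputs for the two parsers above; the sketched results assume a ghci session in the git-annex tree:
--   parseModMeta "tag+=work"          -- AddMeta: add a value to "tag"
--   parseModMeta "author?=unknown"    -- MaybeSetMeta: set only if unset
--   parseMetaDataMatcher "year>=2014" -- numeric comparison on "year"
--   parseMetaDataMatcher "tag=doc*"   -- case-insensitive glob on "tag"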

Annex/MetaData/StandardFields.hs Normal file

@@ -0,0 +1,63 @@
{- git-annex metadata, standard fields
-
- Copyright 2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.MetaData.StandardFields (
tagMetaField,
yearMetaField,
monthMetaField,
dayMetaField,
isDateMetaField,
lastChangedField,
mkLastChangedField,
isLastChangedField
) where
import Types.MetaData
import qualified Data.Text as T
import Data.Monoid
import Prelude
tagMetaField :: MetaField
tagMetaField = mkMetaFieldUnchecked "tag"
yearMetaField :: MetaField
yearMetaField = mkMetaFieldUnchecked "year"
monthMetaField :: MetaField
monthMetaField = mkMetaFieldUnchecked "month"
dayMetaField :: MetaField
dayMetaField = mkMetaFieldUnchecked "day"
isDateMetaField :: MetaField -> Bool
isDateMetaField f
| f == yearMetaField = True
| f == monthMetaField = True
| f == dayMetaField = True
| otherwise = False
lastChangedField :: MetaField
lastChangedField = mkMetaFieldUnchecked lastchanged
mkLastChangedField :: MetaField -> MetaField
mkLastChangedField f = mkMetaFieldUnchecked (fromMetaField f <> lastchangedSuffix)
isLastChangedField :: MetaField -> Bool
isLastChangedField f
| f == lastChangedField = True
| otherwise = lastchanged `T.isSuffixOf` s && s /= lastchangedSuffix
where
s = fromMetaField f
lastchanged :: T.Text
lastchanged = "lastchanged"
lastchangedSuffix :: T.Text
lastchangedSuffix = "-lastchanged"
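The naming convention encoded above, illustrated with sketched ghci results:
--   fromMetaField (mkLastChangedField tagMetaField) ~> "tag-lastchanged"
--   isLastChangedField lastChangedField             ~> True
--   isDateMetaField monthMetaField                  ~> True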

Annex/Multicast.hs Normal file

@@ -0,0 +1,44 @@
{- git-annex multicast receive callback
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Multicast where
import Annex.Path
import Utility.Env
import Utility.PartialPrelude
import System.Process
import System.IO
import GHC.IO.Handle.FD
import Control.Applicative
import Prelude
multicastReceiveEnv :: String
multicastReceiveEnv = "GIT_ANNEX_MULTICAST_RECEIVE"
multicastCallbackEnv :: IO (FilePath, [(String, String)], Handle)
multicastCallbackEnv = do
gitannex <- programPath
-- This will even work on Windows
(rfd, wfd) <- createPipeFd
rh <- fdToHandle rfd
environ <- addEntry multicastReceiveEnv (show wfd) <$> getEnvironment
return (gitannex, environ, rh)
-- This is run when uftpd has received a file. Rather than move
-- the file into the annex here, which would require starting up the
-- Annex monad, parsing git config, and verifying the content, simply
-- output to the specified FD the filename. This keeps the time
-- that uftpd is not receiving the next file as short as possible.
runMulticastReceive :: [String] -> String -> IO ()
runMulticastReceive ("-I":_sessionid:fs) hs = case readish hs of
Just fd -> do
h <- fdToHandle fd
mapM_ (hPutStrLn h) fs
hClose h
Nothing -> return ()
runMulticastReceive _ _ = return ()
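Seen from the child's side, the handoff reduces to: read the fd number from the env var and write received filenames to it. A minimal standalone sketch; reportReceived is hypothetical, the real entry point being runMulticastReceive above:
{-# LANGUAGE LambdaCase #-}
import System.Environment (lookupEnv)
import GHC.IO.Handle.FD (fdToHandle)
import System.IO (hPutStrLn, hClose)
reportReceived :: FilePath -> IO ()
reportReceived f = lookupEnv "GIT_ANNEX_MULTICAST_RECEIVE" >>= \case
    Just s | [(fd, "")] <- reads s -> do
        h <- fdToHandle (fromIntegral (fd :: Int))
        hPutStrLn h f    -- hand the filename back to the parent
        hClose h
    _ -> return ()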

Annex/Notification.hs Normal file

@@ -0,0 +1,108 @@
{- git-annex desktop notifications
-
- Copyright 2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-}
{-# LANGUAGE CPP #-}
module Annex.Notification (NotifyWitness, noNotification, notifyTransfer, notifyDrop) where
import Annex.Common
import Types.Transfer
#ifdef WITH_DBUS_NOTIFICATIONS
import qualified Annex
import Types.DesktopNotify
import qualified DBus.Notify as Notify
import qualified DBus.Client
#endif
-- Witness that notification has happened.
data NotifyWitness = NotifyWitness
-- Only use when no notification should be done.
noNotification :: NotifyWitness
noNotification = NotifyWitness
{- Wrap around an action that performs a transfer, which may run multiple
- attempts. Displays notification when supported and when the user asked
- for it. -}
notifyTransfer :: Transferrable t => Observable v => Direction -> t -> (NotifyWitness -> Annex v) -> Annex v
#ifdef WITH_DBUS_NOTIFICATIONS
notifyTransfer direction t a = case descTransfrerrable t of
Nothing -> a NotifyWitness
Just desc -> do
wanted <- Annex.getState Annex.desktopnotify
if (notifyStart wanted || notifyFinish wanted)
then do
client <- liftIO DBus.Client.connectSession
startnotification <- liftIO $ if notifyStart wanted
then Just <$> Notify.notify client (startedTransferNote direction desc)
else pure Nothing
res <- a NotifyWitness
let ok = observeBool res
when (notifyFinish wanted) $ liftIO $ void $ maybe
(Notify.notify client $ finishedTransferNote ok direction desc)
(\n -> Notify.replace client n $ finishedTransferNote ok direction desc)
startnotification
return res
else a NotifyWitness
#else
notifyTransfer _ _ a = a NotifyWitness
#endif
notifyDrop :: AssociatedFile -> Bool -> Annex ()
notifyDrop (AssociatedFile Nothing) _ = noop
#ifdef WITH_DBUS_NOTIFICATIONS
notifyDrop (AssociatedFile (Just f)) ok = do
wanted <- Annex.getState Annex.desktopnotify
when (notifyFinish wanted) $ liftIO $ do
client <- DBus.Client.connectSession
void $ Notify.notify client (droppedNote ok (fromRawFilePath f))
#else
notifyDrop (AssociatedFile (Just _)) _ = noop
#endif
#ifdef WITH_DBUS_NOTIFICATIONS
startedTransferNote :: Direction -> String -> Notify.Note
startedTransferNote Upload = mkNote Notify.Transfer Notify.Low iconUpload
"Uploading"
startedTransferNote Download = mkNote Notify.Transfer Notify.Low iconDownload
"Downloading"
finishedTransferNote :: Bool -> Direction -> String -> Notify.Note
finishedTransferNote False Upload = mkNote Notify.TransferError Notify.Normal iconFailure
"Failed to upload"
finishedTransferNote False Download = mkNote Notify.TransferError Notify.Normal iconFailure
"Failed to download"
finishedTransferNote True Upload = mkNote Notify.TransferComplete Notify.Low iconSuccess
"Finished uploading"
finishedTransferNote True Download = mkNote Notify.TransferComplete Notify.Low iconSuccess
"Finished downloading"
droppedNote :: Bool -> String -> Notify.Note
droppedNote False = mkNote Notify.TransferError Notify.Normal iconFailure
"Failed to drop"
droppedNote True = mkNote Notify.TransferComplete Notify.Low iconSuccess
"Dropped"
iconUpload, iconDownload, iconFailure, iconSuccess :: String
iconUpload = "network-transmit"
iconDownload = "network-receive"
iconFailure = "dialog-error"
iconSuccess = "git-annex" -- Is there a standard icon for success/completion?
mkNote :: Notify.Category -> Notify.UrgencyLevel -> String -> String -> FilePath -> Notify.Note
mkNote category urgency icon desc path = Notify.blankNote
{ Notify.appName = "git-annex"
, Notify.appImage = Just (Notify.Icon icon)
, Notify.summary = desc ++ " " ++ path
, Notify.hints =
[ Notify.Category category
, Notify.Urgency urgency
, Notify.SuppressSound True
]
}
#endif
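A sketch of the intended call shape at a hypothetical call site (performTransfer is made up for illustration); requiring the witness means a transfer action cannot be run without going through notifyTransfer:
--   notifyTransfer Upload t $ \witness ->
--       performTransfer ... witness   -- witness proves notification ran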

Annex/NumCopies.hs Normal file

@@ -0,0 +1,225 @@
{- git-annex numcopies configuration and checking
-
- Copyright 2014-2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE ScopedTypeVariables, DeriveDataTypeable #-}
module Annex.NumCopies (
module Types.NumCopies,
module Logs.NumCopies,
getFileNumCopies,
getAssociatedFileNumCopies,
getGlobalFileNumCopies,
getNumCopies,
deprecatedNumCopies,
defaultNumCopies,
numCopiesCheck,
numCopiesCheck',
verifyEnoughCopiesToDrop,
verifiableCopies,
UnVerifiedCopy(..),
) where
import Annex.Common
import qualified Annex
import Types.NumCopies
import Logs.NumCopies
import Logs.Trust
import Annex.CheckAttr
import qualified Remote
import qualified Types.Remote as Remote
import Annex.Content
import Annex.UUID
import Control.Exception
import qualified Control.Monad.Catch as M
import Data.Typeable
defaultNumCopies :: NumCopies
defaultNumCopies = NumCopies 1
fromSources :: [Annex (Maybe NumCopies)] -> Annex NumCopies
fromSources = fromMaybe defaultNumCopies <$$> getM id
{- The git config annex.numcopies is deprecated. -}
deprecatedNumCopies :: Annex (Maybe NumCopies)
deprecatedNumCopies = annexNumCopies <$> Annex.getGitConfig
{- Value forced on the command line by --numcopies. -}
getForcedNumCopies :: Annex (Maybe NumCopies)
getForcedNumCopies = Annex.getState Annex.forcenumcopies
{- Numcopies value from any of the non-.gitattributes configuration
- sources. -}
getNumCopies :: Annex NumCopies
getNumCopies = fromSources
[ getForcedNumCopies
, getGlobalNumCopies
, deprecatedNumCopies
]
{- Numcopies value for a file, from any configuration source, including the
- deprecated git config. -}
getFileNumCopies :: FilePath -> Annex NumCopies
getFileNumCopies f = fromSources
[ getForcedNumCopies
, getFileNumCopies' f
, deprecatedNumCopies
]
getAssociatedFileNumCopies :: AssociatedFile -> Annex NumCopies
getAssociatedFileNumCopies (AssociatedFile afile) =
maybe getNumCopies getFileNumCopies (fromRawFilePath <$> afile)
{- This is the globally visible numcopies value for a file. So it does
- not include local configuration in the git config or command line
- options. -}
getGlobalFileNumCopies :: FilePath -> Annex NumCopies
getGlobalFileNumCopies f = fromSources
[ getFileNumCopies' f
]
getFileNumCopies' :: FilePath -> Annex (Maybe NumCopies)
getFileNumCopies' file = maybe getGlobalNumCopies (return . Just) =<< getattr
where
getattr = (NumCopies <$$> readish)
<$> checkAttr "annex.numcopies" file
{- Checks if numcopies are satisfied for a file by running a comparison
- between the number of (not untrusted) copies that are
- believed to exist, and the configured value.
-
- This is good enough for everything except dropping the file, which
- requires active verification of the copies.
-}
numCopiesCheck :: FilePath -> Key -> (Int -> Int -> v) -> Annex v
numCopiesCheck file key vs = do
have <- trustExclude UnTrusted =<< Remote.keyLocations key
numCopiesCheck' file vs have
numCopiesCheck' :: FilePath -> (Int -> Int -> v) -> [UUID] -> Annex v
numCopiesCheck' file vs have = do
NumCopies needed <- getFileNumCopies file
return $ length have `vs` needed
data UnVerifiedCopy = UnVerifiedRemote Remote | UnVerifiedHere
deriving (Ord, Eq)
{- Verifies that enough copies of a key exist among the listed remotes,
- to safely drop it, running an action with a proof if so, and
- printing an informative message if not.
-}
verifyEnoughCopiesToDrop
:: String -- message to print when there are no known locations
-> Key
-> Maybe ContentRemovalLock
-> NumCopies
-> [UUID] -- repos to skip considering (generally untrusted remotes)
-> [VerifiedCopy] -- copies already verified to exist
-> [UnVerifiedCopy] -- places to check to see if they have copies
-> (SafeDropProof -> Annex a) -- action to perform the drop
-> Annex a -- action to perform when unable to drop
-> Annex a
verifyEnoughCopiesToDrop nolocmsg key removallock need skip preverified tocheck dropaction nodropaction =
helper [] [] preverified (nub tocheck) []
where
helper bad missing have [] lockunsupported =
liftIO (mkSafeDropProof need have removallock) >>= \case
Right proof -> dropaction proof
Left stillhave -> do
notEnoughCopies key need stillhave (skip++missing) bad nolocmsg lockunsupported
nodropaction
helper bad missing have (c:cs) lockunsupported
| isSafeDrop need have removallock =
liftIO (mkSafeDropProof need have removallock) >>= \case
Right proof -> dropaction proof
Left stillhave -> helper bad missing stillhave (c:cs) lockunsupported
| otherwise = case c of
UnVerifiedHere -> lockContentShared key contverified
UnVerifiedRemote r -> checkremote r contverified $
let lockunsupported' = r : lockunsupported
in Remote.hasKey r key >>= \case
Right True -> helper bad missing (mkVerifiedCopy RecentlyVerifiedCopy r : have) cs lockunsupported'
Left _ -> helper (r:bad) missing have cs lockunsupported'
Right False -> helper bad (Remote.uuid r:missing) have cs lockunsupported'
where
contverified vc = helper bad missing (vc : have) cs lockunsupported
checkremote r cont fallback = case Remote.lockContent r of
Just lockcontent -> do
-- The remote's lockContent will throw an exception
-- when it is unable to lock, in which case the
-- fallback should be run.
--
-- On the other hand, the continuation could itself
-- throw an exception (ie, the eventual drop action
-- fails), and in this case we don't want to run the
-- fallback since part of the drop action may have
-- already been performed.
--
-- Differentiate between these two sorts
-- of exceptions by using DropException.
let a = lockcontent key $ \v ->
cont v `catchNonAsync` (throw . DropException)
a `M.catches`
[ M.Handler (\ (e :: AsyncException) -> throwM e)
, M.Handler (\ (e :: SomeAsyncException) -> throwM e)
, M.Handler (\ (DropException e') -> throwM e')
, M.Handler (\ (_e :: SomeException) -> fallback)
]
Nothing -> fallback
data DropException = DropException SomeException
deriving (Typeable, Show)
instance Exception DropException
notEnoughCopies :: Key -> NumCopies -> [VerifiedCopy] -> [UUID] -> [Remote] -> String -> [Remote] -> Annex ()
notEnoughCopies key need have skip bad nolocmsg lockunsupported = do
showNote "unsafe"
if length have < fromNumCopies need
then showLongNote $
"Could only verify the existence of " ++
show (length have) ++ " out of " ++ show (fromNumCopies need) ++
" necessary copies"
else do
showLongNote $ "Unable to lock down 1 copy of file that is required to safely drop it."
if null lockunsupported
then showLongNote "(This could have happened because of a concurrent drop, or because a remote has too old a version of git-annex-shell installed.)"
else showLongNote $ "These remotes do not support locking: "
++ Remote.listRemoteNames lockunsupported
Remote.showTriedRemotes bad
Remote.showLocations True key (map toUUID have++skip) nolocmsg
{- Finds locations of a key that can be used to get VerifiedCopies,
- in order to allow dropping the key.
-
- Provide a list of UUIDs that the key is being dropped from.
- The returned lists will exclude any of those UUIDs.
-
- The return lists also exclude any repositories that are untrusted,
- since those should not be used for verification.
-
- The UnVerifiedCopy list is cost ordered.
- The VerifiedCopy list contains repositories that are trusted to
- contain the key.
-}
verifiableCopies :: Key -> [UUID] -> Annex ([UnVerifiedCopy], [VerifiedCopy])
verifiableCopies key exclude = do
locs <- Remote.keyLocations key
(remotes, trusteduuids) <- Remote.remoteLocations locs
=<< trustGet Trusted
untrusteduuids <- trustGet UnTrusted
let exclude' = exclude ++ untrusteduuids
let remotes' = Remote.remotesWithoutUUID remotes (exclude' ++ trusteduuids)
let verified = map (mkVerifiedCopy TrustedCopy) $
filter (`notElem` exclude') trusteduuids
u <- getUUID
let herec = if u `elem` locs && u `notElem` exclude'
then [UnVerifiedHere]
else []
return (herec ++ map UnVerifiedRemote remotes', verified)
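An illustrative check built from the pieces above, assuming an Annex context; passing (>=) turns numCopiesCheck into a have-enough test:
--   enough <- numCopiesCheck file key (>=)
--   -- True when the believed copies are at least the configured
--   -- numcopies for this file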

Annex/Path.hs Normal file

@@ -0,0 +1,64 @@
{- git-annex program path
-
- Copyright 2013-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Path where
import Annex.Common
import Config.Files
import Utility.Env
import Annex.PidLock
import System.Environment (getExecutablePath)
{- A fully qualified path to the currently running git-annex program.
-
- getExecutablePath is used when possible. On OSs where it is well
- supported, it returns the complete path to the program. But, on other OSs,
- it might return just the basename. Fall back to reading the programFile,
- or searching for the command name in PATH.
-
- The standalone build runs git-annex via ld.so, and defeats
- getExecutablePath. It sets GIT_ANNEX_PROGRAMPATH to the correct path
- to the wrapper script to use.
-}
programPath :: IO FilePath
programPath = go =<< getEnv "GIT_ANNEX_PROGRAMPATH"
where
go (Just p) = return p
go Nothing = do
exe <- getExecutablePath
p <- if isAbsolute exe
then return exe
else fromMaybe exe <$> readProgramFile
maybe cannotFindProgram return =<< searchPath p
{- Returns the path for git-annex that is recorded in the programFile. -}
readProgramFile :: IO (Maybe FilePath)
readProgramFile = do
programfile <- programFile
headMaybe . lines <$> readFile programfile
cannotFindProgram :: IO a
cannotFindProgram = do
f <- programFile
giveup $ "cannot find git-annex program in PATH or in " ++ f
{- Runs a git-annex child process.
-
- Like runsGitAnnexChildProcessViaGit, when pid locking is in use,
- this takes the pid lock, while running it, and sets an env var
- that prevents the child process trying to take the pid lock,
- to avoid it deadlocking.
-}
gitAnnexChildProcess
:: [String]
-> (CreateProcess -> CreateProcess)
-> (Maybe Handle -> Maybe Handle -> Maybe Handle -> ProcessHandle -> IO a)
-> Annex a
gitAnnexChildProcess ps f a = do
cmd <- liftIO programPath
pidLockChildProcess cmd ps f a
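A sketch of how gitAnnexChildProcess might be used at a hypothetical call site, capturing the child's stdout:
--   gitAnnexChildProcess ["version"]
--       (\p -> p { std_out = CreatePipe })
--       (\_hin mhout _herr pid -> {- read mhout, wait on pid -} ...)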

Annex/Perms.hs Normal file

@@ -0,0 +1,225 @@
{- git-annex file permissions
-
- Copyright 2012-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Perms (
FileMode,
setAnnexFilePerm,
setAnnexDirPerm,
resetAnnexFilePerm,
annexFileMode,
createAnnexDirectory,
createWorkTreeDirectory,
noUmask,
freezeContent,
isContentWritePermOk,
thawContent,
chmodContent,
createContentDir,
freezeContentDir,
thawContentDir,
modifyContent,
withShared,
) where
import Annex.Common
import Utility.FileMode
import Git
import Git.ConfigTypes
import qualified Annex
import Config
withShared :: (SharedRepository -> Annex a) -> Annex a
withShared a = a =<< coreSharedRepository <$> Annex.getGitConfig
setAnnexFilePerm :: FilePath -> Annex ()
setAnnexFilePerm = setAnnexPerm False
setAnnexDirPerm :: FilePath -> Annex ()
setAnnexDirPerm = setAnnexPerm True
{- Sets appropriate file mode for a file or directory in the annex,
- other than the content files and content directory. Normally,
- don't change the mode, but with core.sharedRepository set,
- allow the group to write, etc. -}
setAnnexPerm :: Bool -> FilePath -> Annex ()
setAnnexPerm = setAnnexPerm' Nothing
setAnnexPerm' :: Maybe ([FileMode] -> FileMode -> FileMode) -> Bool -> FilePath -> Annex ()
setAnnexPerm' modef isdir file = unlessM crippledFileSystem $
withShared $ liftIO . go
where
go GroupShared = void $ tryIO $ modifyFileMode file $ modef' $
groupSharedModes ++
if isdir then [ ownerExecuteMode, groupExecuteMode ] else []
go AllShared = void $ tryIO $ modifyFileMode file $ modef' $
readModes ++
[ ownerWriteMode, groupWriteMode ] ++
if isdir then executeModes else []
go _ = case modef of
Nothing -> noop
Just f -> void $ tryIO $
modifyFileMode file $ f []
modef' = fromMaybe addModes modef
resetAnnexFilePerm :: FilePath -> Annex ()
resetAnnexFilePerm = resetAnnexPerm False
{- Like setAnnexPerm, but ignores the current mode of the file entirely,
- and sets the same mode that the umask would result in when creating a
- new file.
-
- Useful eg, after creating a temporary file with locked down modes,
- which is going to be moved to a non-temporary location and needs
- usual modes.
-}
resetAnnexPerm :: Bool -> FilePath -> Annex ()
resetAnnexPerm isdir file = unlessM crippledFileSystem $ do
defmode <- liftIO defaultFileMode
let modef moremodes _oldmode = addModes moremodes defmode
setAnnexPerm' (Just modef) isdir file
{- Gets the appropriate mode to use for creating a file in the annex
- (other than content files, which are locked down more). The umask is not
- taken into account; this is for use with actions that create the file
- and apply the umask automatically. -}
annexFileMode :: Annex FileMode
annexFileMode = withShared $ return . go
where
go GroupShared = sharedmode
go AllShared = combineModes (sharedmode:readModes)
go _ = stdFileMode
sharedmode = combineModes groupSharedModes
{- Creates a directory inside the gitAnnexDir, creating any parent
- directories up to and including the gitAnnexDir.
- Makes directories with appropriate permissions. -}
createAnnexDirectory :: FilePath -> Annex ()
createAnnexDirectory dir = do
top <- parentDir . fromRawFilePath <$> fromRepo gitAnnexDir
createDirectoryUnder' top dir createdir
where
createdir p = do
liftIO $ createDirectory p
setAnnexDirPerm p
{- Create a directory in the git work tree, creating any parent
- directories up to the top of the work tree.
-
- Uses default permissions.
-}
createWorkTreeDirectory :: FilePath -> Annex ()
createWorkTreeDirectory dir = do
fromRepo repoWorkTree >>= liftIO . \case
Just wt -> createDirectoryUnder (fromRawFilePath wt) dir
-- Should never happen, but let whatever tries to write
-- to the directory be what throws an exception, as that
-- will be clearer than an exception from here.
Nothing -> noop
{- Normally, blocks writing to an annexed file, and modifies file
- permissions to allow reading it.
-
- When core.sharedRepository is set, the write bits are not removed from
- the file, but instead the appropriate group write bits are set. This is
- necessary to let other users in the group lock the file. But, in a
- shared repository, the current user may not be able to change a file
- owned by another user, so failure to set this mode is ignored.
-}
freezeContent :: FilePath -> Annex ()
freezeContent file = unlessM crippledFileSystem $
withShared go
where
go GroupShared = liftIO $ void $ tryIO $ modifyFileMode file $
addModes [ownerReadMode, groupReadMode, ownerWriteMode, groupWriteMode]
go AllShared = liftIO $ void $ tryIO $ modifyFileMode file $
addModes (readModes ++ writeModes)
go _ = liftIO $ modifyFileMode file $
removeModes writeModes .
addModes [ownerReadMode]
isContentWritePermOk :: FilePath -> Annex Bool
isContentWritePermOk file = ifM crippledFileSystem
( return True
, withShared go
)
where
go GroupShared = want [ownerWriteMode, groupWriteMode]
go AllShared = want writeModes
go _ = return True
want wantmode =
liftIO (catchMaybeIO $ fileMode <$> getFileStatus file) >>= return . \case
Nothing -> True
Just havemode -> havemode == combineModes (havemode:wantmode)
{- Adjusts read mode of annexed file per core.sharedRepository setting. -}
chmodContent :: FilePath -> Annex ()
chmodContent file = unlessM crippledFileSystem $
withShared go
where
go GroupShared = liftIO $ void $ tryIO $ modifyFileMode file $
addModes [ownerReadMode, groupReadMode]
go AllShared = liftIO $ void $ tryIO $ modifyFileMode file $
addModes readModes
go _ = liftIO $ modifyFileMode file $
addModes [ownerReadMode]
{- Allows writing to an annexed file that freezeContent was called on
- before. -}
thawContent :: FilePath -> Annex ()
thawContent file = thawPerms $ withShared go
where
go GroupShared = liftIO $ void $ tryIO $ groupWriteRead file
go AllShared = liftIO $ void $ tryIO $ groupWriteRead file
go _ = liftIO $ allowWrite file
{- Runs an action that thaws a file's permissions. This will probably
- fail on a crippled filesystem. But, if file modes are supported on a
- crippled filesystem, the file may be frozen, so try to thaw it. -}
thawPerms :: Annex () -> Annex ()
thawPerms a = ifM crippledFileSystem
( void $ tryNonAsync a
, a
)
{- Blocks writing to the directory an annexed file is in, to prevent the
- file accidentally being deleted. However, if core.sharedRepository
- is set, this is not done, since the group must be allowed to delete the
- file.
-}
freezeContentDir :: FilePath -> Annex ()
freezeContentDir file = unlessM crippledFileSystem $
withShared go
where
dir = parentDir file
go GroupShared = liftIO $ void $ tryIO $ groupWriteRead dir
go AllShared = liftIO $ void $ tryIO $ groupWriteRead dir
go _ = liftIO $ preventWrite dir
thawContentDir :: FilePath -> Annex ()
thawContentDir file = thawPerms $ liftIO $ allowWrite $ parentDir file
{- Makes the directory tree to store an annexed file's content,
- with appropriate permissions on each level. -}
createContentDir :: FilePath -> Annex ()
createContentDir dest = do
unlessM (liftIO $ doesDirectoryExist dir) $
createAnnexDirectory dir
-- might have already existed with restricted perms
unlessM crippledFileSystem $
liftIO $ allowWrite dir
where
dir = parentDir dest
{- Creates the content directory for a file if it doesn't already exist,
- or thaws it if it does, then runs an action to modify the file, and
- finally, freezes the content directory. -}
modifyContent :: FilePath -> Annex a -> Annex a
modifyContent f a = do
createContentDir f -- also thaws it
v <- tryNonAsync a
freezeContentDir f
either throwM return v
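The modes combined above come from Utility.FileMode, but the underlying operation is a bitwise union of permission bits. A standalone sketch using the unix package; combineModesSketch and groupSharedSketch are hypothetical names:
import System.Posix.Files
import System.Posix.Types (FileMode)
combineModesSketch :: [FileMode] -> FileMode
combineModesSketch = foldr unionFileModes nullFileMode
-- e.g. a group-shared file mode: owner and group read/write
groupSharedSketch :: FileMode
groupSharedSketch = combineModesSketch
    [ ownerReadMode, ownerWriteMode, groupReadMode, groupWriteMode ]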

Annex/PidLock.hs Normal file

@@ -0,0 +1,127 @@
{- Pid locking support.
-
- Copyright 2014-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.PidLock where
import Annex.Common
import Annex.GitOverlay
import Git
import Git.Env
#ifndef mingw32_HOST_OS
import qualified Utility.LockFile.PidLock as PidF
import qualified Utility.LockPool.PidLock as PidP
import Utility.LockPool (dropLock)
import Utility.Env
import Config
#endif
{- When pid locking is in use, this tries to take the pid lock (unless
- the process already has it), and if successful, holds it while
- running the child process. The child process is run with an env var
- set, which prevents it from trying to take the pid lock itself.
-
- This way, any locking the parent does will not get in the way of
- the child. The child is assumed to not do any locking that conflicts
- with the parent, but if it did happen to do that, it would be noticed
- when git-annex is used without pid locking.
-
- If another process is already holding the pid lock, the child process
- is still run, but without setting the env var, so it can try to take the
- pid lock itself, and fail however is appropriate for it in that
- situation.
-}
pidLockChildProcess
:: FilePath
-> [String]
-> (CreateProcess -> CreateProcess)
-> (Maybe Handle -> Maybe Handle -> Maybe Handle -> ProcessHandle -> IO a)
-> Annex a
pidLockChildProcess cmd ps f a = do
let p = f (proc cmd ps)
let gonopidlock = withCreateProcess p a
#ifndef mingw32_HOST_OS
pidLockFile >>= liftIO . \case
Nothing -> gonopidlock
Just pidlock -> bracket
(setup pidlock)
cleanup
(go gonopidlock p pidlock)
where
setup pidlock = PidP.tryLock pidlock
cleanup (Just h) = dropLock h
cleanup Nothing = return ()
go gonopidlock _ _ Nothing = gonopidlock
go _ p pidlock (Just _h) = do
v <- PidF.pidLockEnv pidlock
baseenv <- case env p of
Nothing -> getEnvironment
Just baseenv -> pure baseenv
let p' = p { env = Just ((v, PidF.pidLockEnvValue) : baseenv) }
withCreateProcess p' a
#else
liftIO gonopidlock
#endif
{- Wrap around actions that may run a git-annex child process via a git
- command.
-
- This is like pidLockChildProcess, but rather than running a process
- itself, it runs the action with a modified Annex state that passes the
- necessary env var.
-}
runsGitAnnexChildProcessViaGit :: Annex a -> Annex a
#ifndef mingw32_HOST_OS
runsGitAnnexChildProcessViaGit a = pidLockFile >>= \case
Nothing -> a
Just pidlock -> bracket (setup pidlock) cleanup (go pidlock)
where
setup pidlock = liftIO $ PidP.tryLock pidlock
cleanup (Just h) = liftIO $ dropLock h
cleanup Nothing = return ()
go _ Nothing = a
go pidlock (Just _h) = do
v <- liftIO $ PidF.pidLockEnv pidlock
let addenv g = do
g' <- liftIO $ addGitEnv g v PidF.pidLockEnvValue
return (g', ())
let rmenv oldg g
| any (\(k, _) -> k == v) (fromMaybe [] (Git.gitEnv oldg)) = g
| otherwise =
let e' = case Git.gitEnv g of
Just e -> Just (delEntry v e)
Nothing -> Nothing
in g { Git.gitEnv = e' }
withAltRepo addenv rmenv (const a)
#else
runsGitAnnexChildProcessViaGit a = a
#endif
runsGitAnnexChildProcessViaGit' :: Git.Repo -> (Git.Repo -> IO a) -> Annex a
#ifndef mingw32_HOST_OS
runsGitAnnexChildProcessViaGit' r a = pidLockFile >>= \case
Nothing -> liftIO $ a r
Just pidlock -> liftIO $ bracket (setup pidlock) cleanup (go pidlock)
where
setup pidlock = PidP.tryLock pidlock
cleanup (Just h) = dropLock h
cleanup Nothing = return ()
go _ Nothing = a r
go pidlock (Just _h) = do
v <- PidF.pidLockEnv pidlock
r' <- addGitEnv r v PidF.pidLockEnvValue
a r'
#else
runsGitAnnexChildProcessViaGit' r a = liftIO $ a r
#endif
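The parent/child handoff above boils down to: take the pid lock when configured, then run the child with the pid-lock env var set so it skips taking the lock itself. Sketched shape, with runChildWithEnv and runChildWithoutEnv as hypothetical placeholders:
--   bracket (PidP.tryLock pidlock) (maybe (return ()) dropLock) $ \case
--       Nothing -> runChildWithoutEnv     -- lock busy; child handles it
--       Just _  -> do
--           v <- PidF.pidLockEnv pidlock
--           runChildWithEnv (v, PidF.pidLockEnvValue)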

Annex/Queue.hs Normal file

@@ -0,0 +1,94 @@
{- git-annex command queue
-
- Copyright 2011, 2012, 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
module Annex.Queue (
addCommand,
addInternalAction,
addUpdateIndex,
flush,
flushWhenFull,
size,
get,
mergeFrom,
) where
import Annex.Common
import Annex hiding (new)
import Annex.LockFile
import qualified Git.Queue
import qualified Git.UpdateIndex
{- Adds a git command to the queue. -}
addCommand :: String -> [CommandParam] -> [FilePath] -> Annex ()
addCommand command params files = do
q <- get
store =<< flushWhenFull =<<
(Git.Queue.addCommand command params files q =<< gitRepo)
addInternalAction :: Git.Queue.InternalActionRunner Annex -> [(FilePath, IO Bool)] -> Annex ()
addInternalAction runner files = do
q <- get
store =<< flushWhenFull =<<
(Git.Queue.addInternalAction runner files q =<< gitRepo)
{- Adds an update-index stream to the queue. -}
addUpdateIndex :: Git.UpdateIndex.Streamer -> Annex ()
addUpdateIndex streamer = do
q <- get
store =<< flushWhenFull =<<
(Git.Queue.addUpdateIndex streamer q =<< gitRepo)
{- Runs the queue if it is full. -}
flushWhenFull :: Git.Queue.Queue Annex -> Annex (Git.Queue.Queue Annex)
flushWhenFull q
| Git.Queue.full q = flush' q
| otherwise = return q
{- Runs (and empties) the queue. -}
flush :: Annex ()
flush = do
q <- get
unless (0 == Git.Queue.size q) $ do
store =<< flush' q
{- When there are multiple worker threads, each has its own queue.
- And of course multiple git-annex processes may be running each with its
- own queue.
-
- But, flushing two queues at the same time could lead to failures due to
- git locking files. So, only one queue is allowed to flush at a time.
-}
flush' :: Git.Queue.Queue Annex -> Annex (Git.Queue.Queue Annex)
flush' q = withExclusiveLock gitAnnexGitQueueLock $ do
showStoringStateAction
Git.Queue.flush q =<< gitRepo
{- Gets the size of the queue. -}
size :: Annex Int
size = Git.Queue.size <$> get
get :: Annex (Git.Queue.Queue Annex)
get = maybe new return =<< getState repoqueue
new :: Annex (Git.Queue.Queue Annex)
new = do
q <- Git.Queue.new . annexQueueSize <$> getGitConfig
store q
return q
store :: Git.Queue.Queue Annex -> Annex ()
store q = changeState $ \s -> s { repoqueue = Just q }
mergeFrom :: AnnexState -> Annex ()
mergeFrom st = case repoqueue st of
Nothing -> noop
Just newq -> do
q <- get
let !q' = Git.Queue.merge q newq
store =<< flushWhenFull q'
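A standalone sketch of the flush-when-full discipline; the Q type is hypothetical, and the real queue also distinguishes command, internal-action, and update-index items:
data Q a = Q { qlimit :: Int, qpending :: [a] }
enqueue :: (Q a -> IO (Q a)) -> a -> Q a -> IO (Q a)
enqueue flushQ x q
    | length (qpending q') >= qlimit q' = flushQ q'  -- full: flush now
    | otherwise = return q'
  where
    q' = q { qpending = x : qpending q }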

Annex/RemoteTrackingBranch.hs Normal file

@@ -0,0 +1,96 @@
{- git-annex remote tracking branches
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.RemoteTrackingBranch
( RemoteTrackingBranch
, mkRemoteTrackingBranch
, fromRemoteTrackingBranch
, setRemoteTrackingBranch
, makeRemoteTrackingBranchMergeCommit
, makeRemoteTrackingBranchMergeCommit'
, getRemoteTrackingBranchImportHistory
) where
import Annex.Common
import Annex.CatFile
import qualified Annex
import Git.Types
import qualified Git.Ref
import qualified Git.Branch
import Git.History
import qualified Types.Remote as Remote
import qualified Data.Set as S
newtype RemoteTrackingBranch = RemoteTrackingBranch
{ fromRemoteTrackingBranch :: Ref }
deriving (Show, Eq)
{- Makes a remote tracking branch corresponding to a local branch.
- Note that the local branch does not need to exist yet. -}
mkRemoteTrackingBranch :: Remote -> Branch -> RemoteTrackingBranch
mkRemoteTrackingBranch remote ref = RemoteTrackingBranch $
Git.Ref.underBase ("refs/remotes/" ++ Remote.name remote) ref
{- Set remote tracking branch to point to a commit. -}
setRemoteTrackingBranch :: RemoteTrackingBranch -> Sha -> Annex ()
setRemoteTrackingBranch tb commit =
inRepo $ Git.Branch.update' (fromRemoteTrackingBranch tb) commit
{- Makes a merge commit that preserves the import history of the
- RemoteTrackingBranch, while grafting new git history into it.
-
- The second parent of the merge commit is the past history of the
- RemoteTrackingBranch as imported from a remote. When importing a
- history of trees from a remote, commits can be synthesized from
- them, but such commits won't have the same sha due to eg date differing.
- But since we know that the second parent consists entirely of such
- import commits, they can be reused when updating the
- RemoteTrackingBranch.
-}
makeRemoteTrackingBranchMergeCommit :: RemoteTrackingBranch -> Sha -> Annex Sha
makeRemoteTrackingBranchMergeCommit tb commitsha =
-- Check if the tracking branch exists.
inRepo (Git.Ref.sha (fromRemoteTrackingBranch tb)) >>= \case
Nothing -> return commitsha
Just _ -> inRepo (getHistoryToDepth 1 (fromRemoteTrackingBranch tb)) >>= \case
Nothing -> return commitsha
Just (History hc _) -> case historyCommitParents hc of
[_, importhistory] -> do
treesha <- maybe
(giveup $ "Unable to cat commit " ++ fromRef commitsha)
commitTree
<$> catCommit commitsha
makeRemoteTrackingBranchMergeCommit' commitsha importhistory treesha
-- Earlier versions of git-annex did not
-- make the merge commit, or perhaps
-- something else changed where the
-- tracking branch pointed.
_ -> return commitsha
makeRemoteTrackingBranchMergeCommit' :: Sha -> Sha -> Sha -> Annex Sha
makeRemoteTrackingBranchMergeCommit' commitsha importedhistory treesha = do
cmode <- annexCommitMode <$> Annex.getGitConfig
inRepo $ Git.Branch.commitTree
cmode
"remote tracking branch"
[commitsha, importedhistory]
treesha
{- When makeRemoteTrackingBranchMergeCommit was used, this finds the
- import history, starting from the second parent of the merge commit.
-}
getRemoteTrackingBranchImportHistory :: History HistoryCommit -> Maybe (History HistoryCommit)
getRemoteTrackingBranchImportHistory (History hc s) =
case historyCommitParents hc of
[_, importhistory] -> go importhistory (S.toList s)
_ -> Nothing
where
go _ [] = Nothing
go i (h@(History hc' _):hs)
| historyCommit hc' == i = Just h
| otherwise = go i hs
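The commit graph shape maintained above, sketched:
--   merge commit on the remote tracking branch
--   |-- first parent:  the new commit for the current import
--   `-- second parent: the previous import history, whose synthesized
--                      commits can be reused on the next import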

Annex/ReplaceFile.hs Normal file

@@ -0,0 +1,80 @@
{- git-annex file replacing
-
- Copyright 2013-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.ReplaceFile (
replaceGitAnnexDirFile,
replaceGitDirFile,
replaceWorkTreeFile,
replaceFile,
) where
import Annex.Common
import Annex.Tmp
import Annex.Perms
import Git
import Utility.Tmp.Dir
#ifndef mingw32_HOST_OS
import Utility.Path.Max
#endif
{- replaceFile on a file located inside the gitAnnexDir. -}
replaceGitAnnexDirFile :: FilePath -> (FilePath -> Annex a) -> Annex a
replaceGitAnnexDirFile = replaceFile createAnnexDirectory
{- replaceFile on a file located inside the .git directory. -}
replaceGitDirFile :: FilePath -> (FilePath -> Annex a) -> Annex a
replaceGitDirFile = replaceFile $ \dir -> do
top <- fromRawFilePath <$> fromRepo localGitDir
liftIO $ createDirectoryUnder top dir
{- replaceFile on a worktree file. -}
replaceWorkTreeFile :: FilePath -> (FilePath -> Annex a) -> Annex a
replaceWorkTreeFile = replaceFile createWorkTreeDirectory
{- Replaces a possibly already existing file with a new version,
- atomically, by running an action.
-
- The action is passed the name of temp file, in a temp directory,
- which it can write to, and once done the temp file is moved into place
- and anything else in the temp directory is deleted.
-
- The action can throw an exception, in which case the temp directory
- will be deleted, and the existing file will be preserved.
-
- Throws an IO exception when it was unable to replace the file.
-
- The createdirectory action is only run when moving the file into place
- fails, and can create any parent directory structure needed.
-}
replaceFile :: (FilePath -> Annex ()) -> FilePath -> (FilePath -> Annex a) -> Annex a
replaceFile createdirectory file action = withOtherTmp $ \othertmpdir -> do
#ifndef mingw32_HOST_OS
-- Use part of the filename as the template for the temp
-- directory. This does not need to be unique, but it
-- makes it more clear what this temp directory is for.
filemax <- liftIO $ fileNameLengthLimit othertmpdir
let basetmp = take (filemax `div` 2) (takeFileName file)
#else
-- Windows has limits on the whole path length, so keep
-- it short.
let basetmp = "t"
#endif
withTmpDirIn othertmpdir basetmp $ \tmpdir -> do
let tmpfile = tmpdir </> basetmp
r <- action tmpfile
replaceFileFrom tmpfile file createdirectory
return r
replaceFileFrom :: FilePath -> FilePath -> (FilePath -> Annex ()) -> Annex ()
replaceFileFrom src dest createdirectory = go `catchIO` fallback
where
go = liftIO $ moveFile src dest
fallback _ = do
createdirectory (parentDir dest)
go
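A minimal standalone sketch of the replaceFile idea, assuming the temp package and a temp directory on the same filesystem so the rename is atomic; the real code above additionally handles permissions, Windows path-length limits, and creating missing parent directories on failure:
import System.IO.Temp (withSystemTempDirectory)
import System.Directory (renameFile)
import System.FilePath ((</>))
replaceFileSketch :: FilePath -> (FilePath -> IO a) -> IO a
replaceFileSketch file action =
    withSystemTempDirectory "replace" $ \tmpdir -> do
        let tmpfile = tmpdir </> "new"
        r <- action tmpfile       -- caller writes the new version here
        renameFile tmpfile file   -- swap into place
        return r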

Annex/SpecialRemote.hs Normal file

@@ -0,0 +1,108 @@
{- git-annex special remote configuration
-
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.SpecialRemote (
module Annex.SpecialRemote,
module Annex.SpecialRemote.Config
) where
import Annex.Common
import Annex.SpecialRemote.Config
import Types.Remote (RemoteConfig, SetupStage(..), typename, setup)
import Types.GitConfig
import Types.ProposedAccepted
import Config
import Remote.List
import Logs.Remote
import Logs.Trust
import qualified Types.Remote as Remote
import Git.Types (RemoteName)
import qualified Data.Map as M
import Data.Ord
{- See if there's an existing special remote with this name.
-
- Prefer remotes that are not dead when a name appears multiple times. -}
findExisting :: RemoteName -> Annex (Maybe (UUID, RemoteConfig, Maybe (ConfigFrom UUID)))
findExisting name = do
t <- trustMap
headMaybe
. sortBy (comparing $ \(u, _, _) -> Down $ M.lookup u t)
. findByRemoteConfig (\c -> lookupName c == Just name)
<$> Logs.Remote.remoteConfigMap
newConfig
:: RemoteName
-> Maybe (Sameas UUID)
-> RemoteConfig
-- ^ configuration provided by the user
-> M.Map UUID RemoteConfig
-- ^ configuration of other special remotes, to inherit from
-- when sameas is used
-> RemoteConfig
newConfig name sameas fromuser m = case sameas of
Nothing -> M.insert nameField (Proposed name) fromuser
Just (Sameas u) -> addSameasInherited m $ M.fromList
[ (sameasNameField, Proposed name)
, (sameasUUIDField, Proposed (fromUUID u))
] `M.union` fromuser
specialRemoteMap :: Annex (M.Map UUID RemoteName)
specialRemoteMap = do
m <- Logs.Remote.remoteConfigMap
return $ M.fromList $ mapMaybe go (M.toList m)
where
go (u, c) = case lookupName c of
Nothing -> Nothing
Just n -> Just (u, n)
{- find the remote type -}
findType :: RemoteConfig -> Either String RemoteType
findType config = maybe unspecified (specified . fromProposedAccepted) $
M.lookup typeField config
where
unspecified = Left "Specify the type of remote with type="
specified s = case filter (findtype s) remoteTypes of
[] -> Left $ "Unknown remote type " ++ s
++ " (pick from: "
++ intercalate " " (map typename remoteTypes)
++ ")"
(t:_) -> Right t
findtype s i = typename i == s
autoEnable :: Annex ()
autoEnable = do
remotemap <- M.filter configured <$> remoteConfigMap
enabled <- getenabledremotes
forM_ (M.toList remotemap) $ \(cu, c) -> unless (cu `M.member` enabled) $ do
let u = case findSameasUUID c of
Just (Sameas u') -> u'
Nothing -> cu
case (lookupName c, findType c) of
(Just name, Right t) -> whenM (canenable u) $ do
showSideAction $ "Auto enabling special remote " ++ name
dummycfg <- liftIO dummyRemoteGitConfig
tryNonAsync (setup t (Enable c) (Just u) Nothing c dummycfg) >>= \case
Left e -> warning (show e)
Right (_c, _u) ->
when (cu /= u) $
setConfig (remoteAnnexConfig c "config-uuid") (fromUUID cu)
_ -> return ()
where
configured rc = fromMaybe False $
trueFalseParser' . fromProposedAccepted
=<< M.lookup autoEnableField rc
canenable u = (/= DeadTrusted) <$> lookupTrust u
getenabledremotes = M.fromList
. map (\r -> (getcu r, r))
<$> remoteList
getcu r = fromMaybe
(Remote.uuid r)
(remoteAnnexConfigUUID (Remote.gitconfig r))

Annex/SpecialRemote/Config.hs Normal file

@@ -0,0 +1,280 @@
{- git-annex special remote configuration
-
- Copyright 2019-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Annex.SpecialRemote.Config where
import Common
import Types.Remote (configParser)
import Types
import Types.UUID
import Types.ProposedAccepted
import Types.RemoteConfig
import Types.GitConfig
import qualified Data.Map as M
import qualified Data.Set as S
import Data.Typeable
import GHC.Stack
newtype Sameas t = Sameas t
deriving (Show)
newtype ConfigFrom t = ConfigFrom t
deriving (Show)
{- The name of a configured remote is stored in its config using this key. -}
nameField :: RemoteConfigField
nameField = Accepted "name"
{- The name of a sameas remote is stored using this key instead.
- This prevents old versions of git-annex getting confused. -}
sameasNameField :: RemoteConfigField
sameasNameField = Accepted "sameas-name"
lookupName :: RemoteConfig -> Maybe String
lookupName c = fmap fromProposedAccepted $
M.lookup nameField c <|> M.lookup sameasNameField c
instance RemoteNameable RemoteConfig where
getRemoteName c = fromMaybe "" (lookupName c)
{- The uuid that a sameas remote is the same as is stored in this key. -}
sameasUUIDField :: RemoteConfigField
sameasUUIDField = Accepted "sameas-uuid"
{- The type of a remote is stored in its config using this key. -}
typeField :: RemoteConfigField
typeField = Accepted "type"
autoEnableField :: RemoteConfigField
autoEnableField = Accepted "autoenable"
encryptionField :: RemoteConfigField
encryptionField = Accepted "encryption"
macField :: RemoteConfigField
macField = Accepted "mac"
cipherField :: RemoteConfigField
cipherField = Accepted "cipher"
cipherkeysField :: RemoteConfigField
cipherkeysField = Accepted "cipherkeys"
pubkeysField :: RemoteConfigField
pubkeysField = Accepted "pubkeys"
chunkField :: RemoteConfigField
chunkField = Accepted "chunk"
chunksizeField :: RemoteConfigField
chunksizeField = Accepted "chunksize"
embedCredsField :: RemoteConfigField
embedCredsField = Accepted "embedcreds"
preferreddirField :: RemoteConfigField
preferreddirField = Accepted "preferreddir"
exportTreeField :: RemoteConfigField
exportTreeField = Accepted "exporttree"
importTreeField :: RemoteConfigField
importTreeField = Accepted "importtree"
exportTree :: ParsedRemoteConfig -> Bool
exportTree = fromMaybe False . getRemoteConfigValue exportTreeField
importTree :: ParsedRemoteConfig -> Bool
importTree = fromMaybe False . getRemoteConfigValue importTreeField
{- Parsers for fields that are common to all special remotes. -}
commonFieldParsers :: [RemoteConfigFieldParser]
commonFieldParsers =
[ optionalStringParser nameField
(FieldDesc "name for the special remote")
, optionalStringParser sameasNameField HiddenField
, optionalStringParser sameasUUIDField HiddenField
, optionalStringParser typeField
(FieldDesc "type of special remote")
, trueFalseParser autoEnableField (Just False)
(FieldDesc "automatically enable special remote")
, yesNoParser exportTreeField (Just False)
(FieldDesc "export trees of files to this remote")
, yesNoParser importTreeField (Just False)
(FieldDesc "import trees of files from this remote")
, optionalStringParser preferreddirField
(FieldDesc "directory whose content is preferred")
]
{- A remote with sameas-uuid set will inherit these values from the config
- of that uuid. These values cannot be overridden in the remote's config. -}
sameasInherits :: S.Set RemoteConfigField
sameasInherits = S.fromList
-- encryption configuration is necessarily the same for two
-- remotes that access the same data store
[ encryptionField
, macField
, cipherField
, cipherkeysField
, pubkeysField
-- legacy chunking was either enabled or not, so has to be the same
-- across configs for remotes that access the same data
-- (new-style chunking does not have that limitation)
, chunksizeField
]
{- Each RemoteConfig that has a sameas-uuid inherits some fields
- from it. Such fields can only be set by inheritance; the RemoteConfig
- cannot provide values from them. -}
addSameasInherited :: M.Map UUID RemoteConfig -> RemoteConfig -> RemoteConfig
addSameasInherited m c = case findSameasUUID c of
Nothing -> c
Just (Sameas sameasuuid) -> case M.lookup sameasuuid m of
Nothing -> c
Just parentc ->
M.withoutKeys c sameasInherits
`M.union`
M.restrictKeys parentc sameasInherits
findSameasUUID :: RemoteConfig -> Maybe (Sameas UUID)
findSameasUUID c = Sameas . toUUID . fromProposedAccepted
<$> M.lookup sameasUUIDField c
{- Remove any fields inherited from a sameas-uuid. When storing a
- RemoteConfig, those fields don't get stored, since they were already
- inherited. -}
removeSameasInherited :: RemoteConfig -> RemoteConfig
removeSameasInherited c = case M.lookup sameasUUIDField c of
Nothing -> c
Just _ -> M.withoutKeys c sameasInherits
{- Finds remote uuids with matching RemoteConfig. -}
findByRemoteConfig :: (RemoteConfig -> Bool) -> M.Map UUID RemoteConfig -> [(UUID, RemoteConfig, Maybe (ConfigFrom UUID))]
findByRemoteConfig matching = map sameasuuid . filter (matching . snd) . M.toList
where
sameasuuid (u, c) = case M.lookup sameasUUIDField c of
Nothing -> (u, c, Nothing)
Just u' -> (toUUID (fromProposedAccepted u'), c, Just (ConfigFrom u))
{- Extracts a value from ParsedRemoteConfig. -}
getRemoteConfigValue :: HasCallStack => Typeable v => RemoteConfigField -> ParsedRemoteConfig -> Maybe v
getRemoteConfigValue f (ParsedRemoteConfig m _) = case M.lookup f m of
Just (RemoteConfigValue v) -> case cast v of
Just v' -> Just v'
Nothing -> error $ unwords
[ "getRemoteConfigValue"
, fromProposedAccepted f
, "found value of unexpected type"
, show (typeOf v) ++ "."
, "This is a bug in git-annex!"
]
Nothing -> Nothing
{- Gets all fields that remoteConfigRestPassthrough matched. -}
getRemoteConfigPassedThrough :: ParsedRemoteConfig -> M.Map RemoteConfigField String
getRemoteConfigPassedThrough (ParsedRemoteConfig m _) =
flip M.mapMaybe m $ \(RemoteConfigValue v) ->
case cast v of
Just (PassedThrough s) -> Just s
Nothing -> Nothing
newtype PassedThrough = PassedThrough String
parsedRemoteConfig :: RemoteType -> RemoteConfig -> Annex ParsedRemoteConfig
parsedRemoteConfig t c = either (const emptycfg) id . parseRemoteConfig c
<$> configParser t c
where
emptycfg = ParsedRemoteConfig mempty c
parseRemoteConfig :: RemoteConfig -> RemoteConfigParser -> Either String ParsedRemoteConfig
parseRemoteConfig c rpc =
go [] c (remoteConfigFieldParsers rpc ++ commonFieldParsers)
where
go l c' [] =
let (passover, leftovers) = partition
(maybe (const False) fst (remoteConfigRestPassthrough rpc) . fst)
(M.toList c')
leftovers' = filter (notaccepted . fst) leftovers
in if not (null leftovers')
then Left $ "Unexpected parameters: " ++
unwords (map (fromProposedAccepted . fst) leftovers')
else
let m = M.fromList $
l ++ map (uncurry passthrough) passover
in Right (ParsedRemoteConfig m c)
go l c' (p:rest) = do
let f = parserForField p
(valueParser p) (M.lookup f c) c >>= \case
Just v -> go ((f,v):l) (M.delete f c') rest
Nothing -> go l (M.delete f c') rest
passthrough f v = (f, RemoteConfigValue (PassedThrough (fromProposedAccepted v)))
notaccepted (Proposed _) = True
notaccepted (Accepted _) = False
optionalStringParser :: RemoteConfigField -> FieldDesc -> RemoteConfigFieldParser
optionalStringParser f fielddesc = RemoteConfigFieldParser
{ parserForField = f
, valueParser = p
, fieldDesc = fielddesc
, valueDesc = Nothing
}
where
p (Just v) _c = Right (Just (RemoteConfigValue (fromProposedAccepted v)))
p Nothing _c = Right Nothing
yesNoParser :: RemoteConfigField -> Maybe Bool -> FieldDesc -> RemoteConfigFieldParser
yesNoParser f mdef fd = genParser yesno f mdef fd
(Just (ValueDesc "yes or no"))
where
yesno "yes" = Just True
yesno "no" = Just False
yesno _ = Nothing
trueFalseParser :: RemoteConfigField -> Maybe Bool -> FieldDesc -> RemoteConfigFieldParser
trueFalseParser f mdef fd = genParser trueFalseParser' f mdef fd
(Just (ValueDesc "true or false"))
-- Not using Git.Config.isTrueFalse because git supports
-- a lot of other values for true and false in its configs,
-- and this is not a git config and we want to avoid that mess.
trueFalseParser' :: String -> Maybe Bool
trueFalseParser' "true" = Just True
trueFalseParser' "false" = Just False
trueFalseParser' _ = Nothing
genParser
:: Typeable t
=> (String -> Maybe t)
-> RemoteConfigField
-> Maybe t -- ^ default if not configured
-> FieldDesc
-> Maybe ValueDesc
-> RemoteConfigFieldParser
genParser parse f mdef fielddesc valuedesc = RemoteConfigFieldParser
{ parserForField = f
, valueParser = p
, fieldDesc = fielddesc
, valueDesc = valuedesc
}
where
p Nothing _c = Right (fmap RemoteConfigValue mdef)
p (Just v) _c = case parse (fromProposedAccepted v) of
Just b -> Right (Just (RemoteConfigValue b))
Nothing -> case v of
Accepted _ -> Right (fmap RemoteConfigValue mdef)
Proposed _ -> Left $
"Bad value for " ++ fromProposedAccepted f ++
case valuedesc of
Just (ValueDesc vd) ->
" (expected " ++ vd ++ ")"
Nothing -> ""
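Sketched outcomes of genParser's fallback rules, for a yes/no field: a bad Proposed value (typed by the user) is rejected with a message, while a bad Accepted value, such as one stored by an older git-annex, falls back to the default rather than failing:
--   "yes"   Proposed ~> Right (Just (RemoteConfigValue True))
--   "maybe" Proposed ~> Left "Bad value for ... (expected yes or no)"
--   "maybe" Accepted ~> Right (the configured default)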

Annex/Ssh.hs Normal file

@@ -0,0 +1,475 @@
{- git-annex ssh interface, with connection caching
-
- Copyright 2012-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.Ssh (
ConsumeStdin(..),
SshCommand,
sshCommand,
sshOptions,
sshCacheDir,
sshReadPort,
forceSshCleanup,
sshOptionsEnv,
sshOptionsTo,
inRepoWithSshOptionsTo,
runSshOptions,
sshAskPassEnv,
runSshAskPass
) where
import Annex.Common
import Annex.LockFile
import qualified BuildInfo
import qualified Annex
import qualified Git
import qualified Git.Url
import Config
import Annex.Path
import Utility.Env
import Utility.Hash
import Types.CleanupActions
import Annex.Concurrent.Utility
import Types.Concurrency
import Git.Env
import Git.Ssh
import Annex.Perms
#ifndef mingw32_HOST_OS
import Annex.LockPool
#endif
import Control.Concurrent.STM
import qualified Data.ByteString as S
{- Some ssh commands are fed stdin on a pipe and so should be allowed to
- consume it. But ssh commands that are not piped stdin should generally
- not be allowed to consume the process's stdin. -}
data ConsumeStdin = ConsumeStdin | NoConsumeStdin
{- Generates a command to ssh to a given host (or user@host) on a given
- port. This includes connection caching parameters, and any ssh-options.
- If GIT_SSH or GIT_SSH_COMMAND is enabled, they are used instead. -}
sshCommand :: ConsumeStdin -> (SshHost, Maybe SshPort) -> RemoteGitConfig -> SshCommand -> Annex (FilePath, [CommandParam])
sshCommand cs (host, port) gc remotecmd = ifM (liftIO safe_GIT_SSH)
( maybe go return
=<< liftIO (gitSsh' host port remotecmd (consumeStdinParams cs))
, go
)
where
go = do
ps <- sshOptions cs (host, port) gc []
return ("ssh", Param (fromSshHost host):ps++[Param remotecmd])
{- Generates parameters to ssh to a given host (or user@host) on a given
- port. This includes connection caching parameters, and any
- ssh-options. Note that the host to ssh to and the command to run
- are not included in the returned options. -}
sshOptions :: ConsumeStdin -> (SshHost, Maybe Integer) -> RemoteGitConfig -> [CommandParam] -> Annex [CommandParam]
sshOptions cs (host, port) gc opts = go =<< sshCachingInfo (host, port)
where
go (Nothing, params) = return $ mkparams cs params
go (Just socketfile, params) = do
prepSocket socketfile host (mkparams NoConsumeStdin params)
return $ mkparams cs params
mkparams cs' ps = concat
[ ps
, map Param (remoteAnnexSshOptions gc)
, opts
, portParams port
, consumeStdinParams cs'
, [Param "-T"]
]
{- Due to passing -n to GIT_SSH and GIT_SSH_COMMAND, some setups
- of those, which expect to be passed exactly git's parameters, will break.
- So only use those if the user set GIT_ANNEX_USE_GIT_SSH to say it's ok. -}
safe_GIT_SSH :: IO Bool
safe_GIT_SSH = (== Just "1") <$> getEnv "GIT_ANNEX_USE_GIT_SSH"
consumeStdinParams :: ConsumeStdin -> [CommandParam]
consumeStdinParams ConsumeStdin = []
consumeStdinParams NoConsumeStdin = [Param "-n"]
{- Returns a filename to use for a ssh connection caching socket, and
- parameters to enable ssh connection caching. -}
sshCachingInfo :: (SshHost, Maybe Integer) -> Annex (Maybe FilePath, [CommandParam])
sshCachingInfo (host, port) = go =<< sshCacheDir'
where
go (Right dir) =
liftIO (bestSocketPath $ dir </> hostport2socket host port) >>= return . \case
Nothing -> (Nothing, [])
Just socketfile -> (Just socketfile, sshConnectionCachingParams socketfile)
-- Concurrency without ssh connection caching is not a good
-- combination, so warn the user.
go (Left whynocaching) = do
getConcurrency >>= \case
NonConcurrent -> return ()
Concurrent {} -> warnnocaching whynocaching
ConcurrentPerCpu -> warnnocaching whynocaching
return (Nothing, [])
warnnocaching whynocaching = do
warning nocachingwarning
warning whynocaching
nocachingwarning = unwords
[ "You have enabled concurrency, but git-annex is not able"
, "to use ssh connection caching. This may result in"
, "multiple ssh processes prompting for passwords at the"
, "same time."
]
{- Given an absolute path to use for a socket file,
- returns whichever is shorter of that or the relative path to the same
- file.
-
- If no path can be constructed that is a valid socket, returns Nothing. -}
bestSocketPath :: FilePath -> IO (Maybe FilePath)
bestSocketPath abssocketfile = do
relsocketfile <- liftIO $ relPathCwdToFile abssocketfile
let socketfile = if length abssocketfile <= length relsocketfile
then abssocketfile
else relsocketfile
return $ if valid_unix_socket_path (socketfile ++ sshgarbage)
then Just socketfile
else Nothing
where
-- ssh appends a 16 char extension to the socket when setting it
-- up, which needs to be taken into account when checking
-- that a valid socket was constructed.
sshgarbage = replicate (1+16) 'X'
sshConnectionCachingParams :: FilePath -> [CommandParam]
sshConnectionCachingParams socketfile =
[ Param "-S", Param socketfile
, Param "-o", Param "ControlMaster=auto"
, Param "-o", Param "ControlPersist=yes"
]
sshSocketDirEnv :: String
sshSocketDirEnv = "GIT_ANNEX_SSH_SOCKET_DIR"
{- Returns the directory where ssh connection caching sockets can be
- stored.
-
- The directory will be created if it does not exist.
-}
sshCacheDir :: Annex (Maybe FilePath)
sshCacheDir = eitherToMaybe <$> sshCacheDir'
sshCacheDir' :: Annex (Either String FilePath)
sshCacheDir' =
ifM (fromMaybe BuildInfo.sshconnectioncaching . annexSshCaching <$> Annex.getGitConfig)
( ifM crippledFileSystem
( gettmpdir >>= \case
Nothing ->
return (Left crippledfswarning)
Just tmpdir ->
liftIO $ catchMsgIO $
usetmpdir tmpdir
, do
d <- fromRepo gitAnnexSshDir
createAnnexDirectory d
return (Right d)
)
, return (Left "annex.sshcaching is not set to true")
)
where
gettmpdir = liftIO $ getEnv sshSocketDirEnv
usetmpdir tmpdir = do
let socktmp = tmpdir </> "ssh"
createDirectoryIfMissing True socktmp
return socktmp
crippledfswarning = unwords
[ "This repository is on a crippled filesystem, so unix named"
, "pipes probably don't work, and ssh connection caching"
, "relies on those. One workaround is to set"
, sshSocketDirEnv
, "to point to a directory on a non-crippled filesystem."
]
portParams :: Maybe Integer -> [CommandParam]
portParams Nothing = []
portParams (Just port) = [Param "-p", Param $ show port]
{- Prepare to use a socket file for ssh connection caching.
-
- When concurrency is enabled, this blocks until a ssh connection
- has been made to the host. So, any password prompting by ssh will
- happen in this call, and only one ssh process will prompt at a time.
-
- Locks the socket lock file to prevent other git-annex processes from
- stopping the ssh multiplexer on this socket.
-}
prepSocket :: FilePath -> SshHost -> [CommandParam] -> Annex ()
prepSocket socketfile sshhost sshparams = do
-- There could be stale ssh connections hanging around
-- from a previous git-annex run that was interrupted.
-- This must run only once, before we have made any ssh connection,
-- and any other prepSocket calls must block while it's run.
tv <- Annex.getState Annex.sshstalecleaned
join $ liftIO $ atomically $ do
cleaned <- takeTMVar tv
if cleaned
then do
putTMVar tv cleaned
return noop
else return $ do
sshCleanup
liftIO $ atomically $ putTMVar tv True
-- Cleanup at shutdown.
Annex.addCleanup SshCachingCleanup sshCleanup
let socketlock = socket2lock socketfile
getConcurrency >>= \case
NonConcurrent -> return ()
Concurrent {} -> makeconnection socketlock
ConcurrentPerCpu -> makeconnection socketlock
lockFileCached socketlock
where
-- When the LockCache already has the socketlock in it,
-- the connection has already been started. Otherwise,
-- get the connection started now.
makeconnection socketlock = debugLocks $
whenM (isNothing <$> fromLockCache socketlock) $
-- See if ssh can connect in batch mode,
-- if so there's no need to block for a password
-- prompt.
unlessM (tryssh ["-o", "BatchMode=true"]) $
-- ssh needs to prompt (probably)
-- If the user enters the wrong password,
-- ssh will tell them, so we can ignore
-- failure.
void $ prompt $ tryssh []
-- Try to ssh to the host quietly. Returns True if ssh apparently
-- connected to the host successfully. If ssh failed to connect,
-- returns False.
-- Even if ssh is forced to run some specific command, this will
-- return True.
-- (Except there's an unlikely false positive where a forced
-- ssh command exits 255.)
tryssh extraps = liftIO $ withNullHandle $ \nullh -> do
let p = (proc "ssh" $ concat
[ extraps
, toCommand sshparams
, [fromSshHost sshhost, "true"]
])
{ std_out = UseHandle nullh
, std_err = UseHandle nullh
}
withCreateProcess p $ \_ _ _ pid -> do
exitcode <- waitForProcess pid
return $ case exitcode of
ExitFailure 255 -> False
_ -> True
{- Find ssh socket files.
-
- The check that the lock file exists makes only socket files
- that were set up by prepSocket be found. On some NFS systems,
- a deleted socket file may linger for a while under another filename;
- and this check makes such files be skipped since the corresponding lock
- file won't exist.
-}
enumSocketFiles :: Annex [FilePath]
enumSocketFiles = liftIO . go =<< sshCacheDir
where
go Nothing = return []
go (Just dir) = filterM (doesFileExist . socket2lock)
=<< filter (not . isLock)
<$> catchDefaultIO [] (dirContents dir)
{- Stop any unused ssh connection caching processes. -}
sshCleanup :: Annex ()
sshCleanup = mapM_ cleanup =<< enumSocketFiles
where
cleanup socketfile = do
#ifndef mingw32_HOST_OS
-- Drop any shared lock we have, and take an
-- exclusive lock, without blocking. If the lock
-- succeeds, nothing is using this ssh, and it can
-- be stopped.
--
-- After ssh is stopped, the lock file cannot be removed;
-- other processes may be waiting on our exclusive
-- lock to use it.
let lockfile = socket2lock socketfile
unlockFile lockfile
mode <- annexFileMode
noUmask mode (tryLockExclusive (Just mode) lockfile) >>= \case
Nothing -> noop
Just lck -> do
forceStopSsh socketfile
liftIO $ dropLock lck
#else
forceStopSsh socketfile
#endif
{- Stop all ssh connection caching processes, even when they're in use. -}
forceSshCleanup :: Annex ()
forceSshCleanup = mapM_ forceStopSsh =<< enumSocketFiles
forceStopSsh :: FilePath -> Annex ()
forceStopSsh socketfile = withNullHandle $ \nullh -> do
let (dir, base) = splitFileName socketfile
let p = (proc "ssh" $ toCommand $
[ Param "-O", Param "stop" ] ++
sshConnectionCachingParams base ++
[Param "localhost"])
{ cwd = Just dir
-- "ssh -O stop" is noisy on stderr even with -q
, std_out = UseHandle nullh
, std_err = UseHandle nullh
}
void $ liftIO $ catchMaybeIO $ withCreateProcess p $ \_ _ _ pid ->
forceSuccessProcess p pid
liftIO $ nukeFile socketfile
{- This needs to be as short as possible, due to limitations on the length
- of the path to a socket file. At the same time, it needs to be unique
- for each host.
-}
hostport2socket :: SshHost -> Maybe Integer -> FilePath
hostport2socket host Nothing = hostport2socket' $ fromSshHost host
hostport2socket host (Just port) = hostport2socket' $
fromSshHost host ++ "!" ++ show port
hostport2socket' :: String -> FilePath
hostport2socket' s
| length s > lengthofmd5s = show $ md5 $ encodeBL s
| otherwise = s
where
lengthofmd5s = 32
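-- For example (a sketch, not in the original source): a short host!port
-- string is used as-is, while anything longer than an md5sum (32 chars)
-- is replaced by its md5sum to keep the socket path short.
--
-- >>> hostport2socket' "example.com!2222"
-- "example.com!2222"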
socket2lock :: FilePath -> FilePath
socket2lock socket = socket ++ lockExt
isLock :: FilePath -> Bool
isLock f = lockExt `isSuffixOf` f
lockExt :: String
lockExt = ".lock"
{- This is the size of the sun_path component of sockaddr_un, which
- is the limit to the total length of the filename of a unix socket.
-
- On Linux, this is 108. On OSX, 104. TODO: Probe
-}
sizeof_sockaddr_un_sun_path :: Int
sizeof_sockaddr_un_sun_path = 100
{- Note that this looks at the true length of the path in bytes, as it will
- appear on disk. -}
valid_unix_socket_path :: FilePath -> Bool
valid_unix_socket_path f = S.length (encodeBS f) < sizeof_sockaddr_un_sun_path
{- Parses the SSH port, and returns the other OpenSSH options. If
- several ports are found, the last one takes precedence. -}
sshReadPort :: [String] -> (Maybe Integer, [String])
sshReadPort params = (port, reverse args)
where
(port,args) = aux (Nothing, []) params
aux (p,ps) [] = (p,ps)
aux (_,ps) ("-p":p:rest) = aux (readPort p, ps) rest
aux (p,ps) (q:rest) | "-p" `isPrefixOf` q = aux (readPort $ drop 2 q, ps) rest
| otherwise = aux (p,q:ps) rest
readPort p = fmap fst $ listToMaybe $ reads p
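-- For example (a sketch, not in the original source):
--
-- >>> sshReadPort ["-v", "-p", "2222"]
-- (Just 2222,["-v"])
-- >>> sshReadPort ["-p2222", "-p", "22"]
-- (Just 22,[])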
{- When this env var is set, git-annex runs ssh with the specified
- options. (The options are separated by newlines.)
-
- This is a workaround for GIT_SSH not being able to contain
- additional parameters to pass to ssh. (GIT_SSH_COMMAND can,
- but is not supported by older versions of git.) -}
sshOptionsEnv :: String
sshOptionsEnv = "GIT_ANNEX_SSHOPTION"
toSshOptionsEnv :: [CommandParam] -> String
toSshOptionsEnv = unlines . toCommand
fromSshOptionsEnv :: String -> [CommandParam]
fromSshOptionsEnv = map Param . lines
{- Enables ssh caching for git push/pull to a particular
- remote git repo. (Can safely be used on non-ssh remotes.)
-
- Also propagates any configured ssh-options.
-
- Like inRepo, the action is run with the local git repo.
- But here it's a modified version, with gitEnv to set GIT_SSH=git-annex,
- and sshOptionsEnv set so that git-annex will know what socket
- file to use. -}
inRepoWithSshOptionsTo :: Git.Repo -> RemoteGitConfig -> (Git.Repo -> IO a) -> Annex a
inRepoWithSshOptionsTo remote gc a =
liftIO . a =<< sshOptionsTo remote gc =<< gitRepo
{- To make any git commands be run with ssh caching enabled,
- and with any configured ssh-options, this alters the local Git.Repo's
- gitEnv to set GIT_SSH=git-annex, and sets sshOptionsEnv so that
- git-annex knows what socket file to use when git runs it as ssh.
-
- If GIT_SSH or GIT_SSH_COMMAND are enabled, this has no effect. -}
sshOptionsTo :: Git.Repo -> RemoteGitConfig -> Git.Repo -> Annex Git.Repo
sshOptionsTo remote gc localr
| not (Git.repoIsUrl remote) || Git.repoIsHttp remote = unchanged
| otherwise = case Git.Url.hostuser remote of
Nothing -> unchanged
Just host -> ifM (liftIO $ safe_GIT_SSH <&&> gitSshEnvSet)
( unchanged
, do
let port = Git.Url.port remote
let sshhost = either error id (mkSshHost host)
(msockfile, cacheparams) <- sshCachingInfo (sshhost, port)
case msockfile of
Nothing -> use []
Just sockfile -> do
prepSocket sockfile sshhost $ concat
[ cacheparams
, map Param (remoteAnnexSshOptions gc)
, portParams port
, consumeStdinParams NoConsumeStdin
, [Param "-T"]
]
use cacheparams
)
where
unchanged = return localr
use opts = do
let sshopts = concat
[ opts
, map Param (remoteAnnexSshOptions gc)
]
if null sshopts
then unchanged
else do
command <- liftIO programPath
liftIO $ do
localr' <- addGitEnv localr sshOptionsEnv
(toSshOptionsEnv sshopts)
addGitEnv localr' gitSshEnv command
runSshOptions :: [String] -> String -> IO ()
runSshOptions args s = do
let args' = toCommand (fromSshOptionsEnv s) ++ args
let p = proc "ssh" args'
exitcode <- withCreateProcess p $ \_ _ _ pid -> waitForProcess pid
exitWith exitcode
{- When this env var is set, git-annex is being used as a ssh-askpass
- program, and should read the password from the specified location,
- and output it for ssh to read. -}
sshAskPassEnv :: String
sshAskPassEnv = "GIT_ANNEX_SSHASKPASS"
runSshAskPass :: FilePath -> IO ()
runSshAskPass passfile = putStrLn =<< readFile passfile

Annex/TaggedPush.hs
@ -0,0 +1,68 @@
{- git-annex tagged pushes
-
- Copyright 2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.TaggedPush where
import Annex.Common
import qualified Remote
import qualified Annex.Branch
import qualified Git
import qualified Git.Ref
import qualified Git.Command
import qualified Git.Branch
import Utility.Base64
import qualified Data.ByteString as S
{- Converts a git branch into a branch that is tagged with a UUID, typically
- the UUID of the repo that will be pushing it, and possibly with other
- information.
-
- Pushing to branches on the remote that have our uuid in them is ugly,
- but it reserves those branches for pushing by us, and so our pushes will
- never conflict with other pushes.
-
- To avoid cluttering up the branch display, the branch is put under
- refs/synced/, rather than the usual refs/remotes/
-
- Both UUIDs and Base64 encoded data are always legal to be used in git
- refs, per git-check-ref-format.
-}
toTaggedBranch :: UUID -> Maybe String -> Git.Branch -> Git.Ref
toTaggedBranch u info b = Git.Ref $ S.intercalate "/" $ catMaybes
[ Just "refs/synced"
, Just $ fromUUID u
, toB64' . encodeBS <$> info
, Just $ Git.fromRef' $ Git.Ref.base b
]
fromTaggedBranch :: Git.Ref -> Maybe (UUID, Maybe String)
fromTaggedBranch b = case splitc '/' $ Git.fromRef b of
("refs":"synced":u:info:_base) ->
Just (toUUID u, fromB64Maybe info)
("refs":"synced":u:_base) ->
Just (toUUID u, Nothing)
_ -> Nothing
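-- A sketch of the naming scheme (not in the original source; the UUID is
-- shortened for display): toTaggedBranch u Nothing
-- (Git.Ref "refs/heads/master") yields Git.Ref "refs/synced/<u>/master",
-- and fromTaggedBranch on that ref recovers Just (u, Nothing).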
listTaggedBranches :: Annex [(Git.Sha, Git.Ref)]
listTaggedBranches = filter (isJust . fromTaggedBranch . snd)
<$> inRepo Git.Ref.list
taggedPush :: UUID -> Maybe String -> Git.Ref -> Remote -> Git.Repo -> IO Bool
taggedPush u info branch remote = Git.Command.runBool
[ Param "push"
, Param $ Remote.name remote
{- Using forcePush here is safe because we "own" the tagged branch
- we're pushing; it has no other writers. Ensures it is pushed
- even if it has been rewritten by a transition. -}
, Param $ Git.Branch.forcePush $ refspec Annex.Branch.name
, Param $ refspec branch
]
where
refspec b = Git.fromRef b ++ ":" ++ Git.fromRef (toTaggedBranch u info b)

Annex/Tmp.hs
@ -0,0 +1,71 @@
{- git-annex tmp files
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Tmp where
import Annex.Common
import qualified Annex
import Annex.LockFile
import Annex.Perms
import Types.CleanupActions
import Data.Time.Clock.POSIX
-- | For creation of tmp files, other than for key's contents.
--
-- The action should normally clean up whatever files it writes to the temp
-- directory that is passed to it. However, once the action is done,
-- any files left in that directory may be cleaned up by another process at
-- any time.
withOtherTmp :: (FilePath -> Annex a) -> Annex a
withOtherTmp a = do
Annex.addCleanup OtherTmpCleanup cleanupOtherTmp
tmpdir <- fromRepo gitAnnexTmpOtherDir
tmplck <- fromRepo gitAnnexTmpOtherLock
withSharedLock (const tmplck) $ do
void $ createAnnexDirectory tmpdir
a tmpdir
-- | This uses an alternate temp directory. The action should normally
-- clean up whatever files it writes there, but if it leaves files
-- there (perhaps due to being interrupted), the files will be eventually
-- cleaned up by another git-annex process (after they're a week old).
--
-- Unlike withOtherTmp, this does not rely on locking working.
-- Its main use is in situations where the state of the lockfile is not
-- yet determined, e.g. during initialization.
withEventuallyCleanedOtherTmp :: (FilePath -> Annex a) -> Annex a
withEventuallyCleanedOtherTmp = bracket setup cleanup
where
setup = do
tmpdir <- fromRepo gitAnnexTmpOtherDirOld
void $ createAnnexDirectory tmpdir
return tmpdir
cleanup = liftIO . void . tryIO . removeDirectory
-- | Cleans up any tmp files that were left by a previous
-- git-annex process that got interrupted or failed to clean up after
-- itself for some other reason.
--
-- Does not do anything if withOtherTmp is running.
cleanupOtherTmp :: Annex ()
cleanupOtherTmp = do
tmplck <- fromRepo gitAnnexTmpOtherLock
void $ tryIO $ tryExclusiveLock (const tmplck) $ do
tmpdir <- fromRepo gitAnnexTmpOtherDir
void $ liftIO $ tryIO $ removeDirectoryRecursive tmpdir
oldtmp <- fromRepo gitAnnexTmpOtherDirOld
liftIO $ mapM_ cleanold =<< dirContentsRecursive oldtmp
liftIO $ void $ tryIO $ removeDirectory oldtmp -- when empty
where
cleanold f = do
now <- liftIO getPOSIXTime
let oldenough = now - (60 * 60 * 24 * 7)
catchMaybeIO (modificationTime <$> getSymbolicLinkStatus f) >>= \case
Just mtime | realToFrac mtime <= oldenough ->
void $ tryIO $ nukeFile f
_ -> return ()

Annex/Transfer.hs
@ -0,0 +1,314 @@
{- git-annex transfers
-
- Copyright 2012-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP, BangPatterns #-}
module Annex.Transfer (
module X,
upload,
alwaysUpload,
download,
runTransfer,
alwaysRunTransfer,
noRetry,
stdRetry,
pickRemote,
) where
import Annex.Common
import qualified Annex
import Logs.Transfer as X
import Types.Transfer as X
import Annex.Notification as X
import Annex.Perms
import Utility.Metered
import Utility.ThreadScheduler
import Annex.LockPool
import Types.Key
import qualified Types.Remote as Remote
import Types.Concurrency
import Annex.Concurrent.Utility
import Types.WorkerPool
import Annex.WorkerPool
import Backend (isCryptographicallySecure)
import Control.Concurrent
import qualified Data.Map.Strict as M
import Data.Ord
upload :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
upload u key f d a _witness = guardHaveUUID u $
runTransfer (Transfer Upload u (fromKey id key)) f d a
alwaysUpload :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
alwaysUpload u key f d a _witness = guardHaveUUID u $
alwaysRunTransfer (Transfer Upload u (fromKey id key)) f d a
download :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
download u key f d a _witness = guardHaveUUID u $
runTransfer (Transfer Download u (fromKey id key)) f d a
guardHaveUUID :: Observable v => UUID -> Annex v -> Annex v
guardHaveUUID u a
| u == NoUUID = return observeFailure
| otherwise = a
{- Runs a transfer action. Creates and locks the lock file while the
- action is running, and stores info in the transfer information
- file.
-
- If the transfer action returns False, the transfer info is
- left in the failedTransferDir.
-
- If the transfer is already in progress, returns False.
-
- An upload can be run from a read-only filesystem, and in this case
- no transfer information or lock file is used.
-}
runTransfer :: Observable v => Transfer -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v
runTransfer = runTransfer' False
{- Like runTransfer, but ignores any existing transfer lock file for the
- transfer, allowing re-running a transfer that is already in progress.
-}
alwaysRunTransfer :: Observable v => Transfer -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v
alwaysRunTransfer = runTransfer' True
runTransfer' :: Observable v => Bool -> Transfer -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v
runTransfer' ignorelock t afile retrydecider transferaction = enteringStage TransferStage $ debugLocks $ checkSecureHashes t $ do
info <- liftIO $ startTransferInfo afile
(meter, tfile, createtfile, metervar) <- mkProgressUpdater t info
mode <- annexFileMode
(lck, inprogress) <- prep tfile createtfile mode
if inprogress && not ignorelock
then do
showNote "transfer already in progress, or unable to take transfer lock"
return observeFailure
else do
v <- retry 0 info metervar (transferaction meter)
liftIO $ cleanup tfile lck
if observeBool v
then removeFailedTransfer t
else recordFailedTransfer t info
return v
where
prep :: FilePath -> Annex () -> FileMode -> Annex (Maybe LockHandle, Bool)
#ifndef mingw32_HOST_OS
prep tfile createtfile mode = catchPermissionDenied (const prepfailed) $ do
let lck = transferLockFile tfile
createAnnexDirectory $ takeDirectory lck
tryLockExclusive (Just mode) lck >>= \case
Nothing -> return (Nothing, True)
Just lockhandle -> ifM (checkSaneLock lck lockhandle)
( do
createtfile
return (Just lockhandle, False)
, do
liftIO $ dropLock lockhandle
return (Nothing, True)
)
#else
prep tfile createtfile _mode = catchPermissionDenied (const prepfailed) $ do
let lck = transferLockFile tfile
createAnnexDirectory $ takeDirectory lck
catchMaybeIO (liftIO $ lockExclusive lck) >>= \case
Nothing -> return (Nothing, False)
Just Nothing -> return (Nothing, True)
Just (Just lockhandle) -> do
createtfile
return (Just lockhandle, False)
#endif
prepfailed = return (Nothing, False)
cleanup _ Nothing = noop
cleanup tfile (Just lockhandle) = do
let lck = transferLockFile tfile
void $ tryIO $ removeFile tfile
#ifndef mingw32_HOST_OS
void $ tryIO $ removeFile lck
dropLock lockhandle
#else
{- Windows cannot delete the lockfile until the lock
- is closed. So it's possible to race with another
- process that takes the lock before it's removed,
- so ignore failure to remove.
-}
dropLock lockhandle
void $ tryIO $ removeFile lck
#endif
retry numretries oldinfo metervar run =
tryNonAsync run >>= \case
Right v
| observeBool v -> return v
| otherwise -> checkretry
Left e -> do
warning (show e)
checkretry
where
checkretry = do
b <- getbytescomplete metervar
let newinfo = oldinfo { bytesComplete = Just b }
let !numretries' = succ numretries
ifM (retrydecider numretries' oldinfo newinfo)
( retry numretries' newinfo metervar run
, return observeFailure
)
getbytescomplete metervar
| transferDirection t == Upload =
liftIO $ readMVar metervar
| otherwise = do
f <- fromRepo $ gitAnnexTmpObjectLocation (transferKey t)
liftIO $ catchDefaultIO 0 $ getFileSize f
{- Avoid download and upload of keys with insecure content when
- annex.securehashesonly is configured.
-
- This is not a security check. Even if this let the content be
- downloaded, the actual security checks would prevent the content from
- being added to the repository. The only reason this is done here is to
- avoid transferring content that's going to be rejected anyway.
-
- We assume that, if annex.securehashesonly is set and the local repo
- still contains content using an insecure hash, remotes will likewise
- tend to be configured to reject it, so Upload is also prevented.
-}
checkSecureHashes :: Observable v => Transfer -> Annex v -> Annex v
checkSecureHashes t a = ifM (isCryptographicallySecure (transferKey t))
( a
, ifM (annexSecureHashesOnly <$> Annex.getGitConfig)
( do
warning $ "annex.securehashesonly blocked transfer of " ++ decodeBS (formatKeyVariety variety) ++ " key"
return observeFailure
, a
)
)
where
variety = fromKey keyVariety (transferKey t)
type NumRetries = Integer
type RetryDecider = NumRetries -> TransferInfo -> TransferInfo -> Annex Bool
{- Both retry deciders are checked together, so if one chooses to delay,
- it will always take effect. -}
combineRetryDeciders :: RetryDecider -> RetryDecider -> RetryDecider
combineRetryDeciders a b = \n old new -> do
ar <- a n old new
br <- b n old new
return (ar || br)
noRetry :: RetryDecider
noRetry _ _ _ = pure False
stdRetry :: RetryDecider
stdRetry = combineRetryDeciders forwardRetry configuredRetry
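-- A sketch of a custom decider (not in the original source): allow at
-- most two retries, regardless of progress. Combining it with stdRetry
-- via combineRetryDeciders would still retry whenever either says to.
twoRetriesOnly :: RetryDecider
twoRetriesOnly n _old _new = pure (n <= 2)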
{- Keep retrying failed transfers, as long as forward progress is being
- made.
-
- Up to a point -- while some remotes can resume where the previous
- transfer left off, and so it would make sense to keep retrying forever,
- other remotes restart each transfer from the beginning, and so even if
- forward progress is being made, it's not real progress. So, retry a
- maximum of 5 times by default.
-}
forwardRetry :: RetryDecider
forwardRetry numretries old new
| fromMaybe 0 (bytesComplete old) < fromMaybe 0 (bytesComplete new) =
(numretries <=) <$> maybe globalretrycfg pure remoteretrycfg
| otherwise = return False
where
globalretrycfg = fromMaybe 5 . annexForwardRetry
<$> Annex.getGitConfig
remoteretrycfg = remoteAnnexRetry =<<
(Remote.gitconfig <$> transferRemote new)
{- Retries a number of times with growing delays in between when enabled
- by git configuration. -}
configuredRetry :: RetryDecider
configuredRetry numretries _old new = do
(maxretries, Seconds initretrydelay) <- getcfg $
Remote.gitconfig <$> transferRemote new
if numretries < maxretries
then do
let retrydelay = Seconds (initretrydelay * 2^(numretries-1))
showSideAction $ "Delaying " ++ show (fromSeconds retrydelay) ++ "s before retrying."
liftIO $ threadDelaySeconds retrydelay
return True
else return False
where
globalretrycfg = fromMaybe 0 . annexRetry
<$> Annex.getGitConfig
globalretrydelaycfg = fromMaybe (Seconds 1) . annexRetryDelay
<$> Annex.getGitConfig
getcfg Nothing = (,) <$> globalretrycfg <*> globalretrydelaycfg
getcfg (Just gc) = (,)
<$> maybe globalretrycfg return (remoteAnnexRetry gc)
<*> maybe globalretrydelaycfg return (remoteAnnexRetryDelay gc)
{- Picks a remote from the list and tries a transfer to it. If the transfer
- does not succeed, goes on to try other remotes from the list.
-
- The list should already be ordered by remote cost, and is normally
- tried in order. However, when concurrent jobs are running, they will
- be assigned different remotes of the same cost when possible. This can
- increase total transfer speed.
-}
pickRemote :: Observable v => [Remote] -> (Remote -> Annex v) -> Annex v
pickRemote l a = debugLocks $ go l =<< getConcurrency
where
go [] _ = return observeFailure
go (r:[]) _ = a r
go rs NonConcurrent = gononconcurrent rs
go rs (Concurrent n)
| n <= 1 = gononconcurrent rs
| otherwise = goconcurrent rs
go rs ConcurrentPerCpu = goconcurrent rs
gononconcurrent [] = return observeFailure
gononconcurrent (r:rs) = do
ok <- a r
if observeBool ok
then return ok
else gononconcurrent rs
goconcurrent rs = do
mv <- Annex.getState Annex.activeremotes
active <- liftIO $ takeMVar mv
let rs' = sortBy (lessActiveFirst active) rs
goconcurrent' mv active rs'
goconcurrent' mv active [] = do
liftIO $ putMVar mv active
return observeFailure
goconcurrent' mv active (r:rs) = do
let !active' = M.insertWith (+) r 1 active
liftIO $ putMVar mv active'
let getnewactive = do
active'' <- liftIO $ takeMVar mv
let !active''' = M.update (\n -> if n > 1 then Just (n-1) else Nothing) r active''
return active'''
let removeactive = liftIO . putMVar mv =<< getnewactive
ok <- a r `onException` removeactive
if observeBool ok
then do
removeactive
return ok
else do
active'' <- getnewactive
-- Re-sort the remaining rs
-- because other threads could have
-- been assigned them in the meantime.
let rs' = sortBy (lessActiveFirst active'') rs
goconcurrent' mv active'' rs'
lessActiveFirst :: M.Map Remote Integer -> Remote -> Remote -> Ordering
lessActiveFirst active a b
| Remote.cost a == Remote.cost b = comparing (`M.lookup` active) a b
| otherwise = comparing Remote.cost a b

Annex/UUID.hs
@ -0,0 +1,122 @@
{- git-annex uuids
-
- Each git repository used by git-annex has an annex.uuid setting that
- uniquely identifies that repository.
-
- UUIDs of remotes are cached in git config, using keys named
- remote.<name>.annex-uuid
-
- Copyright 2010-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.UUID (
configkeyUUID,
getUUID,
getRepoUUID,
getUncachedUUID,
isUUIDConfigured,
prepUUID,
genUUID,
genUUIDInNameSpace,
gCryptNameSpace,
removeRepoUUID,
storeUUID,
storeUUIDIn,
setUUID,
webUUID,
bitTorrentUUID,
) where
import Annex.Common
import qualified Annex
import qualified Git
import qualified Git.Config
import Git.Types
import Config
import qualified Data.UUID as U
import qualified Data.UUID.V4 as U4
import qualified Data.UUID.V5 as U5
import Data.String
configkeyUUID :: ConfigKey
configkeyUUID = annexConfig "uuid"
{- Generates a random UUID, that does not include the MAC address. -}
genUUID :: IO UUID
genUUID = toUUID <$> U4.nextRandom
{- Generates a UUID from a given string, using a namespace.
- Given the same namespace, the same string will always result
- in the same UUID. -}
genUUIDInNameSpace :: U.UUID -> String -> UUID
genUUIDInNameSpace namespace = toUUID . U5.generateNamed namespace . s2w8
{- Namespace used for UUIDs derived from git-remote-gcrypt ids. -}
gCryptNameSpace :: U.UUID
gCryptNameSpace = U5.generateNamed U5.namespaceURL $
s2w8 "http://git-annex.branchable.com/design/gcrypt/"
{- Get current repository's UUID. -}
getUUID :: Annex UUID
getUUID = annexUUID <$> Annex.getGitConfig
{- Looks up a remote repo's UUID, caching it in .git/config if
- it's not already. -}
getRepoUUID :: Git.Repo -> Annex UUID
getRepoUUID r = do
c <- toUUID <$> getConfig cachekey ""
let u = getUncachedUUID r
if c /= u && u /= NoUUID
then do
updatecache u
return u
else return c
where
updatecache u = do
g <- gitRepo
when (g /= r) $ storeUUIDIn cachekey u
cachekey = remoteAnnexConfig r "uuid"
removeRepoUUID :: Annex ()
removeRepoUUID = do
unsetConfig configkeyUUID
storeUUID NoUUID
getUncachedUUID :: Git.Repo -> UUID
getUncachedUUID = toUUID . Git.Config.get configkeyUUID ""
-- Does the repo's config have a key for the UUID?
-- True even when the key has no value.
isUUIDConfigured :: Git.Repo -> Bool
isUUIDConfigured = isJust . Git.Config.getMaybe configkeyUUID
{- Make sure that the repo has an annex.uuid setting. -}
prepUUID :: Annex ()
prepUUID = whenM ((==) NoUUID <$> getUUID) $
storeUUID =<< liftIO genUUID
storeUUID :: UUID -> Annex ()
storeUUID = storeUUIDIn configkeyUUID
storeUUIDIn :: ConfigKey -> UUID -> Annex ()
storeUUIDIn configfield = setConfig configfield . fromUUID
{- Only sets the configkey in the Repo; does not change .git/config -}
setUUID :: Git.Repo -> UUID -> IO Git.Repo
setUUID r u = do
let s = encodeBS' $ show configkeyUUID ++ "=" ++ fromUUID u
Git.Config.store s Git.Config.ConfigList r
-- Dummy uuid for the whole web. Do not alter.
webUUID :: UUID
webUUID = UUID (fromString "00000000-0000-0000-0000-000000000001")
-- Dummy uuid for bittorrent. Do not alter.
bitTorrentUUID :: UUID
bitTorrentUUID = UUID (fromString "00000000-0000-0000-0000-000000000002")

Annex/UntrustedFilePath.hs
@ -0,0 +1,73 @@
{- handling untrusted filepaths
-
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.UntrustedFilePath where
import Data.Char
import System.FilePath
{- Given a string that we'd like to use as the basis for FilePath, but that
- was provided by a third party and is not to be trusted, returns the closest
- sane FilePath.
-
- All spaces and punctuation and other wacky stuff are replaced
- with '_', except for '.' and '-'
-
- "../" becomes ".._", which is safe.
- "/foo" becomes "_foo", which is safe.
- "c:foo" becomes "c_foo", which is safe even on windows.
-
- Leading '.' and '-' are also replaced with '_', so
- no dotfiles that might control a program are inadvertently created,
- and to avoid filenames being treated as options to commands the user
- might run.
-
- Also there's an off chance the string might be empty, so to avoid
- needing to handle such an invalid filepath, return a dummy "file" in
- that case.
-}
sanitizeFilePath :: String -> FilePath
sanitizeFilePath = sanitizeLeadingFilePathCharacter . sanitizeFilePathComponent
{- For when the filepath is being built up out of components that should be
- individually sanitized, this can be used for each component, followed by
- sanitizeLeadingFilePathCharacter for the whole thing.
-}
sanitizeFilePathComponent :: String -> String
sanitizeFilePathComponent = map sanitize
where
sanitize c
| c == '.' || c == '-' = c
| isSpace c || isPunctuation c || isSymbol c || isControl c || c == '/' = '_'
| otherwise = c
sanitizeLeadingFilePathCharacter :: String -> FilePath
sanitizeLeadingFilePathCharacter [] = "file"
sanitizeLeadingFilePathCharacter ('.':s) = '_':s
sanitizeLeadingFilePathCharacter ('-':s) = '_':s
sanitizeLeadingFilePathCharacter ('/':s) = '_':s
sanitizeLeadingFilePathCharacter s = s
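-- For example (a sketch, not in the original source):
--
-- >>> sanitizeFilePath "/foo"
-- "_foo"
-- >>> sanitizeFilePath ""
-- "file"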
escapeSequenceInFilePath :: FilePath -> Bool
escapeSequenceInFilePath f = '\ESC' `elem` f
{- ../ is a path traversal, no matter where it appears.
-
- An absolute path is, of course.
-}
pathTraversalInFilePath :: FilePath -> Bool
pathTraversalInFilePath f
| isAbsolute f = True
| any (== "..") (splitPath f) = True
-- On windows, C:foo with no directory is not considered absolute
| hasDrive f = True
| otherwise = False
gitDirectoryInFilePath :: FilePath -> Bool
gitDirectoryInFilePath = any (== ".git")
. map dropTrailingPathSeparator
. splitPath

Annex/UpdateInstead.hs
@ -0,0 +1,23 @@
{- git-annex UpdateInstead emulation
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.UpdateInstead where
import qualified Annex
import Annex.Common
import Annex.AdjustedBranch
import Git.Branch
import Git.ConfigTypes
{- receive.denyCurrentBranch=updateInstead does not work
- when an adjusted branch is checked out, so must be emulated. -}
needUpdateInsteadEmulation :: Annex Bool
needUpdateInsteadEmulation = updateinsteadset <&&> isadjusted
where
updateinsteadset = (== UpdateInstead) . receiveDenyCurrentBranch
<$> Annex.getGitConfig
isadjusted = (maybe False (isJust . getAdjustment) <$> inRepo Git.Branch.current)

Annex/Url.hs
@ -0,0 +1,192 @@
{- Url downloading, with git-annex user agent and configured http
- headers, security restrictions, etc.
-
- Copyright 2013-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.Url (
withUrlOptions,
withUrlOptionsPromptingCreds,
getUrlOptions,
getUserAgent,
ipAddressesUnlimited,
checkBoth,
download,
download',
exists,
getUrlInfo,
U.downloadQuiet,
U.URLString,
U.UrlOptions(..),
U.UrlInfo(..),
U.sinkResponseFile,
U.matchStatusCodeException,
U.downloadConduit,
U.downloadPartial,
U.parseURIRelaxed,
U.allowedScheme,
U.assumeUrlExists,
) where
import Annex.Common
import qualified Annex
import qualified Utility.Url as U
import Utility.IPAddress
#ifdef WITH_HTTP_CLIENT_RESTRICTED
import Network.HTTP.Client.Restricted
#else
import Utility.HttpManagerRestricted
#endif
import Utility.Metered
import Git.Credential
import qualified BuildInfo
import Network.Socket
import Network.HTTP.Client
import Network.HTTP.Client.TLS
import Text.Read
defaultUserAgent :: U.UserAgent
defaultUserAgent = "git-annex/" ++ BuildInfo.packageversion
getUserAgent :: Annex U.UserAgent
getUserAgent = Annex.getState $
fromMaybe defaultUserAgent . Annex.useragent
getUrlOptions :: Annex U.UrlOptions
getUrlOptions = Annex.getState Annex.urloptions >>= \case
Just uo -> return uo
Nothing -> do
uo <- mk
Annex.changeState $ \s -> s
{ Annex.urloptions = Just uo }
return uo
where
mk = do
(urldownloader, manager) <- checkallowedaddr
U.mkUrlOptions
<$> (Just <$> getUserAgent)
<*> headers
<*> pure urldownloader
<*> pure manager
<*> (annexAllowedUrlSchemes <$> Annex.getGitConfig)
<*> pure U.noBasicAuth
headers = annexHttpHeadersCommand <$> Annex.getGitConfig >>= \case
Just cmd -> lines <$> liftIO (readProcess "sh" ["-c", cmd])
Nothing -> annexHttpHeaders <$> Annex.getGitConfig
checkallowedaddr = words . annexAllowedIPAddresses <$> Annex.getGitConfig >>= \case
["all"] -> do
-- Only allow curl when all are allowed,
-- as its interface does not allow preventing
-- it from accessing specific IP addresses.
curlopts <- map Param . annexWebOptions <$> Annex.getGitConfig
let urldownloader = if null curlopts
then U.DownloadWithConduit $
U.DownloadWithCurlRestricted mempty
else U.DownloadWithCurl curlopts
manager <- liftIO $ U.newManager $
avoidtimeout $ tlsManagerSettings
return (urldownloader, manager)
allowedaddrsports -> do
addrmatcher <- liftIO $
(\l v -> any (\f -> f v) l) . catMaybes
<$> mapM (uncurry makeAddressMatcher)
(mapMaybe splitAddrPort allowedaddrsports)
-- Default to not allowing access to loopback
-- and private IP addresses to avoid data
-- leakage.
let isallowed addr
| addrmatcher addr = True
| isLoopbackAddress addr = False
| isPrivateAddress addr = False
| otherwise = True
let connectionrestricted = connectionRestricted
("Configuration of annex.security.allowed-ip-addresses does not allow accessing address " ++)
let r = addressRestriction $ \addr ->
if isallowed (addrAddress addr)
then Nothing
else Just (connectionrestricted addr)
(settings, pr) <- liftIO $
mkRestrictedManagerSettings r Nothing Nothing
case pr of
Nothing -> return ()
Just ProxyRestricted -> toplevelWarning True
"http proxy settings not used due to annex.security.allowed-ip-addresses configuration"
manager <- liftIO $ U.newManager $
avoidtimeout settings
let urldownloader = U.DownloadWithConduit $
U.DownloadWithCurlRestricted r
return (urldownloader, manager)
-- http-client defaults to timing out a request after 30 seconds
-- or so, but some web servers are slower and git-annex has its own
-- separate timeout controls, so disable that.
avoidtimeout s = s { managerResponseTimeout = responseTimeoutNone }
splitAddrPort :: String -> Maybe (String, Maybe PortNumber)
splitAddrPort s
-- "[addr]:port" (also allow "[addr]")
| "[" `isPrefixOf` s = case splitc ']' (drop 1 s) of
[a,cp] -> case splitc ':' cp of
["",p] -> do
pn <- readMaybe p
return (a, Just pn)
[""] -> Just (a, Nothing)
_ -> Nothing
_ -> Nothing
| otherwise = Just (s, Nothing)
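-- For example (a sketch, not in the original source):
--
-- >>> splitAddrPort "[::1]:8080"
-- Just ("::1",Just 8080)
-- >>> splitAddrPort "127.0.0.1"
-- Just ("127.0.0.1",Nothing)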
ipAddressesUnlimited :: Annex Bool
ipAddressesUnlimited =
("all" == ) . annexAllowedIPAddresses <$> Annex.getGitConfig
withUrlOptions :: (U.UrlOptions -> Annex a) -> Annex a
withUrlOptions a = a =<< getUrlOptions
-- When downloading an url, if authentication is needed, uses
-- git-credential to prompt for username and password.
withUrlOptionsPromptingCreds :: (U.UrlOptions -> Annex a) -> Annex a
withUrlOptionsPromptingCreds a = do
g <- Annex.gitRepo
uo <- getUrlOptions
prompter <- mkPrompter
a $ uo
{ U.getBasicAuth = \u -> prompter $
getBasicAuthFromCredential g u
-- Can't download with curl and handle basic auth,
-- so make sure it uses conduit.
, U.urlDownloader = case U.urlDownloader uo of
U.DownloadWithCurl _ -> U.DownloadWithConduit $
U.DownloadWithCurlRestricted mempty
v -> v
}
checkBoth :: U.URLString -> Maybe Integer -> U.UrlOptions -> Annex Bool
checkBoth url expected_size uo =
liftIO (U.checkBoth url expected_size uo) >>= \case
Right r -> return r
Left err -> warning err >> return False
download :: MeterUpdate -> U.URLString -> FilePath -> U.UrlOptions -> Annex Bool
download meterupdate url file uo =
liftIO (U.download meterupdate url file uo) >>= \case
Right () -> return True
Left err -> warning err >> return False
download' :: MeterUpdate -> U.URLString -> FilePath -> U.UrlOptions -> Annex (Either String ())
download' meterupdate url file uo =
liftIO (U.download meterupdate url file uo)
exists :: U.URLString -> U.UrlOptions -> Annex Bool
exists url uo = liftIO (U.exists url uo) >>= \case
Right b -> return b
Left err -> warning err >> return False
getUrlInfo :: U.URLString -> U.UrlOptions -> Annex (Either String U.UrlInfo)
getUrlInfo url uo = liftIO (U.getUrlInfo url uo)

Annex/VariantFile.hs
@ -0,0 +1,45 @@
{- git-annex .variant files for automatic merge conflict resolution
-
- Copyright 2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.VariantFile where
import Annex.Common
import Utility.Hash
import qualified Data.ByteString as S
variantMarker :: String
variantMarker = ".variant-"
mkVariant :: FilePath -> String -> FilePath
mkVariant file variant = takeDirectory file
</> dropExtension (takeFileName file)
++ variantMarker ++ variant
++ takeExtension file
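-- For example (a sketch, not in the original source):
--
-- >>> mkVariant "dir/foo.c" "1b2c"
-- "dir/foo.variant-1b2c.c"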
{- The filename to use when resolving a conflicted merge of a file,
- that points to a key.
-
- Something derived from the key needs to be included in the filename,
- but rather than exposing the whole key to the user, a very weak hash
- is used. There is a very real, although still unlikely, chance of
- conflicts using this hash.
-
- In the event that there is a conflict with the filename generated
- for some other key, that conflict will itself be handled by the
- conflicted merge resolution code. That case is detected, and the full
- key is used in the filename.
-}
variantFile :: FilePath -> Key -> FilePath
variantFile file key
| doubleconflict = mkVariant file (fromRawFilePath (keyFile key))
| otherwise = mkVariant file (shortHash $ serializeKey' key)
where
doubleconflict = variantMarker `isInfixOf` file
shortHash :: S.ByteString -> String
shortHash = take 4 . show . md5s

Annex/VectorClock.hs
@ -0,0 +1,54 @@
{- git-annex vector clocks
-
- We don't have a way yet to keep true distributed vector clocks.
- The next best thing is a timestamp.
-
- Copyright 2017-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.VectorClock where
import Data.Time.Clock.POSIX
import Data.ByteString.Builder
import Control.Applicative
import Prelude
import Utility.Env
import Utility.TimeStamp
import Utility.QuickCheck
import qualified Data.Attoparsec.ByteString.Lazy as A
-- | Some very old logs did not have any time stamp at all;
-- Unknown is used for those.
data VectorClock = Unknown | VectorClock POSIXTime
deriving (Eq, Ord, Show)
-- Unknown is oldest.
prop_VectorClock_sane :: Bool
prop_VectorClock_sane = Unknown < VectorClock 1
instance Arbitrary VectorClock where
arbitrary = VectorClock <$> arbitrary
currentVectorClock :: IO VectorClock
currentVectorClock = go =<< getEnv "GIT_ANNEX_VECTOR_CLOCK"
where
go Nothing = VectorClock <$> getPOSIXTime
go (Just s) = case parsePOSIXTime s of
Just t -> return (VectorClock t)
Nothing -> VectorClock <$> getPOSIXTime
formatVectorClock :: VectorClock -> String
formatVectorClock Unknown = "0"
formatVectorClock (VectorClock t) = show t
buildVectorClock :: VectorClock -> Builder
buildVectorClock = string7 . formatVectorClock
parseVectorClock :: String -> Maybe VectorClock
parseVectorClock t = VectorClock <$> parsePOSIXTime t
vectorClockParser :: A.Parser VectorClock
vectorClockParser = VectorClock <$> parserPOSIXTime

Annex/Version.hs
@ -0,0 +1,56 @@
{- git-annex repository versioning
-
- Copyright 2010-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Version where
import Annex.Common
import Config
import Git.Types
import Types.RepoVersion
import qualified Annex
import qualified Data.Map as M
defaultVersion :: RepoVersion
defaultVersion = RepoVersion 8
latestVersion :: RepoVersion
latestVersion = RepoVersion 8
supportedVersions :: [RepoVersion]
supportedVersions = map RepoVersion [8]
upgradableVersions :: [RepoVersion]
#ifndef mingw32_HOST_OS
upgradableVersions = map RepoVersion [0..7]
#else
upgradableVersions = map RepoVersion [2..7]
#endif
autoUpgradeableVersions :: M.Map RepoVersion RepoVersion
autoUpgradeableVersions = M.fromList
[ (RepoVersion 3, latestVersion)
, (RepoVersion 4, latestVersion)
, (RepoVersion 5, latestVersion)
, (RepoVersion 6, latestVersion)
, (RepoVersion 7, latestVersion)
]
versionField :: ConfigKey
versionField = annexConfig "version"
getVersion :: Annex (Maybe RepoVersion)
getVersion = annexVersion <$> Annex.getGitConfig
setVersion :: RepoVersion -> Annex ()
setVersion (RepoVersion v) = setConfig versionField (show v)
removeVersion :: Annex ()
removeVersion = unsetConfig versionField

Annex/View.hs
@ -0,0 +1,427 @@
{- metadata based branch views
-
- Copyright 2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.View where
import Annex.Common
import Annex.View.ViewedFile
import Types.View
import Types.MetaData
import Annex.MetaData
import qualified Annex
import qualified Git
import qualified Git.DiffTree as DiffTree
import qualified Git.Branch
import qualified Git.LsFiles
import qualified Git.Ref
import Git.UpdateIndex
import Git.Sha
import Git.Types
import Git.FilePath
import Annex.WorkTree
import Annex.GitOverlay
import Annex.Link
import Annex.CatFile
import Logs.MetaData
import Logs.View
import Utility.Glob
import Types.Command
import CmdLine.Action
import qualified Data.Text as T
import qualified Data.ByteString as B
import qualified Data.Set as S
import qualified Data.Map as M
import "mtl" Control.Monad.Writer
{- Each visible ViewFilter in a view results in another level of
- subdirectory nesting. When a file matches multiple ways, it will appear
- in multiple subdirectories. This means there is a bit of an exponential
- blowup with a single file appearing in a crazy number of places!
-
- Capping the view size to 5 is reasonable; who wants to dig
- through 5+ levels of subdirectories to find anything?
-}
viewTooLarge :: View -> Bool
viewTooLarge view = visibleViewSize view > 5
visibleViewSize :: View -> Int
visibleViewSize = length . filter viewVisible . viewComponents
{- Parses field=value, field!=value, tag, and !tag
-
- Note that the field may not be a legal metadata field name,
- but it's let through anyway.
- This is useful when matching on directory names with spaces,
- which are not legal MetaFields.
-}
parseViewParam :: String -> (MetaField, ViewFilter)
parseViewParam s = case separate (== '=') s of
('!':tag, []) | not (null tag) ->
( tagMetaField
, mkExcludeValues tag
)
(tag, []) ->
( tagMetaField
, mkFilterValues tag
)
(field, wanted)
| end field == "!" ->
( mkMetaFieldUnchecked (T.pack (beginning field))
, mkExcludeValues wanted
)
| otherwise ->
( mkMetaFieldUnchecked (T.pack field)
, mkFilterValues wanted
)
where
mkFilterValues v
| any (`elem` v) ['*', '?'] = FilterGlob v
| otherwise = FilterValues $ S.singleton $ toMetaValue $ encodeBS v
mkExcludeValues = ExcludeValues . S.singleton . toMetaValue . encodeBS
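-- Examples (a sketch, not in the original source; "author" is a
-- hypothetical field):
--
-- parseViewParam "tag"         -- (tagMetaField, FilterValues {tag})
-- parseViewParam "!tag"        -- (tagMetaField, ExcludeValues {tag})
-- parseViewParam "author=joey" -- (author, FilterValues {joey})
-- parseViewParam "author=j*"   -- (author, FilterGlob "j*")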
data ViewChange = Unchanged | Narrowing | Widening
deriving (Ord, Eq, Show)
{- Updates a view, adding new fields to filter on (Narrowing),
- or allowing new values in an existing field (Widening). -}
refineView :: View -> [(MetaField, ViewFilter)] -> (View, ViewChange)
refineView origview = checksize . calc Unchanged origview
where
calc c v [] = (v, c)
calc c v ((f, vf):rest) =
let (v', c') = refine v f vf
in calc (max c c') v' rest
refine view field vf
| field `elem` map viewField (viewComponents view) =
let (components', viewchanges) = runWriter $
mapM (\c -> updateViewComponent c field vf) (viewComponents view)
viewchange = if field `elem` map viewField (viewComponents origview)
then maximum viewchanges
else Narrowing
in (view { viewComponents = components' }, viewchange)
| otherwise =
let component = mkViewComponent field vf
view' = view { viewComponents = component : viewComponents view }
in (view', Narrowing)
checksize r@(v, _)
| viewTooLarge v = giveup $ "View is too large (" ++ show (visibleViewSize v) ++ " levels of subdirectories)"
| otherwise = r
updateViewComponent :: ViewComponent -> MetaField -> ViewFilter -> Writer [ViewChange] ViewComponent
updateViewComponent c field vf
| viewField c == field = do
let (newvf, viewchange) = combineViewFilter (viewFilter c) vf
tell [viewchange]
return $ mkViewComponent field newvf
| otherwise = return c
{- Adds an additional filter to a view. This can only result in narrowing
- the view. Multivalued filters are added in non-visible form. -}
filterView :: View -> [(MetaField, ViewFilter)] -> View
filterView v vs = v { viewComponents = viewComponents f' ++ viewComponents v}
where
f = fst $ refineView (v {viewComponents = []}) vs
f' = f { viewComponents = map toinvisible (viewComponents f) }
toinvisible c = c { viewVisible = False }
{- Combine old and new ViewFilters, yielding a result that matches
- either old+new, or only new.
-
- If we have FilterValues and change to a FilterGlob,
- it's always a widening change, because the glob could match other
- values. OTOH, going the other way, it's a Narrowing change if the old
- glob matches all the new FilterValues.
-
- With two globs, the old one is discarded, and the new one is used.
- We can tell if that's a narrowing change by checking if the old
- glob matches the new glob. For example, "*" matches "foo*",
- so that's narrowing. While "f?o" does not match "f??", so that's
- widening.
-}
combineViewFilter :: ViewFilter -> ViewFilter -> (ViewFilter, ViewChange)
combineViewFilter old@(FilterValues olds) (FilterValues news)
| combined == old = (combined, Unchanged)
| otherwise = (combined, Widening)
where
combined = FilterValues (S.union olds news)
combineViewFilter old@(ExcludeValues olds) (ExcludeValues news)
| combined == old = (combined, Unchanged)
| otherwise = (combined, Narrowing)
where
combined = ExcludeValues (S.union olds news)
combineViewFilter (FilterValues _) newglob@(FilterGlob _) =
(newglob, Widening)
combineViewFilter (FilterGlob oldglob) new@(FilterValues s)
| all (matchGlob (compileGlob oldglob CaseInsensative) . decodeBS . fromMetaValue) (S.toList s) = (new, Narrowing)
| otherwise = (new, Widening)
combineViewFilter (FilterGlob old) newglob@(FilterGlob new)
| old == new = (newglob, Unchanged)
| matchGlob (compileGlob old CaseInsensative) new = (newglob, Narrowing)
| otherwise = (newglob, Widening)
combineViewFilter (FilterGlob _) new@(ExcludeValues _) = (new, Narrowing)
combineViewFilter (ExcludeValues _) new@(FilterGlob _) = (new, Widening)
combineViewFilter (FilterValues _) new@(ExcludeValues _) = (new, Narrowing)
combineViewFilter (ExcludeValues _) new@(FilterValues _) = (new, Widening)
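-- For example (a sketch, not in the original source):
--
-- >>> snd (combineViewFilter (FilterGlob "*") (FilterGlob "foo*"))
-- Narrowing
-- >>> snd (combineViewFilter (FilterGlob "f?o") (FilterGlob "f??"))
-- Widening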
{- Generates views for a file from a branch, based on its metadata
- and the filename used in the branch.
-
- Note that a file may appear multiple times in a view, when it
- has multiple matching values for a MetaField used in the View.
-
- Of course if its MetaData does not match the View, it won't appear at
- all.
-
- Note that for efficiency, it's useful to partially
- evaluate this function with the view parameter and reuse
- the result. The globs in the view will then be compiled and memoized.
-}
viewedFiles :: View -> MkViewedFile -> FilePath -> MetaData -> [ViewedFile]
viewedFiles view =
let matchers = map viewComponentMatcher (viewComponents view)
in \mkviewedfile file metadata ->
let matches = map (\m -> m metadata) matchers
in if any isNothing matches
then []
else
let paths = pathProduct $
map (map toViewPath) (visible matches)
in if null paths
then [mkviewedfile file]
else map (</> mkviewedfile file) paths
where
visible = map (fromJust . snd) .
filter (viewVisible . fst) .
zip (viewComponents view)
{- Checks if metadata matches a ViewComponent filter, and if so
- returns the value, or values that match. Self-memoizing on ViewComponent. -}
viewComponentMatcher :: ViewComponent -> (MetaData -> Maybe [MetaValue])
viewComponentMatcher viewcomponent = \metadata ->
matcher (currentMetaDataValues metafield metadata)
where
metafield = viewField viewcomponent
matcher = case viewFilter viewcomponent of
FilterValues s -> \values -> setmatches $
S.intersection s values
FilterGlob glob ->
let cglob = compileGlob glob CaseInsensative
in \values -> setmatches $
S.filter (matchGlob cglob . decodeBS . fromMetaValue) values
ExcludeValues excludes -> \values ->
if S.null (S.intersection values excludes)
then Just []
else Nothing
setmatches s
| S.null s = Nothing
| otherwise = Just (S.toList s)
-- This is a unicode character that displays the same as '/' but is
-- not it. It is encoded using the filesystem encoding, which allows it
-- to be used even when not in a unicode capable locale.
pseudoSlash :: String
pseudoSlash = "\56546\56456\56469"
-- And this is '╲' similarly.
pseudoBackslash :: String
pseudoBackslash = "\56546\56469\56498"
toViewPath :: MetaValue -> FilePath
toViewPath = escapeslash [] . decodeBS . fromMetaValue
where
escapeslash s ('/':cs) = escapeslash (pseudoSlash:s) cs
escapeslash s ('\\':cs) = escapeslash (pseudoBackslash:s) cs
escapeslash s ('%':cs) = escapeslash ("%%":s) cs
escapeslash s (c1:c2:c3:cs)
| [c1,c2,c3] == pseudoSlash = escapeslash ("%":pseudoSlash:s) cs
| [c1,c2,c3] == pseudoBackslash = escapeslash ("%":pseudoBackslash:s) cs
| otherwise = escapeslash ([c1]:s) (c2:c3:cs)
escapeslash s cs = concat (reverse (cs:s))
fromViewPath :: FilePath -> MetaValue
fromViewPath = toMetaValue . encodeBS . deescapeslash []
where
deescapeslash s ('%':escapedc:cs) = deescapeslash ([escapedc]:s) cs
deescapeslash s (c1:c2:c3:cs)
| [c1,c2,c3] == pseudoSlash = deescapeslash ("/":s) cs
| [c1,c2,c3] == pseudoBackslash = deescapeslash ("\\":s) cs
| otherwise = deescapeslash ([c1]:s) (c2:c3:cs)
deescapeslash s cs = concat (reverse (cs:s))
prop_viewPath_roundtrips :: MetaValue -> Bool
prop_viewPath_roundtrips v = fromViewPath (toViewPath v) == v
pathProduct :: [[FilePath]] -> [FilePath]
pathProduct [] = []
pathProduct (l:ls) = foldl combinel l ls
where
combinel xs ys = [combine x y | x <- xs, y <- ys]
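-- For example (a sketch, not in the original source):
--
-- >>> pathProduct [["a","b"],["x","y"]]
-- ["a/x","a/y","b/x","b/y"]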
{- Extracts the metadata from a ViewedFile, based on the view that was used
- to construct it.
-
- Derived metadata is excluded.
-}
fromView :: View -> ViewedFile -> MetaData
fromView view f = MetaData $
M.fromList (zip fields values) `M.difference` derived
where
visible = filter viewVisible (viewComponents view)
fields = map viewField visible
paths = splitDirectories (dropFileName f)
values = map (S.singleton . fromViewPath) paths
MetaData derived = getViewedFileMetaData f
{- Constructing a view that will match arbitrary metadata, and applying
- it to a file yields a set of ViewedFile which all contain the same
- MetaFields that were present in the input metadata
- (excluding fields that are not visible). -}
prop_view_roundtrips :: FilePath -> MetaData -> Bool -> Bool
prop_view_roundtrips f metadata visible = or
[ null f
, null (takeFileName f) && null (takeDirectory f)
, viewTooLarge view
, all hasfields (viewedFiles view viewedFileFromReference f metadata)
]
where
view = View (Git.Ref "foo") $
map (\(mf, mv) -> ViewComponent mf (FilterValues $ S.filter (not . B.null . fromMetaValue) mv) visible)
(fromMetaData metadata)
visiblefields = sort (map viewField $ filter viewVisible (viewComponents view))
hasfields fv = sort (map fst (fromMetaData (fromView view fv))) == visiblefields
{- A directory foo/bar/baz/ is turned into metadata fields
- /=foo, foo/=bar, foo/bar/=baz.
-
- Note that this may generate MetaFields that legalField rejects.
- This is necessary to have a 1:1 mapping between directory names and
- fields. So this MetaData cannot safely be serialized. -}
getDirMetaData :: FilePath -> MetaData
getDirMetaData d = MetaData $ M.fromList $ zip fields values
where
dirs = splitDirectories d
fields = map (mkMetaFieldUnchecked . T.pack . addTrailingPathSeparator . joinPath)
(inits dirs)
values = map (S.singleton . toMetaValue . encodeBS . fromMaybe "" . headMaybe)
(tails dirs)
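-- Worked example (illustrative, not from the original source):
--   getDirMetaData "foo/bar/baz"
-- zips the inits of the path with the heads of its tails, yielding
--   "/" = foo, "foo/" = bar, "foo/bar/" = baz, "foo/bar/baz/" = ""
-- (the deepest field gets an empty value, since tails ends with []).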
getWorkTreeMetaData :: FilePath -> MetaData
getWorkTreeMetaData = getDirMetaData . dropFileName
getViewedFileMetaData :: FilePath -> MetaData
getViewedFileMetaData = getDirMetaData . dirFromViewedFile . takeFileName
{- Applies a view to the currently checked out branch, generating a new
- branch for the view.
-}
applyView :: View -> Annex Git.Branch
applyView = applyView' viewedFileFromReference getWorkTreeMetaData
{- Generates a new branch for a View, which must be a more narrow
- version of the View originally used to generate the currently
- checked out branch. That is, it must match a subset of the files
- in view, not any others.
-}
narrowView :: View -> Annex Git.Branch
narrowView = applyView' viewedFileReuse getViewedFileMetaData
{- Go through each staged file.
- If the file is not annexed, skip it, unless it's a dotfile in the top,
- or a file in a dotdir in the top.
- Look up the metadata of annexed files, and generate any ViewedFiles,
- and stage them.
-
- Must be run from top of repository.
-}
applyView' :: MkViewedFile -> (FilePath -> MetaData) -> View -> Annex Git.Branch
applyView' mkviewedfile getfilemetadata view = do
top <- fromRepo Git.repoPath
(l, clean) <- inRepo $ Git.LsFiles.inRepoDetails [] [top]
liftIO . nukeFile =<< fromRepo gitAnnexViewIndex
viewg <- withViewIndex gitRepo
withUpdateIndex viewg $ \uh -> do
forM_ l $ \(f, sha, mode) -> do
topf <- inRepo (toTopFilePath f)
go uh topf sha (toTreeItemType mode) =<< lookupKey f
liftIO $ void clean
genViewBranch view
where
genviewedfiles = viewedFiles view mkviewedfile -- enables memoization
go uh topf _sha _mode (Just k) = do
metadata <- getCurrentMetaData k
let f = fromRawFilePath $ getTopFilePath topf
let metadata' = getfilemetadata f `unionMetaData` metadata
forM_ (genviewedfiles f metadata') $ \fv -> do
f' <- fromRawFilePath <$>
fromRepo (fromTopFilePath $ asTopFilePath $ toRawFilePath fv)
stagesymlink uh f' =<< calcRepo (gitAnnexLink f' k)
go uh topf sha (Just treeitemtype) Nothing
| "." `B.isPrefixOf` getTopFilePath topf =
liftIO $ Git.UpdateIndex.streamUpdateIndex' uh $
pureStreamer $ updateIndexLine sha treeitemtype topf
go _ _ _ _ _ = noop
stagesymlink uh f linktarget = do
sha <- hashSymlink linktarget
liftIO . Git.UpdateIndex.streamUpdateIndex' uh
=<< inRepo (Git.UpdateIndex.stageSymlink f sha)
{- Diff between currently checked out branch and staged changes, and
- update metadata to reflect the changes that are being committed to the
- view.
-
- Adding a file to a directory adds the metadata represented by
- that directory to the file, and removing a file from a directory
- removes the metadata.
-
- Note that removes must be handled before adds. This is so
- that moving a file from x/foo/ to x/bar/ adds back the metadata for x.
-}
withViewChanges :: (ViewedFile -> Key -> CommandStart) -> (ViewedFile -> Key -> CommandStart) -> Annex ()
withViewChanges addmeta removemeta = do
(diffs, cleanup) <- inRepo $ DiffTree.diffIndex Git.Ref.headRef
forM_ diffs handleremovals
forM_ diffs handleadds
void $ liftIO cleanup
where
handleremovals item
| DiffTree.srcsha item `notElem` nullShas =
handlechange item removemeta
=<< catKey (DiffTree.srcsha item)
| otherwise = noop
handleadds item
| DiffTree.dstsha item `notElem` nullShas =
handlechange item addmeta
=<< catKey (DiffTree.dstsha item)
| otherwise = noop
handlechange item a = maybe noop
(void . commandAction . a (fromRawFilePath $ getTopFilePath $ DiffTree.file item))
{- Runs an action using the view index file.
- Note that the file does not necessarily exist, or can contain
- info staged for an old view. -}
withViewIndex :: Annex a -> Annex a
withViewIndex = withIndexFile ViewIndexFile . const
{- Generates a branch for a view, using the view index file
- to make a commit to the view branch. The view branch is not
- checked out, but entering it will display the view. -}
genViewBranch :: View -> Annex Git.Branch
genViewBranch view = withViewIndex $ do
let branch = branchView view
cmode <- annexCommitMode <$> Annex.getGitConfig
void $ inRepo $ Git.Branch.commit cmode True (fromRef branch) branch []
return branch
withCurrentView :: (View -> Annex a) -> Annex a
withCurrentView a = maybe (giveup "Not in a view.") a =<< currentView

86
Annex/View/ViewedFile.hs Normal file
View file

@ -0,0 +1,86 @@
{- filenames (not paths) used in views
-
- Copyright 2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.View.ViewedFile (
ViewedFile,
MkViewedFile,
viewedFileFromReference,
viewedFileReuse,
dirFromViewedFile,
prop_viewedFile_roundtrips,
) where
import Annex.Common
type FileName = String
type ViewedFile = FileName
type MkViewedFile = FilePath -> ViewedFile
{- Converts a filepath used in a reference branch to the
- filename that will be used in the view.
-
- No two filepaths from the same branch should yield the same result,
- so all directory structure needs to be included in the output filename
- in some way.
-
- So, from dir/subdir/file.foo, generate file_%dir%subdir%.foo
-}
viewedFileFromReference :: MkViewedFile
viewedFileFromReference f = concat
[ escape base
, if null dirs then "" else "_%" ++ intercalate "%" (map escape dirs) ++ "%"
, escape $ concat extensions
]
where
(path, basefile) = splitFileName f
dirs = filter (/= ".") $ map dropTrailingPathSeparator (splitPath path)
(base, extensions) = splitShortExtensions basefile
{- To avoid collisions with filenames or directories that contain
- '%', and to allow the original directories to be extracted
- from the ViewedFile, '%' is escaped.
-}
escape :: String -> String
escape = replace "%" (escchar:'%':[]) . replace [escchar] [escchar, escchar]
escchar :: Char
#ifndef mingw32_HOST_OS
escchar = '\\'
#else
-- \ is path separator on Windows, so instead use !
escchar = '!'
#endif
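-- Sketch of the escaping (not part of the original source), on a
-- non-Windows system where escchar is '\\':
--
--   escape "50%"   == "50\\%"     -- '%' gains an escchar prefix
--   escape "a\\b"  == "a\\\\b"    -- a literal escchar is doubled
--
-- so '%' can safely serve below as the directory delimiter.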
{- For use when operating already within a view, so whatever filepath
- is present in the work tree is already a ViewedFile. -}
viewedFileReuse :: MkViewedFile
viewedFileReuse = takeFileName
{- Extracts from a ViewedFile the directory where the file is located
- in the reference branch. -}
dirFromViewedFile :: ViewedFile -> FilePath
dirFromViewedFile = joinPath . drop 1 . sep [] ""
where
sep l _ [] = reverse l
sep l curr (c:cs)
| c == '%' = sep (reverse curr:l) "" cs
| c == escchar = case cs of
(c':cs') -> sep l (c':curr) cs'
[] -> sep l curr cs
| otherwise = sep l (c:curr) cs
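-- For instance (illustrative, not from the original source):
--
--   dirFromViewedFile "file_%dir%subdir%.foo" == "dir" </> "subdir"
--
-- The segment before the first '%' (the base name) is dropped, and the
-- trailing ".foo" never appears because sep discards the final
-- unterminated segment.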
prop_viewedFile_roundtrips :: FilePath -> Bool
prop_viewedFile_roundtrips f
-- Relative filenames wanted, not directories.
| any (isPathSeparator) (end f ++ beginning f) = True
| isAbsolute f = True
| otherwise = dir == dirFromViewedFile (viewedFileFromReference f)
where
dir = joinPath $ beginning $ splitDirectories f

29
Annex/Wanted.hs Normal file
View file

@ -0,0 +1,29 @@
{- git-annex checking whether content is wanted
-
- Copyright 2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Wanted where
import Annex.Common
import Logs.PreferredContent
import Annex.UUID
import qualified Data.Set as S
{- Check if a file is preferred content for the local repository. -}
wantGet :: Bool -> Maybe Key -> AssociatedFile -> Annex Bool
wantGet d key file = isPreferredContent Nothing S.empty key file d
{- Check if a file is preferred content for a remote. -}
wantSend :: Bool -> Maybe Key -> AssociatedFile -> UUID -> Annex Bool
wantSend d key file to = isPreferredContent (Just to) S.empty key file d
{- Check if a file can be dropped, maybe from a remote.
- Don't drop files that are preferred content. -}
wantDrop :: Bool -> Maybe UUID -> Maybe Key -> AssociatedFile -> Annex Bool
wantDrop d from key file = do
u <- maybe getUUID (return . id) from
not <$> isPreferredContent (Just u) (S.singleton u) key file d

117
Annex/WorkTree.hs Normal file
View file

@ -0,0 +1,117 @@
{- git-annex worktree files
-
- Copyright 2013-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.WorkTree where
import Annex.Common
import Annex.Link
import Annex.CatFile
import Annex.Content
import Annex.ReplaceFile
import Annex.CurrentBranch
import Annex.InodeSentinal
import Utility.InodeCache
import Git.FilePath
import qualified Git.Ref
import qualified Git.LsTree
import qualified Git.Types
import qualified Database.Keys
import qualified Database.Keys.SQL
import Config
import qualified Utility.RawFilePath as R
import Control.Concurrent
{- Looks up the key corresponding to an annexed file in the work tree,
- by examining what the file links to.
-
- An unlocked file will not have a link on disk, so fall back to
- looking for a pointer to a key in git.
-
- When in an adjusted branch that may have hidden the file, looks for a
- pointer to a key in the original branch.
-}
lookupKey :: RawFilePath -> Annex (Maybe Key)
lookupKey = lookupKey' catkeyfile
where
catkeyfile file =
ifM (liftIO $ doesFileExist $ fromRawFilePath file)
( catKeyFile file
, catKeyFileHidden file =<< getCurrentBranch
)
lookupKeyNotHidden :: RawFilePath -> Annex (Maybe Key)
lookupKeyNotHidden = lookupKey' catkeyfile
where
catkeyfile file =
ifM (liftIO $ doesFileExist $ fromRawFilePath file)
( catKeyFile file
, return Nothing
)
lookupKey' :: (RawFilePath -> Annex (Maybe Key)) -> RawFilePath -> Annex (Maybe Key)
lookupKey' catkeyfile file = isAnnexLink file >>= \case
Just key -> return (Just key)
Nothing -> catkeyfile file
{- Modifies an action to only act on files that are already annexed,
- and passes the key on to it. -}
whenAnnexed :: (RawFilePath -> Key -> Annex (Maybe a)) -> RawFilePath -> Annex (Maybe a)
whenAnnexed a file = ifAnnexed file (a file) (return Nothing)
ifAnnexed :: RawFilePath -> (Key -> Annex a) -> Annex a -> Annex a
ifAnnexed file yes no = maybe no yes =<< lookupKey file
{- Find all unlocked files and update the keys database for them.
-
- This is expensive, and so normally the associated files are updated
- incrementally when changes are noticed. So, this only needs to be done
- when initializing/upgrading a v6+ mode repository.
-
- Also, the content for the unlocked file may already be present as
- an annex object. If so, populate the pointer file with it.
- But if the worktree file does not have a pointer file's content, it is left
- as-is.
-}
scanUnlockedFiles :: Annex ()
scanUnlockedFiles = whenM (inRepo Git.Ref.headExists <&&> not <$> isBareRepo) $ do
dropold <- liftIO $ newMVar $
Database.Keys.runWriter $
liftIO . Database.Keys.SQL.dropAllAssociatedFiles
(l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive Git.Ref.headRef
forM_ l $ \i ->
when (isregfile i) $
maybe noop (add dropold i)
=<< catKey (Git.LsTree.sha i)
liftIO $ void cleanup
where
isregfile i = case Git.Types.toTreeItemType (Git.LsTree.mode i) of
Just Git.Types.TreeFile -> True
Just Git.Types.TreeExecutable -> True
_ -> False
add dropold i k = do
join $ fromMaybe noop <$> liftIO (tryTakeMVar dropold)
let tf = Git.LsTree.file i
Database.Keys.runWriter $
liftIO . Database.Keys.SQL.addAssociatedFileFast k tf
whenM (inAnnex k) $ do
f <- fromRepo $ fromTopFilePath tf
liftIO (isPointerFile f) >>= \case
Just k' | k' == k -> do
destmode <- liftIO $ catchMaybeIO $
fileMode <$> R.getFileStatus f
ic <- replaceWorkTreeFile (fromRawFilePath f) $ \tmp -> do
let tmp' = toRawFilePath tmp
linkFromAnnex k tmp destmode >>= \case
LinkAnnexOk ->
withTSDelta (liftIO . genInodeCache tmp')
LinkAnnexNoop -> return Nothing
LinkAnnexFailed -> liftIO $ do
writePointerFile tmp' k destmode
return Nothing
maybe noop (restagePointerFile (Restage True) f) ic
_ -> noop

126
Annex/WorkerPool.hs Normal file
View file

@ -0,0 +1,126 @@
{- git-annex worker thread pool
-
- Copyright 2015-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.WorkerPool where
import Annex
import Annex.Common
import Types.WorkerPool
import Control.Concurrent
import Control.Concurrent.STM
{- Runs an action and makes the current thread have the specified stage
- while doing so. If too many other threads are running in the specified
- stage, waits for one of them to become idle.
-
- Noop if the current thread already has the requested stage, or if the
- current thread is not in the worker pool, or if concurrency is not
- enabled.
-
- Also a noop if the stage is not one of the stages that the worker pool
- uses.
-}
enteringStage :: WorkerStage -> Annex a -> Annex a
enteringStage newstage a = Annex.getState Annex.workers >>= \case
Nothing -> a
Just tv -> do
mytid <- liftIO myThreadId
let set = changeStageTo mytid tv (const newstage)
let restore = maybe noop (void . changeStageTo mytid tv . const)
bracket set restore (const a)
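-- A hypothetical caller (not from the original source) might wrap a
-- CPU-bound step so it is throttled alongside other verification work;
-- VerifyStage is assumed here to be one of the configured worker stages,
-- and expensiveVerify is an invented name:
--
--   verifyKey key = enteringStage VerifyStage (expensiveVerify key)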
{- Transition the current thread to the initial stage.
- This is done once the thread is ready to begin work.
-}
enteringInitialStage :: Annex ()
enteringInitialStage = Annex.getState Annex.workers >>= \case
Nothing -> noop
Just tv -> do
mytid <- liftIO myThreadId
void $ changeStageTo mytid tv initialStage
{- This needs to leave the WorkerPool with the same number of
- idle and active threads, and with the same number of threads for each
- WorkerStage. So, all it can do is swap the WorkerStage of our thread's
- ActiveWorker with an IdleWorker.
-
- Must avoid a deadlock if all worker threads end up here at the same
- time, or if there are no suitable IdleWorkers left. So if necessary
- we first replace our ActiveWorker with an IdleWorker in the pool, to allow
- some other thread to use it, before waiting for a suitable IdleWorker
- for us to use.
-
- Note that the spareVals list in the WorkerPool does not get anything added
- to it when adding the IdleWorker, so there will for a while be more
- IdleWorkers in the pool than spareVals. That does not prevent other threads
- that call this from using them, though, so it's fine.
-}
changeStageTo :: ThreadId -> TMVar (WorkerPool AnnexState) -> (UsedStages -> WorkerStage) -> Annex (Maybe WorkerStage)
changeStageTo mytid tv getnewstage = liftIO $
replaceidle >>= maybe
(return Nothing)
(either waitidle (return . Just))
where
replaceidle = atomically $ do
pool <- takeTMVar tv
let newstage = getnewstage (usedStages pool)
let notchanging = do
putTMVar tv pool
return Nothing
if memberStage newstage (usedStages pool)
then case removeThreadIdWorkerPool mytid pool of
Just ((myaid, oldstage), pool')
| oldstage /= newstage -> case getIdleWorkerSlot newstage pool' of
Nothing -> do
putTMVar tv $
addWorkerPool (IdleWorker oldstage) pool'
return $ Just $ Left (myaid, newstage, oldstage)
Just pool'' -> do
-- optimisation
putTMVar tv $
addWorkerPool (IdleWorker oldstage) $
addWorkerPool (ActiveWorker myaid newstage) pool''
return $ Just $ Right oldstage
| otherwise -> notchanging
_ -> notchanging
else notchanging
waitidle (myaid, newstage, oldstage) = atomically $ do
pool <- waitIdleWorkerSlot newstage =<< takeTMVar tv
putTMVar tv $ addWorkerPool (ActiveWorker myaid newstage) pool
return (Just oldstage)
-- | Waits until there's an idle StartStage worker in the worker pool,
-- removes it from the pool, and returns its state.
--
-- If the worker pool is not already allocated, returns Nothing.
waitStartWorkerSlot :: TMVar (WorkerPool Annex.AnnexState) -> STM (Maybe (Annex.AnnexState, WorkerStage))
waitStartWorkerSlot tv = do
pool <- takeTMVar tv
st <- go pool
return $ Just (st, StartStage)
where
go pool = case spareVals pool of
[] -> retry
(v:vs) -> do
let pool' = pool { spareVals = vs }
putTMVar tv =<< waitIdleWorkerSlot StartStage pool'
return v
waitIdleWorkerSlot :: WorkerStage -> WorkerPool Annex.AnnexState -> STM (WorkerPool Annex.AnnexState)
waitIdleWorkerSlot wantstage = maybe retry return . getIdleWorkerSlot wantstage
getIdleWorkerSlot :: WorkerStage -> WorkerPool Annex.AnnexState -> Maybe (WorkerPool Annex.AnnexState)
getIdleWorkerSlot wantstage pool = do
l <- findidle [] (workerList pool)
return $ pool { workerList = l }
where
findidle _ [] = Nothing
findidle c ((IdleWorker stage):rest)
| stage == wantstage = Just (c ++ rest)
findidle c (w:rest) = findidle (w:c) rest
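-- Editorial note (not from the original source): the guarded IdleWorker
-- equation above intentionally falls through to the final equation when
-- the stage does not match, so non-matching idle workers are kept
-- (order-reversed) in the accumulator c and restored on success.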

288
Annex/YoutubeDl.hs Normal file
View file

@ -0,0 +1,288 @@
{- youtube-dl integration for git-annex
-
- Copyright 2017-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.YoutubeDl (
youtubeDl,
youtubeDlTo,
youtubeDlSupported,
youtubeDlCheck,
youtubeDlFileName,
youtubeDlFileNameHtmlOnly,
) where
import Annex.Common
import qualified Annex
import Annex.Content
import Annex.Url
import Utility.DiskFree
import Utility.HtmlDetect
import Utility.Process.Transcript
import Utility.Metered
import Utility.DataUnits
import Messages.Progress
import Logs.Transfer
import Network.URI
import Control.Concurrent.Async
import Data.Char
import Text.Read
-- youtube-dl can follow redirects to anywhere, including potentially
-- localhost or a private address. So, it's only allowed to download
-- content if the user has allowed access to all addresses.
youtubeDlAllowed :: Annex Bool
youtubeDlAllowed = ipAddressesUnlimited
youtubeDlNotAllowedMessage :: String
youtubeDlNotAllowedMessage = unwords
[ "This url is supported by youtube-dl, but"
, "youtube-dl could potentially access any address, and the"
, "configuration of annex.security.allowed-ip-addresses"
, "does not allow that. Not using youtube-dl."
]
-- Runs youtube-dl in a work directory, to download a single media file
-- from the url. Returns the path to the media file in the work directory.
--
-- Displays a progress meter as youtube-dl downloads.
--
-- If youtube-dl fails without writing any files to the work directory,
-- or is not installed, returns Right Nothing.
--
-- The work directory can contain files from a previous run of youtube-dl
-- and it will resume. It should not contain any other files though,
-- and youtube-dl needs to finish up with only one file in the directory
-- so we know which one it downloaded.
--
-- (Note that we can't use --output to specify the file to download to,
-- due to <https://github.com/rg3/youtube-dl/issues/14864>)
youtubeDl :: URLString -> FilePath -> MeterUpdate -> Annex (Either String (Maybe FilePath))
youtubeDl url workdir p = ifM ipAddressesUnlimited
( withUrlOptions $ youtubeDl' url workdir p
, return $ Left youtubeDlNotAllowedMessage
)
youtubeDl' :: URLString -> FilePath -> MeterUpdate -> UrlOptions -> Annex (Either String (Maybe FilePath))
youtubeDl' url workdir p uo
| supportedScheme uo url = ifM (liftIO $ inPath "youtube-dl")
( runcmd >>= \case
Right True -> workdirfiles >>= \case
(f:[]) -> return (Right (Just f))
[] -> return nofiles
fs -> return (toomanyfiles fs)
Right False -> workdirfiles >>= \case
[] -> return (Right Nothing)
_ -> return (Left "youtube-dl download is incomplete. Run the command again to resume.")
Left msg -> return (Left msg)
, return (Right Nothing)
)
| otherwise = return (Right Nothing)
where
nofiles = Left "youtube-dl did not put any media in its work directory, perhaps it's been configured to store files somewhere else?"
toomanyfiles fs = Left $ "youtube-dl downloaded multiple media files; git-annex is only able to deal with one per url: " ++ show fs
workdirfiles = liftIO $ filterM (doesFileExist) =<< dirContents workdir
runcmd = youtubeDlMaxSize workdir >>= \case
Left msg -> return (Left msg)
Right maxsize -> do
opts <- youtubeDlOpts (dlopts ++ maxsize)
oh <- mkOutputHandlerQuiet
-- The size is unknown to start. Once youtube-dl
-- outputs some progress, the meter will be updated
-- with the size, which is why it's important the
-- meter is passed into commandMeter'
let unknownsize = Nothing :: Maybe FileSize
ok <- metered (Just p) unknownsize $ \meter meterupdate ->
liftIO $ commandMeter'
parseYoutubeDlProgress oh (Just meter) meterupdate "youtube-dl" opts
(\pr -> pr { cwd = Just workdir })
return (Right ok)
dlopts =
[ Param url
-- To make youtube-dl only download one file when given a
-- page with a video and a playlist, download only the video.
, Param "--no-playlist"
-- And when given a page with only a playlist, download only
-- the first video on the playlist. (Assumes the video is
-- somewhat stable, but this is the only way to prevent
-- youtube-dl from downloading the whole playlist.)
, Param "--playlist-items", Param "0"
]
-- To honor annex.diskreserve, ask youtube-dl to not download too
-- large a media file. Factors in other downloads that are in progress,
-- and any files in the workdir that it may have partially downloaded
-- before.
youtubeDlMaxSize :: FilePath -> Annex (Either String [CommandParam])
youtubeDlMaxSize workdir = ifM (Annex.getState Annex.force)
( return $ Right []
, liftIO (getDiskFree workdir) >>= \case
Just have -> do
inprogress <- sizeOfDownloadsInProgress (const True)
partial <- liftIO $ sum
<$> (mapM getFileSize =<< dirContents workdir)
reserve <- annexDiskReserve <$> Annex.getGitConfig
let maxsize = have - reserve - inprogress + partial
if maxsize > 0
then return $ Right
[ Param "--max-filesize"
, Param (show maxsize)
]
else return $ Left $
needMoreDiskSpace $
negate maxsize + 1024
Nothing -> return $ Right []
)
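-- Worked example of the calculation above (numbers are illustrative):
-- with 50GB free, an annex.diskreserve of 1GB, 5GB of other downloads in
-- progress, and 2GB already in the workdir, maxsize is
-- 50 - 1 - 5 + 2 = 46GB, passed to youtube-dl as --max-filesize.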
-- Downloads a media file to a destination.
youtubeDlTo :: Key -> URLString -> FilePath -> MeterUpdate -> Annex Bool
youtubeDlTo key url dest p = do
res <- withTmpWorkDir key $ \workdir ->
youtubeDl url workdir p >>= \case
Right (Just mediafile) -> do
liftIO $ renameFile mediafile dest
return (Just True)
Right Nothing -> return (Just False)
Left msg -> do
warning msg
return Nothing
return (fromMaybe False res)
-- youtube-dl supports downloading urls that are not html pages,
-- but we don't want to use it for such urls, since they can be downloaded
-- without it. So, this first downloads part of the content and checks
-- if it's a html page; only then is youtube-dl used.
htmlOnly :: URLString -> a -> Annex a -> Annex a
htmlOnly url fallback a = withUrlOptions $ \uo ->
liftIO (downloadPartial url uo htmlPrefixLength) >>= \case
Just bs | isHtmlBs bs -> a
_ -> return fallback
-- Check if youtube-dl supports downloading content from an url.
youtubeDlSupported :: URLString -> Annex Bool
youtubeDlSupported url = either (const False) id
<$> withUrlOptions (youtubeDlCheck' url)
-- Check if youtube-dl can find media in an url.
--
-- While this does not download anything, it checks youtubeDlAllowed
-- for symmetry with youtubeDl; the check should not succeed if the
-- download won't succeed.
youtubeDlCheck :: URLString -> Annex (Either String Bool)
youtubeDlCheck url = ifM youtubeDlAllowed
( withUrlOptions $ youtubeDlCheck' url
, return $ Left youtubeDlNotAllowedMessage
)
youtubeDlCheck' :: URLString -> UrlOptions -> Annex (Either String Bool)
youtubeDlCheck' url uo
| supportedScheme uo url = catchMsgIO $ htmlOnly url False $ do
opts <- youtubeDlOpts [ Param url, Param "--simulate" ]
liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing
| otherwise = return (Right False)
-- Ask youtube-dl for the filename of media in an url.
--
-- (This is not always identical to the filename it uses when downloading.)
youtubeDlFileName :: URLString -> Annex (Either String FilePath)
youtubeDlFileName url = withUrlOptions go
where
go uo
| supportedScheme uo url = flip catchIO (pure . Left . show) $
htmlOnly url nomedia (youtubeDlFileNameHtmlOnly' url uo)
| otherwise = return nomedia
nomedia = Left "no media in url"
-- Does not do the htmlOnly check of the url; use when that's already
-- been verified.
youtubeDlFileNameHtmlOnly :: URLString -> Annex (Either String FilePath)
youtubeDlFileNameHtmlOnly = withUrlOptions . youtubeDlFileNameHtmlOnly'
youtubeDlFileNameHtmlOnly' :: URLString -> UrlOptions -> Annex (Either String FilePath)
youtubeDlFileNameHtmlOnly' url uo
| supportedScheme uo url = flip catchIO (pure . Left . show) go
| otherwise = return nomedia
where
go = do
-- Sometimes youtube-dl will fail with an ugly backtrace
-- (eg, http://bugs.debian.org/874321)
-- so catch stderr as well as stdout to avoid the user
-- seeing it. --no-warnings avoids warning messages that
-- are output to stdout.
opts <- youtubeDlOpts
[ Param url
, Param "--get-filename"
, Param "--no-warnings"
, Param "--no-playlist"
]
let p = (proc "youtube-dl" (toCommand opts))
{ std_out = CreatePipe
, std_err = CreatePipe
}
liftIO $ withCreateProcess p waitproc
waitproc Nothing (Just o) (Just e) pid = do
output <- fmap fst $
hGetContentsStrict o
`concurrently`
hGetContentsStrict e
ok <- liftIO $ checkSuccessProcess pid
return $ case (ok, lines output) of
(True, (f:_)) | not (null f) -> Right f
_ -> nomedia
waitproc _ _ _ _ = error "internal"
nomedia = Left "no media in url"
youtubeDlOpts :: [CommandParam] -> Annex [CommandParam]
youtubeDlOpts addopts = do
opts <- map Param . annexYoutubeDlOptions <$> Annex.getGitConfig
return (opts ++ addopts)
supportedScheme :: UrlOptions -> URLString -> Bool
supportedScheme uo url = case parseURIRelaxed url of
Nothing -> False
Just u -> case uriScheme u of
-- avoid ugly message from youtube-dl about not supporting file:
"file:" -> False
-- ftp indexes may look like html pages, and there's no point
-- involving youtube-dl in an ftp download
"ftp:" -> False
_ -> allowedScheme uo u
{- Strategy: Look for chunks prefixed with \r, which look approximately
- like this:
- "ESC[K[download] 26.6% of 60.22MiB at 254.69MiB/s ETA 00:00"
- Look at the number before "% of " and the number and unit after,
- to determine the number of bytes.
-}
parseYoutubeDlProgress :: ProgressParser
parseYoutubeDlProgress = go [] . reverse . progresschunks
where
delim = '\r'
progresschunks = drop 1 . splitc delim
go remainder [] = (Nothing, Nothing, remainder)
go remainder (x:xs) = case split "% of " x of
(p:r:[]) -> case (parsepercent p, parsebytes r) of
(Just percent, Just total) ->
( Just (toBytesProcessed (calc percent total))
, Just (TotalSize total)
, remainder
)
_ -> go (delim:x++remainder) xs
_ -> go (delim:x++remainder) xs
calc :: Double -> Integer -> Integer
calc percent total = round (percent * fromIntegral total / 100)
parsepercent :: String -> Maybe Double
parsepercent = readMaybe . reverse . takeWhile (not . isSpace) . reverse
parsebytes = readSize units . takeWhile (not . isSpace)
units = memoryUnits ++ storageUnits
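-- An illustrative trace (not from the original source): given output
-- ending in "\r[download]  26.6% of 60.22MiB at 254.69MiB/s ETA 00:00",
-- parsepercent yields 26.6 and parsebytes yields the byte count of
-- 60.22MiB, so the meter reports about 26.6% of that size as bytes
-- processed, along with the TotalSize.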

195
Assistant.hs Normal file
View file

@ -0,0 +1,195 @@
{- git-annex assistant daemon
-
- Copyright 2012-2013 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Assistant where
import qualified Annex
import Assistant.Common
import Assistant.DaemonStatus
import Assistant.NamedThread
import Assistant.Types.ThreadedMonad
import Assistant.Threads.DaemonStatus
import Assistant.Threads.Watcher
import Assistant.Threads.Committer
import Assistant.Threads.Pusher
import Assistant.Threads.Exporter
import Assistant.Threads.Merger
import Assistant.Threads.TransferWatcher
import Assistant.Threads.Transferrer
import Assistant.Threads.RemoteControl
import Assistant.Threads.SanityChecker
import Assistant.Threads.Cronner
import Assistant.Threads.ProblemFixer
#ifndef mingw32_HOST_OS
import Assistant.Threads.MountWatcher
#endif
import Assistant.Threads.NetWatcher
import Assistant.Threads.Upgrader
import Assistant.Threads.UpgradeWatcher
import Assistant.Threads.TransferScanner
import Assistant.Threads.TransferPoller
import Assistant.Threads.ConfigMonitor
import Assistant.Threads.Glacier
#ifdef WITH_WEBAPP
import Assistant.WebApp
import Assistant.Threads.WebApp
#ifdef WITH_PAIRING
import Assistant.Threads.PairListener
#endif
#else
import Assistant.Types.UrlRenderer
#endif
import qualified Utility.Daemon
import Utility.ThreadScheduler
import Utility.HumanTime
import qualified BuildInfo
import Annex.Perms
import Annex.BranchState
import Utility.LogFile
#ifdef mingw32_HOST_OS
import Utility.Env
import Annex.Path
import System.Environment (getArgs)
#endif
import System.Log.Logger
import Network.Socket (HostName)
stopDaemon :: Annex ()
stopDaemon = liftIO . Utility.Daemon.stopDaemon =<< fromRepo gitAnnexPidFile
{- Starts the daemon. If the daemon is run in the foreground, once it's
- running, the browser can be started.
-
- startbrowser is passed the url and html shim file, as well as the original
- stdout and stderr descriptors. -}
startDaemon :: Bool -> Bool -> Maybe Duration -> Maybe String -> Maybe HostName -> Maybe (Maybe Handle -> Maybe Handle -> String -> FilePath -> IO ()) -> Annex ()
startDaemon assistant foreground startdelay cannotrun listenhost startbrowser = do
Annex.changeState $ \s -> s { Annex.daemon = True }
enableInteractiveBranchAccess
pidfile <- fromRepo gitAnnexPidFile
logfile <- fromRepo gitAnnexLogFile
liftIO $ debugM desc $ "logging to " ++ logfile
createAnnexDirectory (parentDir pidfile)
#ifndef mingw32_HOST_OS
createAnnexDirectory (parentDir logfile)
logfd <- liftIO $ handleToFd =<< openLog logfile
if foreground
then do
origout <- liftIO $ catchMaybeIO $
fdToHandle =<< dup stdOutput
origerr <- liftIO $ catchMaybeIO $
fdToHandle =<< dup stdError
let undaemonize = Utility.Daemon.foreground logfd (Just pidfile)
start undaemonize $
case startbrowser of
Nothing -> Nothing
Just a -> Just $ a origout origerr
else
start (Utility.Daemon.daemonize logfd (Just pidfile) False) Nothing
#else
-- Windows doesn't daemonize, but does redirect output to the
-- log file. The only way to do so is to restart the program.
when (foreground || not foreground) $ do
let flag = "GIT_ANNEX_OUTPUT_REDIR"
createAnnexDirectory (parentDir logfile)
ifM (liftIO $ isNothing <$> getEnv flag)
( liftIO $ withNullHandle $ \nullh -> do
loghandle <- openLog logfile
e <- getEnvironment
cmd <- programPath
ps <- getArgs
let p = (proc cmd ps)
{ env = Just (addEntry flag "1" e)
, std_in = UseHandle nullh
, std_out = UseHandle loghandle
, std_err = UseHandle loghandle
}
exitcode <- withCreateProcess p $ \_ _ _ pid ->
waitForProcess pid
exitWith exitcode
, start (Utility.Daemon.foreground (Just pidfile)) $
case startbrowser of
Nothing -> Nothing
Just a -> Just $ a Nothing Nothing
)
#endif
where
desc
| assistant = "assistant"
| otherwise = "watch"
start daemonize webappwaiter = withThreadState $ \st -> do
checkCanWatch
dstatus <- startDaemonStatus
logfile <- fromRepo gitAnnexLogFile
liftIO $ debugM desc $ "logging to " ++ logfile
liftIO $ daemonize $
flip runAssistant (go webappwaiter)
=<< newAssistantData st dstatus
#ifdef WITH_WEBAPP
go webappwaiter = do
d <- getAssistant id
#else
go _webappwaiter = do
#endif
notice ["starting", desc, "version", BuildInfo.packageversion]
urlrenderer <- liftIO newUrlRenderer
#ifdef WITH_WEBAPP
let webappthread = [ assist $ webAppThread d urlrenderer False cannotrun Nothing listenhost webappwaiter ]
#else
let webappthread = []
#endif
let threads = if isJust cannotrun
then webappthread
else webappthread ++
[ watch commitThread
#ifdef WITH_WEBAPP
#ifdef WITH_PAIRING
, assist $ pairListenerThread urlrenderer
#endif
#endif
, assist pushThread
, assist pushRetryThread
, assist exportThread
, assist exportRetryThread
, assist mergeThread
, assist transferWatcherThread
, assist transferPollerThread
, assist transfererThread
, assist remoteControlThread
, assist daemonStatusThread
, assist $ sanityCheckerDailyThread urlrenderer
, assist sanityCheckerHourlyThread
, assist $ problemFixerThread urlrenderer
#ifndef mingw32_HOST_OS
, assist $ mountWatcherThread urlrenderer
#endif
, assist netWatcherThread
, assist $ upgraderThread urlrenderer
, assist $ upgradeWatcherThread urlrenderer
, assist netWatcherFallbackThread
, assist $ transferScannerThread urlrenderer
, assist $ cronnerThread urlrenderer
, assist configMonitorThread
, assist glacierThread
, watch watchThread
-- must come last so that all threads that wait
-- on it have already started waiting
, watch $ sanityCheckerStartupThread startdelay
]
mapM_ (startthread urlrenderer) threads
liftIO waitForTermination
watch a = (True, a)
assist a = (False, a)
startthread urlrenderer (watcher, t)
| watcher || assistant = startNamedThread urlrenderer t
| otherwise = noop

460
Assistant/Alert.hs Normal file
View file

@ -0,0 +1,460 @@
{- git-annex assistant alerts
-
- Copyright 2012-2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings, CPP, BangPatterns #-}
module Assistant.Alert where
import Annex.Common
import Assistant.Types.Alert
import Assistant.Alert.Utility
import qualified Remote
import Utility.Tense
import Types.Transfer
import Types.Distribution
import Git.Types (RemoteName)
import Data.String
import qualified Data.Text as T
import qualified Control.Exception as E
#ifdef WITH_WEBAPP
import Assistant.DaemonStatus
import Assistant.WebApp.Types
import Assistant.WebApp (renderUrl)
#endif
import Assistant.Monad
import Assistant.Types.UrlRenderer
{- Makes a button for an alert that opens a Route.
-
- If autoclose is set, the button will close the alert it's
- attached to when clicked. -}
#ifdef WITH_WEBAPP
mkAlertButton :: Bool -> T.Text -> UrlRenderer -> Route WebApp -> Assistant AlertButton
mkAlertButton autoclose label urlrenderer route = do
close <- asIO1 removeAlert
url <- liftIO $ renderUrl urlrenderer route []
return $ AlertButton
{ buttonLabel = label
, buttonUrl = url
, buttonAction = if autoclose then Just close else Nothing
, buttonPrimary = True
}
#endif
renderData :: Alert -> TenseText
renderData = tenseWords . alertData
baseActivityAlert :: Alert
baseActivityAlert = Alert
{ alertClass = Activity
, alertHeader = Nothing
, alertMessageRender = renderData
, alertData = []
, alertCounter = 0
, alertBlockDisplay = False
, alertClosable = False
, alertPriority = Medium
, alertIcon = Just ActivityIcon
, alertCombiner = Nothing
, alertName = Nothing
, alertButtons = []
}
warningAlert :: String -> String -> Alert
warningAlert name msg = Alert
{ alertClass = Warning
, alertHeader = Just $ tenseWords ["warning"]
, alertMessageRender = renderData
, alertData = [UnTensed $ T.pack msg]
, alertCounter = 0
, alertBlockDisplay = True
, alertClosable = True
, alertPriority = High
, alertIcon = Just ErrorIcon
, alertCombiner = Just $ dataCombiner $ \_old new -> new
, alertName = Just $ WarningAlert name
, alertButtons = []
}
errorAlert :: String -> [AlertButton] -> Alert
errorAlert msg buttons = Alert
{ alertClass = Error
, alertHeader = Nothing
, alertMessageRender = renderData
, alertData = [UnTensed $ T.pack msg]
, alertCounter = 0
, alertBlockDisplay = True
, alertClosable = True
, alertPriority = Pinned
, alertIcon = Just ErrorIcon
, alertCombiner = Nothing
, alertName = Nothing
, alertButtons = buttons
}
activityAlert :: Maybe TenseText -> [TenseChunk] -> Alert
activityAlert header dat = baseActivityAlert
{ alertHeader = header
, alertData = dat
}
startupScanAlert :: Alert
startupScanAlert = activityAlert Nothing
[Tensed "Performing" "Performed", "startup scan"]
{- Displayed when a shutdown is occurring, so will be seen after shutdown
- has happened. -}
shutdownAlert :: Alert
shutdownAlert = warningAlert "shutdown" "git-annex has been shut down"
commitAlert :: Alert
commitAlert = activityAlert Nothing
[Tensed "Committing" "Committed", "changes to git"]
showRemotes :: [RemoteName] -> TenseChunk
showRemotes = UnTensed . T.intercalate ", " . map T.pack
syncAlert :: [Remote] -> Alert
syncAlert = syncAlert' . map Remote.name
syncAlert' :: [RemoteName] -> Alert
syncAlert' rs = baseActivityAlert
{ alertName = Just SyncAlert
, alertHeader = Just $ tenseWords
[Tensed "Syncing" "Synced", "with", showRemotes rs]
, alertPriority = Low
, alertIcon = Just SyncIcon
}
syncResultAlert :: [Remote] -> [Remote] -> Alert
syncResultAlert succeeded failed = syncResultAlert'
(map Remote.name succeeded)
(map Remote.name failed)
syncResultAlert' :: [RemoteName] -> [RemoteName] -> Alert
syncResultAlert' succeeded failed = makeAlertFiller (not $ null succeeded) $
baseActivityAlert
{ alertName = Just SyncAlert
, alertHeader = Just $ tenseWords msg
}
where
msg
| null succeeded = ["Failed to sync with", showRemotes failed]
| null failed = ["Synced with", showRemotes succeeded]
| otherwise =
[ "Synced with", showRemotes succeeded
, "but not with", showRemotes failed
]
sanityCheckAlert :: Alert
sanityCheckAlert = activityAlert
(Just $ tenseWords [Tensed "Running" "Ran", "daily sanity check"])
["to make sure everything is ok."]
sanityCheckFixAlert :: String -> Alert
sanityCheckFixAlert msg = Alert
{ alertClass = Warning
, alertHeader = Just $ tenseWords ["Fixed a problem"]
, alertMessageRender = render
, alertData = [UnTensed $ T.pack msg]
, alertCounter = 0
, alertBlockDisplay = True
, alertPriority = High
, alertClosable = True
, alertIcon = Just ErrorIcon
, alertName = Just SanityCheckFixAlert
, alertCombiner = Just $ dataCombiner (++)
, alertButtons = []
}
where
render alert = tenseWords $ alerthead : alertData alert ++ [alertfoot]
alerthead = "The daily sanity check found and fixed a problem:"
alertfoot = "If these problems persist, consider filing a bug report."
fsckingAlert :: AlertButton -> Maybe Remote -> Alert
fsckingAlert button mr = baseActivityAlert
{ alertData = case mr of
Nothing -> [ UnTensed $ T.pack $ "Consistency check in progress" ]
Just r -> [ UnTensed $ T.pack $ "Consistency check of " ++ Remote.name r ++ " in progress"]
, alertButtons = [button]
}
showFscking :: UrlRenderer -> Maybe Remote -> IO (Either E.SomeException a) -> Assistant a
showFscking urlrenderer mr a = do
#ifdef WITH_WEBAPP
button <- mkAlertButton False (T.pack "Configure") urlrenderer ConfigFsckR
r <- alertDuring (fsckingAlert button mr) $
liftIO a
#else
r <- liftIO a
#endif
either (liftIO . E.throwIO) return r
notFsckedNudge :: UrlRenderer -> Maybe Remote -> Assistant ()
#ifdef WITH_WEBAPP
notFsckedNudge urlrenderer mr = do
button <- mkAlertButton True (T.pack "Configure") urlrenderer ConfigFsckR
void $ addAlert (notFsckedAlert mr button)
#else
notFsckedNudge _ _ = noop
#endif
notFsckedAlert :: Maybe Remote -> AlertButton -> Alert
notFsckedAlert mr button = Alert
{ alertHeader = Just $ fromString $ concat
[ "You should enable consistency checking to protect your data"
, maybe "" (\r -> " in " ++ Remote.name r) mr
, "."
]
, alertIcon = Just InfoIcon
, alertPriority = High
, alertButtons = [button]
, alertClosable = True
, alertClass = Message
, alertMessageRender = renderData
, alertCounter = 0
, alertBlockDisplay = True
, alertName = Just NotFsckedAlert
, alertCombiner = Just $ dataCombiner $ \_old new -> new
, alertData = []
}
baseUpgradeAlert :: [AlertButton] -> TenseText -> Alert
baseUpgradeAlert buttons message = Alert
{ alertHeader = Just message
, alertIcon = Just UpgradeIcon
, alertPriority = High
, alertButtons = buttons
, alertClosable = True
, alertClass = Message
, alertMessageRender = renderData
, alertCounter = 0
, alertBlockDisplay = True
, alertName = Just UpgradeAlert
, alertCombiner = Just $ fullCombiner $ \new _old -> new
, alertData = []
}
canUpgradeAlert :: AlertPriority -> GitAnnexVersion -> AlertButton -> Alert
canUpgradeAlert priority version button =
(baseUpgradeAlert [button] $ fromString msg)
{ alertPriority = priority
, alertData = [fromString $ " (version " ++ version ++ ")"]
}
where
msg = if priority >= High
then "An important upgrade of git-annex is available!"
else "An upgrade of git-annex is available."
upgradeReadyAlert :: AlertButton -> Alert
upgradeReadyAlert button = baseUpgradeAlert [button] $
fromString "A new version of git-annex has been installed."
upgradingAlert :: Alert
upgradingAlert = activityAlert Nothing [ fromString "Upgrading git-annex" ]
upgradeFinishedAlert :: Maybe AlertButton -> GitAnnexVersion -> Alert
upgradeFinishedAlert button version =
baseUpgradeAlert (maybeToList button) $ fromString $
"Finished upgrading git-annex to version " ++ version
upgradeFailedAlert :: String -> Alert
upgradeFailedAlert msg = (errorAlert msg [])
{ alertHeader = Just $ fromString "Upgrade failed." }
unusedFilesAlert :: [AlertButton] -> String -> Alert
unusedFilesAlert buttons message = Alert
{ alertHeader = Just $ fromString $ unwords
[ "Old and deleted files are piling up --"
, message
]
, alertIcon = Just InfoIcon
, alertPriority = High
, alertButtons = buttons
, alertClosable = True
, alertClass = Message
, alertMessageRender = renderData
, alertCounter = 0
, alertBlockDisplay = True
, alertName = Just UnusedFilesAlert
, alertCombiner = Just $ fullCombiner $ \new _old -> new
, alertData = []
}
brokenRepositoryAlert :: [AlertButton] -> Alert
brokenRepositoryAlert = errorAlert "Serious problems have been detected with your repository. This needs your immediate attention!"
repairingAlert :: String -> Alert
repairingAlert repodesc = activityAlert Nothing
[ Tensed "Attempting to repair" "Repaired"
, UnTensed $ T.pack repodesc
]
pairingAlert :: AlertButton -> Alert
pairingAlert button = baseActivityAlert
{ alertData = [ UnTensed "Pairing in progress" ]
, alertPriority = High
, alertButtons = [button]
}
pairRequestReceivedAlert :: String -> AlertButton -> Alert
pairRequestReceivedAlert who button = Alert
{ alertClass = Message
, alertHeader = Nothing
, alertMessageRender = renderData
, alertData = [UnTensed $ T.pack $ who ++ " is sending a pair request."]
, alertCounter = 0
, alertBlockDisplay = False
, alertPriority = High
, alertClosable = True
, alertIcon = Just InfoIcon
, alertName = Just $ PairAlert who
, alertCombiner = Just $ dataCombiner $ \_old new -> new
, alertButtons = [button]
}
pairRequestAcknowledgedAlert :: String -> Maybe AlertButton -> Alert
pairRequestAcknowledgedAlert who button = baseActivityAlert
{ alertData = ["Pairing with", UnTensed (T.pack who), Tensed "in progress" "complete"]
, alertPriority = High
, alertName = Just $ PairAlert who
, alertCombiner = Just $ dataCombiner $ \_old new -> new
, alertButtons = maybeToList button
}
connectionNeededAlert :: AlertButton -> Alert
connectionNeededAlert button = Alert
{ alertHeader = Just "Share with friends, and keep your devices in sync across the cloud."
, alertIcon = Just ConnectionIcon
, alertPriority = High
, alertButtons = [button]
, alertClosable = True
, alertClass = Message
, alertMessageRender = renderData
, alertCounter = 0
, alertBlockDisplay = True
, alertName = Just ConnectionNeededAlert
, alertCombiner = Just $ dataCombiner $ \_old new -> new
, alertData = []
}
cloudRepoNeededAlert :: Maybe String -> AlertButton -> Alert
cloudRepoNeededAlert friendname button = Alert
{ alertHeader = Just $ fromString $ unwords
[ "Unable to download files from"
, (fromMaybe "your other devices" friendname) ++ "."
]
, alertIcon = Just ErrorIcon
, alertPriority = High
, alertButtons = [button]
, alertClosable = True
, alertClass = Message
, alertMessageRender = renderData
, alertCounter = 0
, alertBlockDisplay = True
, alertName = Just $ CloudRepoNeededAlert
, alertCombiner = Just $ dataCombiner $ \_old new -> new
, alertData = []
}
remoteRemovalAlert :: String -> AlertButton -> Alert
remoteRemovalAlert desc button = Alert
{ alertHeader = Just $ fromString $
"The repository \"" ++ desc ++
"\" has been emptied, and can now be removed."
, alertIcon = Just InfoIcon
, alertPriority = High
, alertButtons = [button]
, alertClosable = True
, alertClass = Message
, alertMessageRender = renderData
, alertCounter = 0
, alertBlockDisplay = True
, alertName = Just $ RemoteRemovalAlert desc
, alertCombiner = Just $ dataCombiner $ \_old new -> new
, alertData = []
}
{- Show a message that relates to a list of files.
-
- The most recent several files are shown, and a count of any others. -}
fileAlert :: TenseChunk -> [FilePath] -> Alert
fileAlert msg files = (activityAlert Nothing shortfiles)
{ alertName = Just $ FileAlert msg
, alertMessageRender = renderer
, alertCounter = counter
, alertCombiner = Just $ fullCombiner combiner
}
where
maxfilesshown = 10
(!somefiles, !counter) = splitcounter (dedupadjacent files)
!shortfiles = map (fromString . shortFile . takeFileName) somefiles
renderer alert = tenseWords $ msg : alertData alert ++ showcounter
where
showcounter = case alertCounter alert of
0 -> []
_ -> [fromString $ "and " ++ show (alertCounter alert) ++ " other files"]
dedupadjacent (x:y:rest)
| x == y = dedupadjacent (y:rest)
| otherwise = x : dedupadjacent (y:rest)
dedupadjacent (x:[]) = [x]
dedupadjacent [] = []
{- Note that this ensures the counter is never 1; no need to say
- "1 file" when the filename could be shown. -}
splitcounter l
| length l <= maxfilesshown = (l, 0)
| otherwise =
let (keep, rest) = splitAt (maxfilesshown - 1) l
in (keep, length rest)
combiner new old =
let (!fs, n) = splitcounter $
dedupadjacent $ alertData new ++ alertData old
!cnt = n + alertCounter new + alertCounter old
in old
{ alertData = fs
, alertCounter = cnt
}
addFileAlert :: [FilePath] -> Alert
addFileAlert = fileAlert (Tensed "Adding" "Added")
{- This is only used as a success alert after a transfer, not during it. -}
transferFileAlert :: Direction -> Bool -> FilePath -> Alert
transferFileAlert direction True file
| direction == Upload = fileAlert "Uploaded" [file]
| otherwise = fileAlert "Downloaded" [file]
transferFileAlert direction False file
| direction == Upload = fileAlert "Upload failed" [file]
| otherwise = fileAlert "Download failed" [file]
dataCombiner :: ([TenseChunk] -> [TenseChunk] -> [TenseChunk]) -> AlertCombiner
dataCombiner combiner = fullCombiner $
\new old -> old { alertData = alertData new `combiner` alertData old }
fullCombiner :: (Alert -> Alert -> Alert) -> AlertCombiner
fullCombiner combiner new old
| alertClass new /= alertClass old = Nothing
| alertName new == alertName old =
Just $! new `combiner` old
| otherwise = Nothing
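-- In other words (an editorial note, not from the original source):
-- alerts combine only when both alertClass and alertName match, so e.g.
-- two successive file alerts for the same operation merge into one,
-- while a Warning never absorbs an Activity.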
shortFile :: FilePath -> String
shortFile f
| len < maxlen = f
| otherwise = take half f ++ ".." ++ drop (len - half) f
where
len = length f
maxlen = 20
half = (maxlen - 2) `div` 2
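-- For example (illustrative, not from the original source):
--
--   shortFile "abcdefghijklmnopqrstuvwxyz" == "abcdefghi..rstuvwxyz"
--
-- keeping the first and last 9 characters of the 26-character name to
-- stay within the 20-character display limit.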

129
Assistant/Alert/Utility.hs Normal file
View file

@ -0,0 +1,129 @@
{- git-annex assistant alert utilities
-
- Copyright 2012, 2013 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Assistant.Alert.Utility where
import Annex.Common
import Assistant.Types.Alert
import Utility.Tense
import qualified Data.Text as T
import Data.Text (Text)
import qualified Data.Map.Strict as M
{- This is as many alerts as it makes sense to display at a time.
- A display might be smaller, or larger, the point is to not overwhelm the
- user with a ton of alerts. -}
displayAlerts :: Int
displayAlerts = 6
{- This is not a hard maximum, but there's no point in keeping a great
- many filler alerts in an AlertMap, so when there's more than this many,
- they start being pruned, down toward displayAlerts. -}
maxAlerts :: Int
maxAlerts = displayAlerts * 2
type AlertPair = (AlertId, Alert)
{- The desired order is the reverse of:
-
- - Pinned alerts
- - High priority alerts, newest first
- - Medium priority alerts, newest first (mostly used for Activity)
- - Low priority alerts, newest first
- - Filler priority alerts, newest first
- - Ties are broken by the AlertClass, with Errors etc coming first.
-}
compareAlertPairs :: AlertPair -> AlertPair -> Ordering
compareAlertPairs
(aid, Alert { alertClass = aclass, alertPriority = aprio })
(bid, Alert { alertClass = bclass, alertPriority = bprio })
= compare aprio bprio
`mappend` compare aid bid
`mappend` compare aclass bclass
sortAlertPairs :: [AlertPair] -> [AlertPair]
sortAlertPairs = sortBy compareAlertPairs
{- Renders an alert's header for display, if it has one. -}
renderAlertHeader :: Alert -> Maybe Text
renderAlertHeader alert = renderTense (alertTense alert) <$> alertHeader alert
{- Renders an alert's message for display. -}
renderAlertMessage :: Alert -> Text
renderAlertMessage alert = renderTense (alertTense alert) $
(alertMessageRender alert) alert
showAlert :: Alert -> String
showAlert alert = T.unpack $ T.unwords $ catMaybes
[ renderAlertHeader alert
, Just $ renderAlertMessage alert
]
alertTense :: Alert -> Tense
alertTense alert
| alertClass alert == Activity = Present
| otherwise = Past
{- Checks if two alerts display the same. -}
effectivelySameAlert :: Alert -> Alert -> Bool
effectivelySameAlert x y = all id
[ alertClass x == alertClass y
, alertHeader x == alertHeader y
, alertData x == alertData y
, alertBlockDisplay x == alertBlockDisplay y
, alertClosable x == alertClosable y
, alertPriority x == alertPriority y
]
makeAlertFiller :: Bool -> Alert -> Alert
makeAlertFiller success alert
| isFiller alert = alert
| otherwise = alert
{ alertClass = if c == Activity then c' else c
, alertPriority = Filler
, alertClosable = True
, alertButtons = []
, alertIcon = Just $ if success then SuccessIcon else ErrorIcon
}
where
c = alertClass alert
c'
| success = Success
| otherwise = Error
isFiller :: Alert -> Bool
isFiller alert = alertPriority alert == Filler
{- Updates the Alertmap, adding or updating an alert.
-
- Any old filler that looks the same as the alert is removed.
-
- Or, if the alert has an alertCombiner that combines it with
- an old alert, the old alert is replaced with the result, and the
- alert is removed.
-
- Old filler alerts are pruned once maxAlerts is reached.
-}
mergeAlert :: AlertId -> Alert -> AlertMap -> AlertMap
mergeAlert i al m = maybe updatePrune updateCombine (alertCombiner al)
where
pruneSame k al' = k == i || not (effectivelySameAlert al al')
pruneBloat m'
| bloat > 0 = M.fromList $ pruneold $ M.toList m'
| otherwise = m'
where
bloat = M.size m' - maxAlerts
pruneold l =
let (f, rest) = partition (\(_, a) -> isFiller a) l
in drop bloat f ++ rest
updatePrune = pruneBloat $ M.filterWithKey pruneSame $ M.insert i al m
updateCombine combiner =
let combined = M.mapMaybe (combiner al) m
in if M.null combined
then updatePrune
else M.delete i $ M.union combined m

19
Assistant/BranchChange.hs Normal file
View file

@ -0,0 +1,19 @@
{- git-annex assistant git-annex branch change tracking
-
- Copyright 2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Assistant.BranchChange where
import Assistant.Common
import Assistant.Types.BranchChange
import Control.Concurrent.MSampleVar
branchChanged :: Assistant ()
branchChanged = flip writeSV () <<~ (fromBranchChangeHandle . branchChangeHandle)
waitBranchChange :: Assistant ()
waitBranchChange = readSV <<~ (fromBranchChangeHandle . branchChangeHandle)

47
Assistant/Changes.hs Normal file
View file

@ -0,0 +1,47 @@
{- git-annex assistant change tracking
-
- Copyright 2012-2013 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Assistant.Changes where
import Assistant.Common
import Assistant.Types.Changes
import Utility.TList
import Data.Time.Clock
import Control.Concurrent.STM
{- Handlers call this when they made a change that needs to get committed. -}
madeChange :: FilePath -> ChangeInfo -> Assistant (Maybe Change)
madeChange f t = Just <$> (Change <$> liftIO getCurrentTime <*> pure f <*> pure t)
noChange :: Assistant (Maybe Change)
noChange = return Nothing
{- Indicates an add needs to be done, but has not started yet. -}
pendingAddChange :: FilePath -> Assistant (Maybe Change)
pendingAddChange f = Just <$> (PendingAddChange <$> liftIO getCurrentTime <*> pure f)
{- Gets all unhandled changes.
- Blocks until at least one change is made. -}
getChanges :: Assistant [Change]
getChanges = (atomically . getTList) <<~ changePool
{- Gets all unhandled changes, without blocking. -}
getAnyChanges :: Assistant [Change]
getAnyChanges = (atomically . takeTList) <<~ changePool
{- Puts unhandled changes back into the pool.
- Note: Original order is not preserved. -}
refillChanges :: [Change] -> Assistant ()
refillChanges cs = (atomically . flip appendTList cs) <<~ changePool
{- Records a change to the pool. -}
recordChange :: Change -> Assistant ()
recordChange c = (atomically . flip snocTList c) <<~ changePool
recordChanges :: [Change] -> Assistant ()
recordChanges = refillChanges

32
Assistant/Commits.hs Normal file
View file

@ -0,0 +1,32 @@
{- git-annex assistant commit tracking
-
- Copyright 2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Assistant.Commits where
import Assistant.Common
import Assistant.Types.Commits
import Utility.TList
import Control.Concurrent.STM
{- Gets all unhandled commits.
- Blocks until at least one commit is made. -}
getCommits :: Assistant [Commit]
getCommits = (atomically . getTList) <<~ commitChan
{- Records a commit in the channel. -}
recordCommit :: Assistant ()
recordCommit = (atomically . flip consTList Commit) <<~ commitChan
{- Gets all unhandled export commits.
- Blocks until at least one export commit is made. -}
getExportCommits :: Assistant [Commit]
getExportCommits = (atomically . getTList) <<~ exportCommitChan
{- Records an export commit in the channel. -}
recordExportCommit :: Assistant ()
recordExportCommit = (atomically . flip consTList Commit) <<~ exportCommitChan

14
Assistant/Common.hs Normal file
View file

@ -0,0 +1,14 @@
{- Common infrastructure for the git-annex assistant.
-
- Copyright 2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Assistant.Common (module X) where
import Annex.Common as X
import Assistant.Monad as X
import Assistant.Types.DaemonStatus as X
import Assistant.Types.NamedThread as X
import Assistant.Types.Alert as X

53
Assistant/CredPairCache.hs Normal file
View file

@ -0,0 +1,53 @@
{- git-annex assistant CredPair cache.
-
- Copyright 2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
module Assistant.CredPairCache (
cacheCred,
getCachedCred,
expireCachedCred,
) where
import Assistant.Types.CredPairCache
import Types.Creds
import Assistant.Common
import Utility.ThreadScheduler
import qualified Data.Map as M
import Control.Concurrent
{- Caches a CredPair, but only for a limited time, after which it
- will expire.
-
- Note that repeatedly caching the same CredPair
- does not reset its expiry time.
-}
cacheCred :: CredPair -> Seconds -> Assistant ()
cacheCred (login, password) expireafter = do
cache <- getAssistant credPairCache
liftIO $ do
changeStrict cache $ M.insert login password
void $ forkIO $ do
threadDelaySeconds expireafter
changeStrict cache $ M.delete login
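-- Hypothetical use (not from the original source), caching a credential
-- for an hour; Seconds comes from Utility.ThreadScheduler, and the
-- login and password shown are invented:
--
--   cacheCred ("joey", "s3kr1t") (Seconds 3600)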
getCachedCred :: Login -> Assistant (Maybe Password)
getCachedCred login = do
cache <- getAssistant credPairCache
liftIO $ M.lookup login <$> readMVar cache
expireCachedCred :: Login -> Assistant ()
expireCachedCred login = do
cache <- getAssistant credPairCache
liftIO $ changeStrict cache $ M.delete login
{- Update map strictly to avoid keeping references to old creds in memory. -}
changeStrict :: CredPairCache -> (M.Map Login Password -> M.Map Login Password) -> IO ()
changeStrict cache a = modifyMVar_ cache $ \m -> do
let !m' = a m
return m'

267
Assistant/DaemonStatus.hs Normal file
View file

@ -0,0 +1,267 @@
{- git-annex assistant daemon status
-
- Copyright 2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
module Assistant.DaemonStatus where
import Assistant.Common
import Assistant.Alert.Utility
import Utility.Tmp
import Utility.NotificationBroadcaster
import Types.Transfer
import Logs.Transfer
import Logs.Trust
import Utility.TimeStamp
import qualified Remote
import qualified Types.Remote as Remote
import Config.DynamicConfig
import Annex.SpecialRemote.Config
import Control.Concurrent.STM
import System.Posix.Types
import Data.Time.Clock.POSIX
import qualified Data.Map.Strict as M
import qualified Data.Set as S
getDaemonStatus :: Assistant DaemonStatus
getDaemonStatus = (atomically . readTVar) <<~ daemonStatusHandle
modifyDaemonStatus_ :: (DaemonStatus -> DaemonStatus) -> Assistant ()
modifyDaemonStatus_ a = modifyDaemonStatus $ \s -> (a s, ())
modifyDaemonStatus :: (DaemonStatus -> (DaemonStatus, b)) -> Assistant b
modifyDaemonStatus a = do
dstatus <- getAssistant daemonStatusHandle
liftIO $ do
(s, b) <- atomically $ do
r@(!s, _) <- a <$> readTVar dstatus
writeTVar dstatus s
return r
sendNotification $ changeNotifier s
return b
{- Returns a function that updates the lists of syncable remotes
- and other associated information. -}
calcSyncRemotes :: Annex (DaemonStatus -> DaemonStatus)
calcSyncRemotes = do
rs <- filterM (liftIO . getDynamicConfig . remoteAnnexSync . Remote.gitconfig)
=<< (concat . Remote.byCost <$> Remote.remoteList)
alive <- trustExclude DeadTrusted (map Remote.uuid rs)
let good r = Remote.uuid r `elem` alive
let syncable = filter good rs
contentremotes <- filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . Remote.gitconfig) $
filter (\r -> Remote.uuid r /= NoUUID) syncable
let (exportremotes, dataremotes) = partition (exportTree . Remote.config) contentremotes
return $ \dstatus -> dstatus
{ syncRemotes = syncable
, syncGitRemotes = filter (Remote.gitSyncableRemoteType . Remote.remotetype) syncable
, syncDataRemotes = dataremotes
, exportRemotes = exportremotes
, downloadRemotes = contentremotes
, syncingToCloudRemote = any iscloud contentremotes
}
where
iscloud r = not (Remote.readonly r) && Remote.availability r == Remote.GloballyAvailable
{- Updates the syncRemotes list from the list of all remotes in Annex state. -}
updateSyncRemotes :: Assistant ()
updateSyncRemotes = do
modifyDaemonStatus_ =<< liftAnnex calcSyncRemotes
status <- getDaemonStatus
liftIO $ sendNotification $ syncRemotesNotifier status
when (syncingToCloudRemote status) $
updateAlertMap $
M.filter $ \alert ->
alertName alert /= Just CloudRepoNeededAlert
changeCurrentlyConnected :: (S.Set UUID -> S.Set UUID) -> Assistant ()
changeCurrentlyConnected sm = do
modifyDaemonStatus_ $ \ds -> ds
{ currentlyConnectedRemotes = sm (currentlyConnectedRemotes ds)
}
v <- currentlyConnectedRemotes <$> getDaemonStatus
debug [show v]
liftIO . sendNotification =<< syncRemotesNotifier <$> getDaemonStatus
updateScheduleLog :: Assistant ()
updateScheduleLog =
liftIO . sendNotification =<< scheduleLogNotifier <$> getDaemonStatus
{- Load any previous daemon status file, and store it in a MVar for this
- process to use as its DaemonStatus. Also gets current transfer status. -}
startDaemonStatus :: Annex DaemonStatusHandle
startDaemonStatus = do
file <- fromRepo gitAnnexDaemonStatusFile
status <- liftIO $
flip catchDefaultIO (readDaemonStatusFile file) =<< newDaemonStatus
transfers <- M.fromList <$> getTransfers
addsync <- calcSyncRemotes
liftIO $ atomically $ newTVar $ addsync $ status
{ scanComplete = False
, sanityCheckRunning = False
, currentTransfers = transfers
}
{- Don't just dump out the structure, because it will change over time,
- and parts of it are not relevant. -}
writeDaemonStatusFile :: FilePath -> DaemonStatus -> IO ()
writeDaemonStatusFile file status =
viaTmp writeFile file =<< serialized <$> getPOSIXTime
where
serialized now = unlines
[ "lastRunning:" ++ show now
, "scanComplete:" ++ show (scanComplete status)
, "sanityCheckRunning:" ++ show (sanityCheckRunning status)
, "lastSanityCheck:" ++ maybe "" show (lastSanityCheck status)
]
readDaemonStatusFile :: FilePath -> IO DaemonStatus
readDaemonStatusFile file = parse <$> newDaemonStatus <*> readFile file
where
parse status = foldr parseline status . lines
parseline line status
| key == "lastRunning" = parseval parsePOSIXTime $ \v ->
status { lastRunning = Just v }
| key == "scanComplete" = parseval readish $ \v ->
status { scanComplete = v }
| key == "sanityCheckRunning" = parseval readish $ \v ->
status { sanityCheckRunning = v }
| key == "lastSanityCheck" = parseval parsePOSIXTime $ \v ->
status { lastSanityCheck = Just v }
| otherwise = status -- unparsable line
where
(key, value) = separate (== ':') line
parseval parser a = maybe status a (parser value)
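
Assuming made-up timestamps (the exact rendering of the POSIX times depends on the time library's show), the serialized file consists of key:value lines like these, which readDaemonStatusFile folds back over a fresh newDaemonStatus, skipping anything unparsable:

-- lastRunning:1414016001.0
-- scanComplete:True
-- sanityCheckRunning:False
-- lastSanityCheck:1414000000.0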
{- Checks if a time stamp was made after the daemon was lastRunning.
-
- Some slop is built in; this really checks if the time stamp was made
- at least ten minutes after the daemon was lastRunning. This is to
- ensure the daemon shut down cleanly, and deal with minor clock skew.
-
- If the daemon has never run before, this always returns False.
-}
afterLastDaemonRun :: EpochTime -> DaemonStatus -> Bool
afterLastDaemonRun timestamp status = maybe False (< t) (lastRunning status)
where
t = realToFrac (timestamp - slop) :: POSIXTime
slop = fromIntegral tenMinutes
tenMinutes :: Int
tenMinutes = 10 * 60
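
A worked instance with made-up numbers: if lastRunning is Just 1000, then afterLastDaemonRun 1700 status is True (1000 < 1700 - 600), while afterLastDaemonRun 1500 status is False (1000 >= 1500 - 600).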
{- Mutates the transfer map. Runs in STM so that the transfer map can
- be modified in the same transaction that modifies the transfer queue.
- Note that this does not send a notification of the change; that's left
- to the caller. -}
adjustTransfersSTM :: DaemonStatusHandle -> (TransferMap -> TransferMap) -> STM ()
adjustTransfersSTM dstatus a = do
s <- readTVar dstatus
let !v = a (currentTransfers s)
writeTVar dstatus $ s { currentTransfers = v }
{- Checks if a transfer is currently running. -}
checkRunningTransferSTM :: DaemonStatusHandle -> Transfer -> STM Bool
checkRunningTransferSTM dstatus t = M.member t . currentTransfers
<$> readTVar dstatus
{- Alters a transfer's info, if the transfer is in the map. -}
alterTransferInfo :: Transfer -> (TransferInfo -> TransferInfo) -> Assistant ()
alterTransferInfo t a = updateTransferInfo' $ M.adjust a t
{- Updates a transfer's info. Adds the transfer to the map if necessary,
- or if already present, updates it while preserving the old transferTid,
- transferPaused, and bytesComplete values, which are not written to disk. -}
updateTransferInfo :: Transfer -> TransferInfo -> Assistant ()
updateTransferInfo t info = updateTransferInfo' $ M.insertWith merge t info
where
merge new old = new
{ transferTid = maybe (transferTid new) Just (transferTid old)
, transferPaused = transferPaused new || transferPaused old
, bytesComplete = maybe (bytesComplete new) Just (bytesComplete old)
}
updateTransferInfo' :: (TransferMap -> TransferMap) -> Assistant ()
updateTransferInfo' a = notifyTransfer `after` modifyDaemonStatus_ update
where
update s = s { currentTransfers = a (currentTransfers s) }
{- Removes a transfer from the map, and returns its info. -}
removeTransfer :: Transfer -> Assistant (Maybe TransferInfo)
removeTransfer t = notifyTransfer `after` modifyDaemonStatus remove
where
remove s =
let (info, ts) = M.updateLookupWithKey
(\_k _v -> Nothing)
t (currentTransfers s)
in (s { currentTransfers = ts }, info)
{- Send a notification when a transfer is changed. -}
notifyTransfer :: Assistant ()
notifyTransfer = do
dstatus <- getAssistant daemonStatusHandle
liftIO $ sendNotification
=<< transferNotifier <$> atomically (readTVar dstatus)
{- Send a notification when alerts are changed. -}
notifyAlert :: Assistant ()
notifyAlert = do
dstatus <- getAssistant daemonStatusHandle
liftIO $ sendNotification
=<< alertNotifier <$> atomically (readTVar dstatus)
{- Returns the alert's identifier, which can be used to remove it. -}
addAlert :: Alert -> Assistant AlertId
addAlert alert = do
notice [showAlert alert]
notifyAlert `after` modifyDaemonStatus add
where
add s = (s { lastAlertId = i, alertMap = m }, i)
where
!i = nextAlertId $ lastAlertId s
!m = mergeAlert i alert (alertMap s)
removeAlert :: AlertId -> Assistant ()
removeAlert i = updateAlert i (const Nothing)
updateAlert :: AlertId -> (Alert -> Maybe Alert) -> Assistant ()
updateAlert i a = updateAlertMap $ \m -> M.update a i m
updateAlertMap :: (AlertMap -> AlertMap) -> Assistant ()
updateAlertMap a = notifyAlert `after` modifyDaemonStatus_ update
where
update s =
let !m = a (alertMap s)
in s { alertMap = m }
{- Displays an alert while performing an activity that returns True on
- success.
-
- The alert is left visible afterwards, as filler.
- Old filler is pruned, to prevent the map growing too large. -}
alertWhile :: Alert -> Assistant Bool -> Assistant Bool
alertWhile alert a = alertWhile' alert $ do
r <- a
return (r, r)
{- Like alertWhile, but allows the activity to return a value too. -}
alertWhile' :: Alert -> Assistant (Bool, a) -> Assistant a
alertWhile' alert a = do
let alert' = alert { alertClass = Activity }
i <- addAlert alert'
(ok, r) <- a
updateAlertMap $ mergeAlert i $ makeAlertFiller ok alert'
return r
{- Displays an alert while performing an activity, then removes it. -}
alertDuring :: Alert -> Assistant a -> Assistant a
alertDuring alert a = do
i <- addAlert $ alert { alertClass = Activity }
removeAlert i `after` a
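
A minimal sketch of the pattern, assuming warningAlert from Assistant.Alert and Utility.ThreadScheduler are also imported (the alert text and delay are made up):

demoAlert :: Assistant ()
demoAlert = alertDuring (warningAlert "demo" "working...") $
	liftIO $ threadDelaySeconds (Seconds 2)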

Assistant/DeleteRemote.hs Normal file
View file

@@ -0,0 +1,89 @@
{- git-annex assistant remote deletion utilities
-
- Copyright 2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Assistant.DeleteRemote where
import Assistant.Common
import Assistant.Types.UrlRenderer
import Assistant.TransferQueue
import Types.Transfer
import Logs.Location
import Assistant.DaemonStatus
import qualified Remote
import Remote.List.Util
import qualified Git.Remote.Remove
import Logs.Trust
import qualified Annex
#ifdef WITH_WEBAPP
import Assistant.WebApp.Types
import Assistant.Alert
import qualified Data.Text as T
#endif
{- Removes a remote (but leaves the repository as-is), and returns the old
- Remote data. -}
disableRemote :: UUID -> Assistant Remote
disableRemote uuid = do
remote <- fromMaybe (error "unknown remote")
<$> liftAnnex (Remote.remoteFromUUID uuid)
liftAnnex $ do
inRepo $ Git.Remote.Remove.remove (Remote.name remote)
remotesChanged
updateSyncRemotes
return remote
{- Removes a remote, marking it dead. -}
removeRemote :: UUID -> Assistant Remote
removeRemote uuid = do
liftAnnex $ trustSet uuid DeadTrusted
disableRemote uuid
{- Called when a Remote is probably empty, to remove it.
-
- This does one last check for any objects remaining in the Remote,
- and if there are any, queues Downloads of them, and defers removing
- the remote for later. This is to catch any objects not referred to
- in keys in the current branch.
-}
removableRemote :: UrlRenderer -> UUID -> Assistant ()
removableRemote urlrenderer uuid = do
keys <- getkeys
if null keys
then finishRemovingRemote urlrenderer uuid
else do
r <- fromMaybe (error "unknown remote")
<$> liftAnnex (Remote.remoteFromUUID uuid)
mapM_ (queueremaining r) keys
where
queueremaining r k =
queueTransferWhenSmall "remaining object in unwanted remote"
(AssociatedFile Nothing) (Transfer Download uuid (fromKey id k)) r
{- Scanning for keys can take a long time; do not tie up
- the Annex monad while doing it, so other threads continue to
- run. -}
getkeys = do
a <- liftAnnex $ Annex.withCurrentState $ loggedKeysFor uuid
liftIO a
{- With the webapp, this asks the user to click on a button to finish
- removing the remote.
-
- Without the webapp, just do the removal now.
-}
finishRemovingRemote :: UrlRenderer -> UUID -> Assistant ()
#ifdef WITH_WEBAPP
finishRemovingRemote urlrenderer uuid = do
desc <- liftAnnex $ Remote.prettyUUID uuid
button <- mkAlertButton True (T.pack "Finish deletion process") urlrenderer $
FinishDeleteRepositoryR uuid
void $ addAlert $ remoteRemovalAlert desc button
#else
finishRemovingRemote _ uuid = void $ removeRemote uuid
#endif

Assistant/Drop.hs Normal file
View file

@@ -0,0 +1,30 @@
{- git-annex assistant dropping of unwanted content
-
- Copyright 2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Assistant.Drop (
handleDrops,
handleDropsFrom,
) where
import Assistant.Common
import Assistant.DaemonStatus
import Annex.Drop (handleDropsFrom, Reason)
import Logs.Location
import CmdLine.Action
import Types.NumCopies
import Types.Command
{- Drop from local and/or remote when allowed by the preferred content and
- numcopies settings. -}
handleDrops :: Reason -> Bool -> Key -> AssociatedFile -> [VerifiedCopy] -> Assistant ()
handleDrops reason fromhere key f preverified = do
syncrs <- syncDataRemotes <$> getDaemonStatus
locs <- liftAnnex $ loggedLocations key
liftAnnex $ handleDropsFrom
locs syncrs reason fromhere key f
(SeekInput [])
preverified callCommandAction

Assistant/Fsck.hs Normal file
View file

@@ -0,0 +1,50 @@
{- git-annex assistant fscking
-
- Copyright 2013 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Assistant.Fsck where
import Assistant.Common
import Types.ScheduledActivity
import qualified Types.Remote as Remote
import Annex.UUID
import Assistant.Alert
import Assistant.Types.UrlRenderer
import Logs.Schedule
import qualified Annex
import qualified Data.Set as S
{- Displays a nudge in the webapp if a fsck is not configured for
- the specified remote, or for the local repository. -}
fsckNudge :: UrlRenderer -> Maybe Remote -> Assistant ()
fsckNudge urlrenderer mr
| maybe True fsckableRemote mr =
whenM (liftAnnex $ annexFsckNudge <$> Annex.getGitConfig) $
unlessM (liftAnnex $ checkFscked mr) $
notFsckedNudge urlrenderer mr
| otherwise = noop
fsckableRemote :: Remote -> Bool
fsckableRemote = isJust . Remote.remoteFsck
{- Checks if the remote, or the local repository, has a fsck scheduled.
- Only looks at fscks configured to run via the local repository, not
- other repositories. -}
checkFscked :: Maybe Remote -> Annex Bool
checkFscked mr = any wanted . S.toList <$> (scheduleGet =<< getUUID)
where
wanted = case mr of
Nothing -> isSelfFsck
Just r -> flip isFsckOf (Remote.uuid r)
isSelfFsck :: ScheduledActivity -> Bool
isSelfFsck (ScheduledSelfFsck _ _) = True
isSelfFsck _ = False
isFsckOf :: ScheduledActivity -> UUID -> Bool
isFsckOf (ScheduledRemoteFsck u _ _) u' = u == u'
isFsckOf _ _ = False

Assistant/Gpg.hs Normal file
View file

@@ -0,0 +1,38 @@
{- git-annex assistant gpg stuff
-
- Copyright 2013 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Assistant.Gpg where
import Utility.Gpg
import Utility.UserInfo
import Types.Remote (RemoteConfigField)
import Annex.SpecialRemote.Config
import Types.ProposedAccepted
import qualified Data.Map as M
import Control.Applicative
import Prelude
{- Generates a gpg user id that is not used by any existing secret key -}
newUserId :: GpgCmd -> IO UserId
newUserId cmd = do
oldkeys <- secretKeys cmd
username <- either (const "unknown") id <$> myUserName
let basekeyname = username ++ "'s git-annex encryption key"
return $ Prelude.head $ filter (\n -> M.null $ M.filter (== n) oldkeys)
( basekeyname
: map (\n -> basekeyname ++ show n) ([2..] :: [Int])
)
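
Illustratively, for a user named alice the candidates tried are "alice's git-annex encryption key", then "alice's git-annex encryption key2", "alice's git-annex encryption key3", and so on; the first name not attached to an existing secret key wins.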
data EnableEncryption = HybridEncryption | SharedEncryption | NoEncryption
deriving (Eq)
{- Generates Remote configuration for encryption. -}
configureEncryption :: EnableEncryption -> (RemoteConfigField, ProposedAccepted String)
configureEncryption SharedEncryption = (encryptionField, Proposed "shared")
configureEncryption NoEncryption = (encryptionField, Proposed "none")
configureEncryption HybridEncryption = (encryptionField, Proposed "hybrid")

Assistant/Install.hs Normal file
View file

@@ -0,0 +1,194 @@
{- Assistant installation
-
- Copyright 2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Assistant.Install where
import Assistant.Common
import Assistant.Install.AutoStart
import Config.Files
import Utility.FileMode
import Utility.Shell
import Utility.Tmp
import Utility.Env
import Utility.SshConfig
#ifdef darwin_HOST_OS
import Utility.OSX
#else
import Utility.FreeDesktop
import Assistant.Install.Menu
import Utility.UserInfo
import Utility.Android
#endif
standaloneAppBase :: IO (Maybe FilePath)
standaloneAppBase = getEnv "GIT_ANNEX_APP_BASE"
{- The standalone app does not have an installation process.
- So when it's run, it needs to set up autostarting of the assistant
- daemon, as well as writing the programFile, and putting the
- git-annex-shell and git-annex-wrapper wrapper scripts into ~/.ssh
-
- Note that this is done every time it's started, so if the user moves
- it around, the paths this sets up won't break.
-
- File manager hook script installation is done even for
- packaged apps, since it has to go into the user's home directory.
-}
ensureInstalled :: IO ()
ensureInstalled = ifM (isJust <$> getEnv "GIT_ANNEX_PACKAGE_INSTALL")
( go Nothing
, go =<< standaloneAppBase
)
where
go Nothing = installFileManagerHooks "git-annex"
go (Just base) = do
let program = base </> "git-annex"
programfile <- programFile
createDirectoryIfMissing True (parentDir programfile)
writeFile programfile program
#ifdef darwin_HOST_OS
autostartfile <- userAutoStart osxAutoStartLabel
installAutoStart program autostartfile
#else
ifM osAndroid
( do
-- Integration with the Termux:Boot app.
home <- myHomeDir
let bootfile = home </> ".termux" </> "boot" </> "git-annex"
unlessM (doesFileExist bootfile) $ do
createDirectoryIfMissing True (takeDirectory bootfile)
writeFile bootfile "git-annex assistant --autostart"
, do
menufile <- desktopMenuFilePath "git-annex" <$> userDataDir
icondir <- iconDir <$> userDataDir
installMenu program menufile base icondir
autostartfile <- autoStartPath "git-annex" <$> userConfigDir
installAutoStart program autostartfile
)
#endif
sshdir <- sshDir
let runshell var = "exec " ++ base </> "runshell " ++ var
let rungitannexshell var = runshell $ "git-annex-shell -c \"" ++ var ++ "\""
installWrapper (sshdir </> "git-annex-shell") $ unlines
[ shebang
, "set -e"
, "if [ \"x$SSH_ORIGINAL_COMMAND\" != \"x\" ]; then"
, rungitannexshell "$SSH_ORIGINAL_COMMAND"
, "else"
, rungitannexshell "$@"
, "fi"
]
installWrapper (sshdir </> "git-annex-wrapper") $ unlines
[ shebang
, "set -e"
, runshell "\"$@\""
]
installFileManagerHooks program
installWrapper :: FilePath -> String -> IO ()
installWrapper file content = do
curr <- catchDefaultIO "" $ readFileStrict file
when (curr /= content) $ do
createDirectoryIfMissing True (parentDir file)
viaTmp writeFile file content
modifyFileMode file $ addModes [ownerExecuteMode]
installFileManagerHooks :: FilePath -> IO ()
#ifdef linux_HOST_OS
installFileManagerHooks program = unlessM osAndroid $ do
let actions = ["get", "drop", "undo"]
-- Gnome
nautilusScriptdir <- (\d -> d </> "nautilus" </> "scripts") <$> userDataDir
createDirectoryIfMissing True nautilusScriptdir
forM_ actions $
genNautilusScript nautilusScriptdir
-- KDE
userdata <- userDataDir
let kdeServiceMenusdir = userdata </> "kservices5" </> "ServiceMenus"
createDirectoryIfMissing True kdeServiceMenusdir
writeFile (kdeServiceMenusdir </> "git-annex.desktop")
(kdeDesktopFile actions)
where
genNautilusScript scriptdir action =
installscript (scriptdir </> scriptname action) $ unlines
[ shebang
, autoaddedcomment
, "exec " ++ program ++ " " ++ action ++ " --notify-start --notify-finish -- \"$@\""
]
scriptname action = "git-annex " ++ action
installscript f c = whenM (safetoinstallscript f) $ do
writeFile f c
modifyFileMode f $ addModes [ownerExecuteMode]
safetoinstallscript f = catchDefaultIO True $
elem autoaddedcomment . lines <$> readFileStrict f
autoaddedcomment = "# " ++ autoaddedmsg ++ " (To disable, chmod 600 this file.)"
autoaddedmsg = "Automatically added by git-annex, do not edit."
kdeDesktopFile actions = unlines $ concat $
kdeDesktopHeader actions : map kdeDesktopAction actions
kdeDesktopHeader actions =
[ "# " ++ autoaddedmsg
, "[Desktop Entry]"
, "Type=Service"
, "ServiceTypes=all/allfiles"
, "MimeType=all/all;"
, "Actions=" ++ intercalate ";" (map kdeDesktopSection actions)
, "X-KDE-Priority=TopLevel"
, "X-KDE-Submenu=Git-Annex"
, "X-KDE-Icon=git-annex"
, "X-KDE-ServiceTypes=KonqPopupMenu/Plugin"
]
kdeDesktopSection command = "GitAnnex" ++ command
kdeDesktopAction command =
[ ""
, "[Desktop Action " ++ kdeDesktopSection command ++ "]"
, "Name=" ++ command
, "Icon=git-annex"
, unwords
[ "Exec=sh -c 'cd \"$(dirname \"$1\")\" &&"
, program
, command
, "--notify-start --notify-finish -- \"$1\"'"
, "false" -- this becomes $0 in sh, so unused
, "%f"
]
]
#else
installFileManagerHooks _ = noop
#endif
{- Returns a cleaned up environment that lacks settings used to make the
- standalone builds use their bundled libraries and programs.
- Useful when calling programs not included in the standalone builds.
-
- For a non-standalone build, returns Nothing.
-}
cleanEnvironment :: IO (Maybe [(String, String)])
cleanEnvironment = clean <$> getEnvironment
where
clean environ
| null vars = Nothing
| otherwise = Just $ catMaybes $ map (restoreorig environ) environ
where
vars = words $ fromMaybe "" $
lookup "GIT_ANNEX_STANDLONE_ENV" environ
restoreorig oldenviron p@(k, _v)
| k `elem` vars = case lookup ("ORIG_" ++ k) oldenviron of
(Just v')
| not (null v') -> Just (k, v')
_ -> Nothing
| otherwise = Just p
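
An illustrative run: with GIT_ANNEX_STANDLONE_ENV set to "LD_LIBRARY_PATH PATH", the cleaned environment restores LD_LIBRARY_PATH and PATH from ORIG_LD_LIBRARY_PATH and ORIG_PATH when those are non-empty, drops them otherwise, and passes every other variable through untouched.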

Assistant/Install/AutoStart.hs Normal file
View file

@@ -0,0 +1,40 @@
{- Assistant autostart file installation
-
- Copyright 2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fno-warn-tabs #-}
module Assistant.Install.AutoStart where
import Utility.FreeDesktop
#ifdef darwin_HOST_OS
import Utility.OSX
import Utility.Path
import Utility.Directory
#endif
installAutoStart :: FilePath -> FilePath -> IO ()
installAutoStart command file = do
#ifdef darwin_HOST_OS
createDirectoryIfMissing True (parentDir file)
writeFile file $ genOSXAutoStartFile osxAutoStartLabel command
["assistant", "--autostart"]
#else
writeDesktopMenuFile (fdoAutostart command) file
#endif
osxAutoStartLabel :: String
osxAutoStartLabel = "com.branchable.git-annex.assistant"
fdoAutostart :: FilePath -> DesktopEntry
fdoAutostart command = genDesktopEntry
"Git Annex Assistant"
"Autostart"
False
(command ++ " assistant --autostart")
Nothing
[]

Assistant/Install/Menu.hs Normal file
View file

@@ -0,0 +1,48 @@
{- Assistant menu installation.
-
- Copyright 2013 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fno-warn-tabs #-}
module Assistant.Install.Menu where
import Common
import Utility.FreeDesktop
installMenu :: FilePath -> FilePath -> FilePath -> FilePath -> IO ()
#ifdef darwin_HOST_OS
installMenu _command _menufile _iconsrcdir _icondir = return ()
#else
installMenu command menufile iconsrcdir icondir = do
writeDesktopMenuFile (fdoDesktopMenu command) menufile
installIcon (iconsrcdir </> "logo.svg") $
iconFilePath (iconBaseName ++ ".svg") "scalable" icondir
installIcon (iconsrcdir </> "logo_16x16.png") $
iconFilePath (iconBaseName ++ ".png") "16x16" icondir
#endif
{- The command can be either just "git-annex", or the full path to use
- to run it. -}
fdoDesktopMenu :: FilePath -> DesktopEntry
fdoDesktopMenu command = genDesktopEntry
"Git Annex"
"Track and sync the files in your Git Annex"
False
(command ++ " webapp")
(Just iconBaseName)
["Network", "FileTransfer"]
installIcon :: FilePath -> FilePath -> IO ()
installIcon src dest = do
createDirectoryIfMissing True (parentDir dest)
withBinaryFile src ReadMode $ \hin ->
withBinaryFile dest WriteMode $ \hout ->
hGetContents hin >>= hPutStr hout
iconBaseName :: String
iconBaseName = "git-annex"

Assistant/MakeRemote.hs Normal file
View file

@@ -0,0 +1,184 @@
{- git-annex assistant remote creation utilities
-
- Copyright 2012, 2013 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Assistant.MakeRemote where
import Assistant.Common
import Assistant.Ssh
import qualified Types.Remote as R
import qualified Remote
import Remote.List.Util
import qualified Remote.Rsync as Rsync
import qualified Remote.GCrypt as GCrypt
import qualified Git
import qualified Git.Command
import qualified Annex
import qualified Annex.SpecialRemote
import Annex.SpecialRemote.Config
import Logs.UUID
import Logs.Remote
import Git.Remote
import Git.Types (RemoteName)
import Creds
import Assistant.Gpg
import Utility.Gpg (KeyId)
import Types.GitConfig
import Config
import Types.ProposedAccepted
import qualified Data.Map as M
{- Sets up a new git or rsync remote, accessed over ssh. -}
makeSshRemote :: SshData -> Annex RemoteName
makeSshRemote sshdata = maker (sshRepoName sshdata) (genSshUrl sshdata)
where
maker
| onlyCapability sshdata RsyncCapable = makeRsyncRemote
| otherwise = makeGitRemote
{- Runs an action that returns a name of the remote, and finishes adding it. -}
addRemote :: Annex RemoteName -> Annex Remote
addRemote a = do
name <- a
remotesChanged
maybe (error "failed to add remote") return
=<< Remote.byName (Just name)
{- Inits a rsync special remote, and returns its name. -}
makeRsyncRemote :: RemoteName -> String -> Annex String
makeRsyncRemote name location = makeRemote name location $ const $ void $
go =<< Annex.SpecialRemote.findExisting name
where
go Nothing = setupSpecialRemote name Rsync.remote config Nothing
(Nothing, R.Init, Annex.SpecialRemote.newConfig name Nothing mempty mempty) Nothing
go (Just (u, c, mcu)) = setupSpecialRemote name Rsync.remote config Nothing
(Just u, R.Enable c, c) mcu
config = M.fromList
[ (encryptionField, Proposed "shared")
, (Proposed "rsyncurl", Proposed location)
, (typeField, Proposed "rsync")
]
{- Inits a gcrypt special remote, and returns its name. -}
makeGCryptRemote :: RemoteName -> String -> KeyId -> Annex RemoteName
makeGCryptRemote remotename location keyid =
initSpecialRemote remotename GCrypt.remote Nothing $ M.fromList
[ (typeField, Proposed "gcrypt")
, (Proposed "gitrepo", Proposed location)
, configureEncryption HybridEncryption
, (Proposed "keyid", Proposed keyid)
]
type SpecialRemoteMaker = RemoteName -> RemoteType -> Maybe CredPair -> R.RemoteConfig -> Annex RemoteName
{- Inits a new special remote. The name is used as a suggestion, but
- will be changed if there is already a special remote with that name. -}
initSpecialRemote :: SpecialRemoteMaker
initSpecialRemote name remotetype mcreds config = go 0
where
go :: Int -> Annex RemoteName
go n = do
let fullname = if n == 0 then name else name ++ show n
Annex.SpecialRemote.findExisting fullname >>= \case
Nothing -> setupSpecialRemote fullname remotetype config mcreds
(Nothing, R.Init, Annex.SpecialRemote.newConfig fullname Nothing mempty mempty) Nothing
Just _ -> go (n + 1)
{- Enables an existing special remote. -}
enableSpecialRemote :: SpecialRemoteMaker
enableSpecialRemote name remotetype mcreds config =
Annex.SpecialRemote.findExisting name >>= \case
Nothing -> error $ "Cannot find a special remote named " ++ name
Just (u, c, mcu) -> setupSpecialRemote' False name remotetype config mcreds (Just u, R.Enable c, c) mcu
setupSpecialRemote :: RemoteName -> RemoteType -> R.RemoteConfig -> Maybe CredPair -> (Maybe UUID, R.SetupStage, R.RemoteConfig) -> Maybe (Annex.SpecialRemote.ConfigFrom UUID) -> Annex RemoteName
setupSpecialRemote = setupSpecialRemote' True
setupSpecialRemote' :: Bool -> RemoteName -> RemoteType -> R.RemoteConfig -> Maybe CredPair -> (Maybe UUID, R.SetupStage, R.RemoteConfig) -> Maybe (Annex.SpecialRemote.ConfigFrom UUID) -> Annex RemoteName
setupSpecialRemote' setdesc name remotetype config mcreds (mu, ss, c) mcu = do
{- Currently, only 'weak' ciphers can be generated from the
- assistant, because otherwise GnuPG may block once the entropy
- pool is drained, and as of now there's no way to tell the user
- to perform IO actions to refill the pool. -}
let weakc = M.insert (Proposed "highRandomQuality") (Proposed "false") (M.union config c)
dummycfg <- liftIO dummyRemoteGitConfig
(c', u) <- R.setup remotetype ss mu mcreds weakc dummycfg
case mcu of
Nothing ->
configSet u c'
Just (Annex.SpecialRemote.ConfigFrom cu) -> do
setConfig (remoteAnnexConfig c' "config-uuid") (fromUUID cu)
configSet cu c'
when setdesc $
whenM (isNothing . M.lookup u <$> uuidDescMap) $
describeUUID u (toUUIDDesc name)
return name
{- Returns the name of the git remote it created. If there's already a
- remote at the location, returns its name. -}
makeGitRemote :: String -> String -> Annex RemoteName
makeGitRemote basename location = makeRemote basename location $ \name ->
void $ inRepo $ Git.Command.runBool
[Param "remote", Param "add", Param name, Param location]
{- If there's not already a remote at the location, adds it using the
- action, which is passed the name of the remote to make.
-
- Returns the name of the remote. -}
makeRemote :: String -> String -> (RemoteName -> Annex ()) -> Annex RemoteName
makeRemote basename location a = do
rs <- Annex.getGitRemotes
if not (any samelocation rs)
then do
let name = uniqueRemoteName basename 0 rs
a name
return name
else return basename
where
samelocation x = Git.repoLocation x == location
{- Given a list of all remotes, generate an unused name for a new
- remote, adding a number if necessary.
-
- Ensures that the returned name is a legal git remote name. -}
uniqueRemoteName :: String -> Int -> [Git.Repo] -> RemoteName
uniqueRemoteName basename n rs
| null namecollision = name
| otherwise = uniqueRemoteName legalbasename (succ n) rs
where
namecollision = filter samename rs
samename x = Git.remoteName x == Just name
name
| n == 0 = legalbasename
| otherwise = legalbasename ++ show n
legalbasename = makeLegalName basename
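
For example (hypothetical remote list): if a remote named "backup" already exists, uniqueRemoteName "backup" 0 rs rejects "backup" and returns "backup1", or "backup2" if that is taken too, after makeLegalName has sanitized the base name.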
{- Finds a CredPair belonging to any Remote that is of a given type
- and matches some other criteria.
-
- This can be used as a default when another repository is being set up
- using the same service.
-
- A function must be provided that returns the CredPairStorage
- to use for a particular Remote's uuid.
-}
previouslyUsedCredPair
:: (UUID -> CredPairStorage)
-> RemoteType
-> (Remote -> Bool)
-> Annex (Maybe CredPair)
previouslyUsedCredPair getstorage remotetype criteria =
getM fromstorage
=<< filter criteria . filter sametype
<$> Remote.remoteList
where
sametype r = R.typename (R.remotetype r) == R.typename remotetype
fromstorage r = do
let storage = getstorage (R.uuid r)
getRemoteCredPair (R.config r) (R.gitconfig r) storage

Assistant/MakeRepo.hs Normal file
View file

@@ -0,0 +1,96 @@
{- making local repositories
-
- Copyright 2012-2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Assistant.MakeRepo where
import Assistant.WebApp.Common
import Annex.Init
import qualified Git.Construct
import qualified Git.Config
import qualified Git.Command
import qualified Git.Branch
import qualified Annex
import Annex.UUID
import Annex.AdjustedBranch
import Annex.Action
import Types.StandardGroups
import Logs.PreferredContent
import qualified Annex.Branch
import Utility.Process.Transcript
import Config
{- Makes a new git repository. Or, if a git repository already
- exists, returns False. -}
makeRepo :: FilePath -> Bool -> IO Bool
makeRepo path bare = ifM (probeRepoExists path)
( return False
, do
(transcript, ok) <-
processTranscript "git" (toCommand params) Nothing
unless ok $
error $ "git init failed!\nOutput:\n" ++ transcript
return True
)
where
baseparams = [Param "init", Param "--quiet"]
params
| bare = baseparams ++ [Param "--bare", File path]
| otherwise = baseparams ++ [File path]
{- Runs an action in the git repository in the specified directory. -}
inDir :: FilePath -> Annex a -> IO a
inDir dir a = do
state <- Annex.new =<< Git.Config.read =<< Git.Construct.fromPath dir
Annex.eval state $ a `finally` stopCoProcesses
{- Creates a new repository, and returns its UUID. -}
initRepo :: Bool -> Bool -> FilePath -> Maybe String -> Maybe StandardGroup -> IO UUID
initRepo True primary_assistant_repo dir desc mgroup = inDir dir $ do
initRepo' desc mgroup
{- Initialize the master branch, so things that expect
- to have it will work, before any files are added. -}
unlessM (Git.Config.isBare <$> gitRepo) $ do
cmode <- annexCommitMode <$> Annex.getGitConfig
void $ inRepo $ Git.Branch.commitCommand cmode
[ Param "--quiet"
, Param "--allow-empty"
, Param "-m"
, Param "created repository"
]
{- Repositories directly managed by the assistant use
- an adjusted unlocked branch with annex.thin set.
-
- Automatic gc is disabled, as it can be slow. Instead, gc is done
- once a day.
-}
when primary_assistant_repo $ do
void $ enterAdjustedBranch (LinkAdjustment UnlockAdjustment)
setConfig (annexConfig "thin") (Git.Config.boolConfig True)
inRepo $ Git.Command.run
[Param "config", Param "gc.auto", Param "0"]
getUUID
{- Repo already exists, could be a non-git-annex repo though so
- still initialize it. -}
initRepo False _ dir desc mgroup = inDir dir $ do
initRepo' desc mgroup
getUUID
initRepo' :: Maybe String -> Maybe StandardGroup -> Annex ()
initRepo' desc mgroup = unlessM isInitialized $ do
initialize desc Nothing
u <- getUUID
maybe noop (defaultStandardGroup u) mgroup
{- Ensure branch gets committed right away so it is
- available for merging immediately. -}
Annex.Branch.commit =<< Annex.Branch.commitMessage
{- Checks if a git repo exists at a location. -}
probeRepoExists :: FilePath -> IO Bool
probeRepoExists dir = isJust <$>
catchDefaultIO Nothing (Git.Construct.checkForRepo dir)
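
A hypothetical end-to-end use of this module (the path, description, and group are invented; ClientGroup is assumed from Types.StandardGroups):

demoSetup :: IO UUID
demoSetup = do
	-- create a non-bare repo unless one is already there
	_ <- makeRepo "/tmp/demo-annex" False
	-- initialize it as a primary assistant repo and return its UUID
	initRepo True True "/tmp/demo-annex" (Just "demo repo") (Just ClientGroup)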

Assistant/Monad.hs Normal file
View file

@@ -0,0 +1,153 @@
{- git-annex assistant monad
-
- Copyright 2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE GeneralizedNewtypeDeriving, MultiParamTypeClasses #-}
module Assistant.Monad (
Assistant,
AssistantData(..),
newAssistantData,
runAssistant,
getAssistant,
LiftAnnex,
liftAnnex,
(<~>),
(<<~),
asIO,
asIO1,
asIO2,
ThreadName,
debug,
notice
) where
import "mtl" Control.Monad.Reader
import System.Log.Logger
import qualified Control.Monad.Fail as Fail
import Annex.Common
import Assistant.Types.ThreadedMonad
import Assistant.Types.DaemonStatus
import Assistant.Types.ScanRemotes
import Assistant.Types.TransferQueue
import Assistant.Types.TransferSlots
import Assistant.Types.TransferrerPool
import Assistant.Types.Pushes
import Assistant.Types.BranchChange
import Assistant.Types.Commits
import Assistant.Types.Changes
import Assistant.Types.RepoProblem
import Assistant.Types.ThreadName
import Assistant.Types.RemoteControl
import Assistant.Types.CredPairCache
newtype Assistant a = Assistant { mkAssistant :: ReaderT AssistantData IO a }
deriving (
Monad,
MonadIO,
MonadReader AssistantData,
MonadCatch,
MonadThrow,
MonadMask,
Fail.MonadFail,
Functor,
Applicative
)
data AssistantData = AssistantData
{ threadName :: ThreadName
, threadState :: ThreadState
, daemonStatusHandle :: DaemonStatusHandle
, scanRemoteMap :: ScanRemoteMap
, transferQueue :: TransferQueue
, transferSlots :: TransferSlots
, transferrerPool :: TransferrerPool
, failedPushMap :: FailedPushMap
, failedExportMap :: FailedPushMap
, commitChan :: CommitChan
, exportCommitChan :: CommitChan
, changePool :: ChangePool
, repoProblemChan :: RepoProblemChan
, branchChangeHandle :: BranchChangeHandle
, remoteControl :: RemoteControl
, credPairCache :: CredPairCache
}
newAssistantData :: ThreadState -> DaemonStatusHandle -> IO AssistantData
newAssistantData st dstatus = AssistantData
<$> pure (ThreadName "main")
<*> pure st
<*> pure dstatus
<*> newScanRemoteMap
<*> newTransferQueue
<*> newTransferSlots
<*> newTransferrerPool (checkNetworkConnections dstatus)
<*> newFailedPushMap
<*> newFailedPushMap
<*> newCommitChan
<*> newCommitChan
<*> newChangePool
<*> newRepoProblemChan
<*> newBranchChangeHandle
<*> newRemoteControl
<*> newCredPairCache
runAssistant :: AssistantData -> Assistant a -> IO a
runAssistant d a = runReaderT (mkAssistant a) d
getAssistant :: (AssistantData -> a) -> Assistant a
getAssistant = reader
{- Using a type class for lifting into the annex monad allows
- easily lifting into it from multiple different monads. -}
class LiftAnnex m where
liftAnnex :: Annex a -> m a
{- Runs an action in the git-annex monad. Note that the same monad state
- is shared among all assistant threads, so only one of these can run at
- a time. Therefore, long-duration actions should be avoided. -}
instance LiftAnnex Assistant where
liftAnnex a = do
st <- reader threadState
liftIO $ runThreadState st a
{- Runs an IO action, passing it an IO action that runs an Assistant action. -}
(<~>) :: (IO a -> IO b) -> Assistant a -> Assistant b
io <~> a = do
d <- reader id
liftIO $ io $ runAssistant d a
{- Creates an IO action that will run an Assistant action when run. -}
asIO :: Assistant a -> Assistant (IO a)
asIO a = do
d <- reader id
return $ runAssistant d a
asIO1 :: (a -> Assistant b) -> Assistant (a -> IO b)
asIO1 a = do
d <- reader id
return $ \v -> runAssistant d $ a v
asIO2 :: (a -> b -> Assistant c) -> Assistant (a -> b -> IO c)
asIO2 a = do
d <- reader id
return $ \v1 v2 -> runAssistant d (a v1 v2)
{- Runs an IO action on a selected field of the AssistantData. -}
(<<~) :: (a -> IO b) -> (AssistantData -> a) -> Assistant b
io <<~ v = reader v >>= liftIO . io
debug :: [String] -> Assistant ()
debug = logaction debugM
notice :: [String] -> Assistant ()
notice = logaction noticeM
logaction :: (String -> String -> IO ()) -> [String] -> Assistant ()
logaction a ws = do
ThreadName name <- getAssistant threadName
liftIO $ a name $ unwords $ (name ++ ":") : ws
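
A small sketch of the lifting combinators (the callback body is illustrative): asIO1 packages an Assistant action for code that can only call back into IO, such as a file-watching library:

demoCallback :: Assistant (FilePath -> IO ())
demoCallback = asIO1 $ \f -> debug ["file changed:", f]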

Assistant/NamedThread.hs Normal file
View file

@@ -0,0 +1,99 @@
{- git-annex assistant named threads.
-
- Copyright 2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Assistant.NamedThread where
import Annex.Common
import Assistant.Types.NamedThread
import Assistant.Types.ThreadName
import Assistant.Types.DaemonStatus
import Assistant.Types.UrlRenderer
import Assistant.DaemonStatus
import Assistant.Monad
import Utility.NotificationBroadcaster
import Control.Concurrent
import Control.Concurrent.Async
import qualified Data.Map.Strict as M
import qualified Control.Exception as E
#ifdef WITH_WEBAPP
import Assistant.WebApp.Types
import Assistant.Types.Alert
import Assistant.Alert
import qualified Data.Text as T
#endif
{- Starts a named thread, if it's not already running.
-
- Named threads are run by a management thread, so if they crash
- an alert is displayed, allowing the thread to be restarted. -}
startNamedThread :: UrlRenderer -> NamedThread -> Assistant ()
startNamedThread urlrenderer (NamedThread afterstartupsanitycheck name a) =
M.lookup name . startedThreads <$> getDaemonStatus >>= \case
Nothing -> start
Just (aid, _) -> do
r <- liftIO (E.try (poll aid) :: IO (Either E.SomeException (Maybe (Either E.SomeException ()))))
case r of
Right Nothing -> noop
_ -> start
where
start
| afterstartupsanitycheck = do
status <- getDaemonStatus
h <- liftIO $ newNotificationHandle False $
startupSanityCheckNotifier status
startwith $ runmanaged $
liftIO $ waitNotification h
| otherwise = startwith $ runmanaged noop
startwith runner = do
d <- getAssistant id
aid <- liftIO $ runner $ d { threadName = name }
restart <- asIO $ startNamedThread urlrenderer (NamedThread False name a)
modifyDaemonStatus_ $ \s -> s
{ startedThreads = M.insert name (aid, restart) (startedThreads s) }
runmanaged first d = do
aid <- async $ runAssistant d $ do
void first
a
void $ forkIO $ manager d aid
return aid
manager d aid = (E.try (wait aid) :: IO (Either E.SomeException ())) >>= \case
Right _ -> noop
Left e -> do
let msg = unwords
[ fromThreadName $ threadName d
, "crashed:", show e
]
hPutStrLn stderr msg
#ifdef WITH_WEBAPP
button <- runAssistant d $ mkAlertButton True
(T.pack "Restart Thread")
urlrenderer
(RestartThreadR name)
runAssistant d $ void $ addAlert $
(warningAlert (fromThreadName name) msg)
{ alertButtons = [button] }
#endif
namedThreadId :: NamedThread -> Assistant (Maybe ThreadId)
namedThreadId (NamedThread _ name _) = do
m <- startedThreads <$> getDaemonStatus
return $ asyncThreadId . fst <$> M.lookup name m
{- Waits for all named threads that have been started to finish.
-
- Note that if a named thread crashes, it will probably
- cause this to crash as well. Also, named threads that are started
- after this is called will not be waited on. -}
waitNamedThreads :: Assistant ()
waitNamedThreads = do
m <- startedThreads <$> getDaemonStatus
liftIO $ mapM_ (wait . fst) $ M.elems m
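
A hypothetical thread definition, using the constructor shape that startNamedThread pattern-matches (the name and body are made up; the True field delays the start until the startup sanity check finishes):

demoNamedThread :: NamedThread
demoNamedThread = NamedThread True (ThreadName "Demo") $
	debug ["demo thread running"]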

Assistant/Pairing.hs Normal file
View file

@@ -0,0 +1,97 @@
{- git-annex assistant repo pairing, core data types
-
- Copyright 2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Assistant.Pairing where
import Annex.Common
import Utility.Verifiable
import Assistant.Ssh
import Control.Concurrent
import Network.Socket
import Data.Char
import qualified Data.Text as T
data PairStage
{- "I'll pair with anybody who shares the secret that can be used
- to verify this request." -}
= PairReq
{- "I've verified your request, and you can verify this to see
- that I know the secret. I set up your ssh key already.
- Here's mine for you to set up." -}
| PairAck
{- "I saw your PairAck; you can stop sending them." -}
| PairDone
deriving (Eq, Read, Show, Ord, Enum)
newtype PairMsg = PairMsg (Verifiable (PairStage, PairData, SomeAddr))
deriving (Eq, Read, Show)
verifiedPairMsg :: PairMsg -> PairingInProgress -> Bool
verifiedPairMsg (PairMsg m) pip = verify m $ inProgressSecret pip
fromPairMsg :: PairMsg -> Verifiable (PairStage, PairData, SomeAddr)
fromPairMsg (PairMsg m) = m
pairMsgStage :: PairMsg -> PairStage
pairMsgStage (PairMsg (Verifiable (s, _, _) _)) = s
pairMsgData :: PairMsg -> PairData
pairMsgData (PairMsg (Verifiable (_, d, _) _)) = d
pairMsgAddr :: PairMsg -> SomeAddr
pairMsgAddr (PairMsg (Verifiable (_, _, a) _)) = a
data PairData = PairData
-- uname -n output, not a full domain name
{ remoteHostName :: Maybe HostName
, remoteUserName :: UserName
, remoteDirectory :: FilePath
, remoteSshPubKey :: SshPubKey
, pairUUID :: UUID
}
deriving (Eq, Read, Show)
checkSane :: PairData -> Bool
checkSane p = all (not . any isControl)
[ fromMaybe "" (remoteHostName p)
, remoteUserName p
, remoteDirectory p
, remoteSshPubKey p
, fromUUID (pairUUID p)
]
type UserName = String
{- A pairing that is in progress has a secret, a thread that is
- broadcasting pairing messages, and a SshKeyPair that has not yet been
- set up on disk. -}
data PairingInProgress = PairingInProgress
{ inProgressSecret :: Secret
, inProgressThreadId :: Maybe ThreadId
, inProgressSshKeyPair :: SshKeyPair
, inProgressPairData :: PairData
, inProgressPairStage :: PairStage
}
deriving (Show)
data AddrClass = IPv4AddrClass | IPv6AddrClass
data SomeAddr = IPv4Addr HostAddress
| IPv6Addr HostAddress6
deriving (Ord, Eq, Read, Show)
{- This contains the whole secret, just lightly obfuscated to make it not
- too obvious. It's only displayed in the user's web browser. -}
newtype SecretReminder = SecretReminder [Int]
deriving (Show, Eq, Ord, Read)
toSecretReminder :: T.Text -> SecretReminder
toSecretReminder = SecretReminder . map ord . T.unpack
fromSecretReminder :: SecretReminder -> T.Text
fromSecretReminder (SecretReminder s) = T.pack $ map chr s
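
A round-trip sketch: toSecretReminder (T.pack "xyz") is SecretReminder [120,121,122], and fromSecretReminder maps the code points back through chr, recovering "xyz".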

Some files were not shown because too many files have changed in this diff.