diff --git a/.appveyor.yml b/.appveyor.yml
new file mode 100644
index 0000000000..e6ac3d2e21
--- /dev/null
+++ b/.appveyor.yml
@@ -0,0 +1,141 @@
+# This CI setup provides a largely homogeneous configuration across all
+# major platforms (Windows, MacOS, and Linux). The aim of this test setup is
+# to create a "native" platform experience, using as few cross-platform
+# helper tools as possible.
+#
+# All workers support remote login. Login details are shown at the top of each
+# CI run log.
+#
+# - Linux/Mac workers (via SSH):
+#
+#   - A permitted SSH key must be defined in an APPVEYOR_SSH_KEY environment
+#     variable (via the appveyor project settings)
+#
+#   - SSH login info is given in the form of: 'appveyor@67.225.164.xx -p 22xxx'
+#
+#   - Login with:
+#
+#     ssh -o StrictHostKeyChecking=no
+#
+#   - to prevent the CI run from exiting, `touch` a file named `BLOCK` in the
+#     user HOME directory (current directory directly after login). The session
+#     will run until the file is removed (or 60 min have passed)
+#
+# - Windows workers (via RDP):
+#
+#   - An RDP password should be defined in an APPVEYOR_RDP_PASSWORD environment
+#     variable (via the appveyor project settings), or a random password is used
+#     every time
+#
+#   - RDP login info is given in the form of IP:PORT
+#
+#   - Login with:
+#
+#     xfreerdp /cert:ignore /dynamic-resolution /u:appveyor /p: /v:
+#
+#   - to prevent the CI run from exiting, create a text file named
+#     `BLOCK.txt` in the current directory after login. The session
+#     will run until the file is removed (or 60 min have passed)
+
+# Do a shallow clone with enough commits that queued builds will still
+# find the commit they want to build.
+clone_depth: 100
+
+environment:
+  # Do not use `image` as a matrix dimension, to have fine-grained control over
+  # what tests run on which platform.
+  # The ID variable has no impact, but sorts first in the CI run overview;
+  # an intelligible name can help to locate a specific test run.
+  matrix:
+    # List a CI run for each platform first, to have immediate access when there
+    # is a need for debugging
+
+    # Windows core tests
+    - ID: WinP39core
+      APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019
+      STACK_ROOT: "c:\\sr"
+    # MacOS core tests
+    - ID: MacP38core
+      APPVEYOR_BUILD_WORKER_IMAGE: macos-monterey
+    # Ubuntu core tests
+    # (disabled because it's not needed)
+    #- ID: Ubu20
+    #  APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004
+
+# do not run the CI if only documentation changes were made
+# documentation builds are tested elsewhere and are cheaper
+skip_commits:
+  files:
+    - doc/
+    - CHANGELOG
+
+# it is OK to specify paths that may not exist for a particular test run
+cache:
+  - C:\sr -> stack.yaml
+  - C:\Users\appveyor\AppData\Local\Programs\stack -> stack.yaml
+  - /Users/appveyor/.stack -> stack.yaml
+
+# turn off support for MS project builds (not needed)
+build: off
+
+# init cannot use any components from the repo, because it runs prior to
+# cloning it
+init:
+  # remove windows 260-char limit on path names
+  - cmd: powershell Set-Itemproperty -path "HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem" -Name LongPathsEnabled -value 1
+  # enable developer mode on windows
+  # this should enable mklink without admin privileges, but it doesn't seem to work
+  #- cmd: powershell tools\ci\appveyor_enable_windevmode.ps1
+  # enable RDP access on windows (RDP password is in appveyor project config)
+  # this is relatively expensive (1-2min), but very convenient to jump into any build at any time
+  - cmd: powershell.exe iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
+
+install:
+  # enable external SSH access to CI worker on all other systems
+  # needs APPVEYOR_SSH_KEY defined in project settings (or environment)
+  - sh: curl -sflL 'https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-ssh.sh' | bash -e -
+  # install stack (works on linux, OSX, and windows)
+  - curl -sSL https://get.haskellstack.org/ | sh
+
+# Building dependencies takes almost too long on windows, so build without
+# optimisation (including when building the dependencies)
+before_build:
+  - sh: cp stack.yaml stack.yaml.build
+  - ps: cp stack.yaml stack.yaml.build
+  - sh: 'echo "apply-ghc-options: everything" >> stack.yaml.build'
+  - ps: '"apply-ghc-options: everything" |Add-Content -Path .\stack.yaml.build'
+  - stack --stack-yaml stack.yaml.build build --only-dependencies --ghc-options=-O0
+
+build_script:
+  - stack --stack-yaml stack.yaml.build build --copy-bins --ghc-options=-O0
+
+#after_build:
+#
+
+#before_test:
+#
+
+# Cannot use stack run git-annex because it does not support --ghc-options
+# and would rebuild all deps. Instead, use the binary that --copy-bins installed.
test_script:
+  - cmd: C:\Users\appveyor\AppData\Roaming\local\bin\git-annex.exe test
+  - sh: ln -s $(stack path --local-bin)/git-annex git-annex
+  - sh: ln -s $(stack path --local-bin)/git-annex git-annex-shell
+  - sh: PATH=`pwd`:$PATH; export PATH; git-annex test
+
+#after_test:
+#
+
+#on_success:
+#
+
+#on_failure:
+#
+
+on_finish:
+  # conditionally block the exit of a CI run for direct debugging
+  - sh: while [ -f ~/BLOCK ]; do sleep 5; done
+  - cmd: powershell.exe while ((Test-Path "C:\Users\\appveyor\\BLOCK.txt")) { Start-Sleep 5 }
+  # block exit until 60 minute timeout, for direct debugging
+  #- sh: while true; do sleep 5; done
+  #- cmd: powershell.exe while ($true) { Start-Sleep 5 }
diff --git a/.codespellrc b/.codespellrc
new file mode 100644
index 0000000000..e045d59c96
--- /dev/null
+++ b/.codespellrc
@@ -0,0 +1,8 @@
+[codespell]
+skip = .git,*.pdf,*.svg,*._comment,jquery.*.js,*.mdwn,changelog,CHANGELOG,list.2018,html,dist,dist-newstyle,.stack-work,man,tags,tmp
+ignore-regex=\b(valUs|addIn)\b
+# some common variables etc (case insensitive)
+# keypair - constructs
+## May be TODO later, touches too much
+# sentinal -> sentinel
+ignore-words-list = dne,inout,fo,ot,bu,te,allright,inh,mor,myu,keypair,pasttime,sentinal,startd,ifset,afile,buildt,toword
diff --git a/.forgejo/patches/ghc-9.8.patch b/.forgejo/patches/ghc-9.8.patch
deleted file mode 100644
index 85796d787d..0000000000
--- a/.forgejo/patches/ghc-9.8.patch
+++ /dev/null
@@ -1,18 +0,0 @@
-Support ghc-9.8 by widening a lot of constraints.
-
-This patch can be removed once upstream supports ghc 9.8 offically.
- -diff -uprN git-annex-10.20240227.orig/cabal.project git-annex-10.20240227/cabal.project ---- git-annex-10.20240227.orig/cabal.project 1970-01-01 01:00:00.000000000 +0100 -+++ git-annex-10.20240227/cabal.project 2024-04-28 13:30:14.061706299 +0200 -@@ -0,0 +1,10 @@ -+packages: *.cabal -+ -+allow-newer: dav -+allow-newer: haskeline:filepath -+allow-newer: haskeline:directory -+allow-newer: xml-hamlet -+allow-newer: aws:filepath -+allow-newer: dbus:network -+allow-newer: dbus:filepath -+allow-newer: microstache:filepath diff --git a/.forgejo/workflows/generate-lockfile.yml b/.forgejo/workflows/generate-lockfile.yml deleted file mode 100644 index 8dbb579e67..0000000000 --- a/.forgejo/workflows/generate-lockfile.yml +++ /dev/null @@ -1,89 +0,0 @@ -on: - workflow_dispatch: - inputs: - ref_name: - description: 'Tag or commit' - required: true - type: string - - push: - tags: - - '*' - -jobs: - cabal-config-edge: - name: Generate cabal config for edge - runs-on: x86_64 - container: - image: alpine:edge - env: - CI_ALPINE_TARGET_RELEASE: edge - steps: - - name: Environment setup - run: | - apk upgrade -a - apk add nodejs git cabal patch - - name: Repo pull - uses: actions/checkout@v4 - with: - fetch-depth: 1 - ref: ${{ inputs.ref_name }} - - name: Config generation - run: | - patch -p1 -i .forgejo/patches/ghc-9.8.patch - HOME="${{ github.workspace}}"/cabal_cache cabal update - HOME="${{ github.workspace}}"/cabal_cache cabal v2-freeze --shadow-installed-packages --strong-flags --flags="+assistant +webapp +pairing +production +torrentparser +magicmime +benchmark -debuglocks +dbus +networkbsd +gitlfs +httpclientrestricted" - mv cabal.project.freeze git-annex.config - - name: Package upload - uses: forgejo/upload-artifact@v3 - with: - name: cabalconfigedge - path: git-annex*.config - cabal-config-v322: - name: Generate cabal config for v3.22 - runs-on: x86_64 - container: - image: alpine:3.22 - env: - CI_ALPINE_TARGET_RELEASE: v3.22 - steps: - - name: Environment setup - run: | - apk upgrade -a - apk add nodejs git cabal patch - - name: Repo pull - uses: actions/checkout@v4 - with: - fetch-depth: 1 - ref: ${{ inputs.ref_name }} - - name: Config generation - run: | - patch -p1 -i .forgejo/patches/ghc-9.8.patch - HOME="${{ github.workspace }}"/cabal_cache cabal update - HOME="${{ github.workspace }}"/cabal_cache cabal v2-freeze --shadow-installed-packages --strong-flags --flags="+assistant +webapp +pairing +production +torrentparser +magicmime +benchmark -debuglocks +dbus +networkbsd +gitlfs +httpclientrestricted" - mv cabal.project.freeze git-annex.config - - name: Package upload - uses: forgejo/upload-artifact@v3 - with: - name: cabalconfig322 - path: git-annex*.config - upload-tarball: - name: Upload to generic repo - runs-on: x86_64 - needs: [cabal-config-edge,cabal-config-v322] - container: - image: alpine:latest - steps: - - name: Environment setup - run: apk add nodejs curl findutils - - name: Package download - uses: forgejo/download-artifact@v3 - - name: Package deployment - run: | - if test $GITHUB_REF_NAME == "ci" ; then - CI_REF_NAME=${{ inputs.ref_name }} - else - CI_REF_NAME=$GITHUB_REF_NAME - fi - curl --user ${{ vars.CODE_FORGEJO_USER }}:${{ secrets.CODE_FORGEJO_TOKEN }} --upload-file ./cabalconfigedge/git-annex.config ${{ github.server_url }}/api/packages/mirrors/generic/git-annex/$CI_REF_NAME/git-annex-$CI_REF_NAME-edge.cabal - curl --user ${{ vars.CODE_FORGEJO_USER }}:${{ secrets.CODE_FORGEJO_TOKEN }} --upload-file ./cabalconfig322/git-annex.config ${{ github.server_url 
}}/api/packages/mirrors/generic/git-annex/$CI_REF_NAME/git-annex-$CI_REF_NAME-v322.cabal diff --git a/.forgejo/workflows/mirror-repository.yml b/.forgejo/workflows/mirror-repository.yml deleted file mode 100644 index f44c4668cf..0000000000 --- a/.forgejo/workflows/mirror-repository.yml +++ /dev/null @@ -1,50 +0,0 @@ -on: - workflow_dispatch: - - schedule: - - cron: '@hourly' - -jobs: - mirror: - name: Pull from upstream - runs-on: x86_64 - container: - image: alpine:latest - env: - upstream: https://git.joeyh.name/git/git-annex.git - tags: '10.2025*' - steps: - - name: Environment setup - run: apk add grep git sed coreutils bash nodejs - - name: Fetch destination - uses: actions/checkout@v4 - with: - fetch_depth: 1 - ref: ci - token: ${{ secrets.CODE_FORGEJO_TOKEN }} - - name: Missing tag detecting - run: | - git ls-remote $upstream "refs/tags/$tags" | grep -v '{' | sed 's|.*/||' | sort > upstream_tags - git ls-remote ${{ github.server_url}}/${{ github.repository }} "refs/tags/$tags" | grep -v '{' | sed 's|.*/||' | sort > destination_tags - comm -23 upstream_tags destination_tags > missing_tags - echo "Missing tags:" - cat missing_tags - - name: Missing tag fetch - run: | - git remote add upstream $upstream - while read tag; do - git fetch upstream tag $tag --no-tags - done < missing_tags - - name: Packaging workflow injection - run: | - while read tag; do - git checkout $tag - git tag -d $tag - git checkout ci -- ./.forgejo - git config user.name "forgejo-actions[bot]" - git config user.email "dev@ayakael.net" - git commit -m 'Inject custom workflow' - git tag -a $tag -m $tag - done < missing_tags - - name: Push to destination - run: git push --force origin refs/tags/*:refs/tags/* --tags diff --git a/.ghci b/.ghci new file mode 100644 index 0000000000..931298e050 --- /dev/null +++ b/.ghci @@ -0,0 +1,3 @@ +:load Common +:set -XLambdaCase +:set -fno-warn-tabs diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..a81b30b931 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +CHANGELOG merge=dpkg-mergechangelogs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..e21cbf9c80 --- /dev/null +++ b/.gitignore @@ -0,0 +1,41 @@ +tags +TAGS +Setup +*.hi +*.o +tmp +test +Build/SysConfig +Build/Version +Build/InstallDesktopFile +Build/Standalone +Build/BuildVersion +Build/MakeMans +git-annex +git-annex-shell +git-remote-annex +man +git-union-merge +git-union-merge.1 +doc/.ikiwiki +html +*.tix +.hpc +dist +dist-newstyle +cabal.project.local +cabal.project.local~* +result +git-annex-build-deps* +# Sandboxed builds +cabal-dev +.cabal-sandbox +cabal.sandbox.config +.stack-work +stack.yaml.lock +# Project-local emacs configuration +.dir-locals.el +# OSX related +.DS_Store +.virthualenv +.tasty-rerun-log diff --git a/.mailmap b/.mailmap new file mode 100644 index 0000000000..b6c55d3f9b --- /dev/null +++ b/.mailmap @@ -0,0 +1,30 @@ +Antoine Beaupré anarcat +Antoine Beaupré https://id.koumbit.net/anarcat +Greg Grossmeier http://grossmeier.net/ +Jimmy Tang jtang +Joachim Breitner http://www.joachim-breitner.de/ +Joey Hess Joey Hess +Joey Hess Joey Hess +Joey Hess Joey Hess +Joey Hess Joey Hess +Joey Hess Joey Hess +Joey Hess Joey Hess +Joey Hess Joey Hess +Joey Hess http://joey.kitenet.net/ +Joey Hess http://joeyh.name/ +Joey Hess http://joeyh.name/ +Joey Hess https://www.google.com/accounts/o8/id?id=AItOawmJfIszzreLNvCqzqzvTayA9_9L6gb9RtY +Johan Kiviniemi http://johan.kiviniemi.name/ +Johan Kiviniemi http://johan.kiviniemi.name/ +Nicolas Pouillard 
http://ertai.myopenid.com/ +Peter Simons Peter Simons +Peter Simons http://peter-simons.myopenid.com/ +Philipp Kern http://phil.0x539.de/ +Richard Hartmann https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U +Yaroslav Halchenko +Yaroslav Halchenko yarikoptic +Yaroslav Halchenko http://yarikoptic.myopenid.com/ +Yaroslav Halchenko https://www.google.com/accounts/o8/id?id=AItOawnx8kHW66N3BqmkVpgtXDlYMvr8TJ5VvfY +Yaroslav Halchenko https://me.yahoo.com/a/EbvxpTI_xP9Aod7Mg4cwGhgjrCrdM5s-#7c0f4 +Øyvind A. Holm http://sunny256.sunbase.org/ +Øyvind A. Holm https://sunny256.wordpress.com/ diff --git a/Annex.hs b/Annex.hs new file mode 100644 index 0000000000..eaba4703cf --- /dev/null +++ b/Annex.hs @@ -0,0 +1,478 @@ +{- git-annex monad + - + - Copyright 2010-2021 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE GeneralizedNewtypeDeriving, BangPatterns, PackageImports #-} + +module Annex ( + Annex, + AnnexState(..), + AnnexRead(..), + new, + run, + eval, + makeRunner, + getRead, + getState, + changeState, + withState, + setField, + setOutput, + getField, + addCleanupAction, + gitRepo, + inRepo, + fromRepo, + calcRepo, + calcRepo', + getGitConfig, + overrideGitConfig, + changeGitRepo, + adjustGitRepo, + addGitConfigOverride, + getGitConfigOverrides, + getRemoteGitConfig, + withCurrentState, + changeDirectory, + getGitRemotes, + incError, +) where + +import Common +import qualified Git +import qualified Git.Config +import qualified Git.Construct +import Annex.Fixup +import Git.HashObject +import Git.CheckAttr +import Git.CheckIgnore +import qualified Git.Hook +import qualified Git.Queue +import Types.Key +import Types.Backend +import Types.GitConfig +import qualified Types.Remote +import Types.Crypto +import Types.BranchState +import Types.TrustLevel +import Types.Group +import Types.Messages +import Types.Concurrency +import Types.UUID +import Types.FileMatcher +import Types.NumCopies +import Types.LockCache +import Types.DesktopNotify +import Types.CleanupActions +import Types.AdjustedBranch +import Types.WorkerPool +import Types.IndexFiles +import Types.CatFileHandles +import Types.RemoteConfig +import Types.TransferrerPool +import Types.VectorClock +import Types.Cluster +import Annex.VectorClock.Utility +import Annex.Debug.Utility +import qualified Database.Keys.Handle as Keys +import Utility.InodeCache +import Utility.Url +import Utility.ResourcePool +import Utility.HumanTime +import Git.Credential (CredentialCache(..)) + +import "mtl" Control.Monad.Reader +import Control.Concurrent +import Control.Concurrent.STM +import qualified Control.Monad.Fail as Fail +import qualified Data.Map.Strict as M +import qualified Data.Set as S +import Data.Time.Clock.POSIX + +{- git-annex's monad is a ReaderT around an AnnexState stored in a MVar, + - and an AnnexRead. The MVar is not exposed outside this module. + - + - Note that when an Annex action fails and the exception is caught, + - any changes the action has made to the AnnexState are retained, + - due to the use of the MVar to store the state. + -} +newtype Annex a = Annex { runAnnex :: ReaderT (MVar AnnexState, AnnexRead) IO a } + deriving ( + Monad, + MonadIO, + MonadReader (MVar AnnexState, AnnexRead), + MonadCatch, + MonadThrow, + MonadMask, + Fail.MonadFail, + Functor, + Applicative, + Alternative + ) + +-- Values that can be read, but not modified by an Annex action. 
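+-- For example, an action reads one of these with getRead, while mutable
+-- values go through getState/changeState. A minimal sketch (whenForced is
+-- a hypothetical helper, not part of this module):
+--
+-- > whenForced :: Annex () -> Annex ()
+-- > whenForced a = getRead force >>= \f -> when f a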
+data AnnexRead = AnnexRead + { branchstate :: MVar BranchState + , activekeys :: TVar (M.Map Key ThreadId) + , activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer) + , keysdbhandle :: Keys.DbHandle + , sshstalecleaned :: TMVar Bool + , signalactions :: TVar (M.Map SignalAction (Int -> IO ())) + , transferrerpool :: TransferrerPool + , debugenabled :: Bool + , debugselector :: DebugSelector + , explainenabled :: Bool + , ciphers :: TMVar (M.Map StorableCipher Cipher) + , fast :: Bool + , force :: Bool + , forcenumcopies :: Maybe NumCopies + , forcemincopies :: Maybe MinCopies + , forcebackend :: Maybe String + , useragent :: Maybe String + , desktopnotify :: DesktopNotify + , gitcredentialcache :: TMVar CredentialCache + } + +newAnnexRead :: GitConfig -> IO AnnexRead +newAnnexRead c = do + bs <- newMVar startBranchState + emptyactivekeys <- newTVarIO M.empty + emptyactiveremotes <- newMVar M.empty + kh <- Keys.newDbHandle + sc <- newTMVarIO False + si <- newTVarIO M.empty + tp <- newTransferrerPool + cm <- newTMVarIO M.empty + cc <- newTMVarIO (CredentialCache M.empty) + return $ AnnexRead + { branchstate = bs + , activekeys = emptyactivekeys + , activeremotes = emptyactiveremotes + , keysdbhandle = kh + , sshstalecleaned = sc + , signalactions = si + , transferrerpool = tp + , debugenabled = annexDebug c + , debugselector = debugSelectorFromGitConfig c + , explainenabled = False + , ciphers = cm + , fast = False + , force = False + , forcebackend = Nothing + , forcenumcopies = Nothing + , forcemincopies = Nothing + , useragent = Nothing + , desktopnotify = mempty + , gitcredentialcache = cc + } + +-- Values that can change while running an Annex action. +data AnnexState = AnnexState + { repo :: Git.Repo + , repoadjustment :: (Git.Repo -> IO Git.Repo) + , gitconfig :: GitConfig + , gitconfigadjustment :: (GitConfig -> GitConfig) + , gitconfigoverride :: [String] + , gitremotes :: Maybe [Git.Repo] + , gitconfiginodecache :: Maybe InodeCache + , backend :: Maybe (BackendA Annex) + , remotes :: [Types.Remote.RemoteA Annex] + , output :: MessageState + , concurrency :: ConcurrencySetting + , daemon :: Bool + , repoqueue :: Maybe (Git.Queue.Queue Annex) + , catfilehandles :: CatFileHandles + , hashobjecthandle :: Maybe (ResourcePool HashObjectHandle) + , checkattrhandle :: Maybe (ResourcePool CheckAttrHandle) + , checkignorehandle :: Maybe (ResourcePool CheckIgnoreHandle) + , globalnumcopies :: Maybe (Maybe NumCopies) + , globalmincopies :: Maybe (Maybe MinCopies) + , limit :: ExpandableMatcher Annex + , timelimit :: Maybe (Duration, POSIXTime) + , sizelimit :: Maybe (TVar Integer) + , uuiddescmap :: Maybe UUIDDescMap + , preferredcontentmap :: Maybe (FileMatcherMap Annex) + , requiredcontentmap :: Maybe (FileMatcherMap Annex) + , remoteconfigmap :: Maybe (M.Map UUID RemoteConfig) + , clusters :: Maybe (Annex Clusters) + , forcetrust :: TrustMap + , trustmap :: Maybe TrustMap + , groupmap :: Maybe GroupMap + , lockcache :: LockCache + , fields :: M.Map String String + , cleanupactions :: M.Map CleanupAction (Annex ()) + , sentinalstatus :: Maybe SentinalStatus + , errcounter :: Integer + , reachedlimit :: Bool + , adjustedbranchrefreshcounter :: Integer + , unusedkeys :: Maybe (S.Set Key) + , tempurls :: M.Map Key URLString + , existinghooks :: M.Map Git.Hook.Hook Bool + , workers :: Maybe (TMVar (WorkerPool (AnnexState, AnnexRead))) + , cachedcurrentbranch :: (Maybe (Maybe Git.Branch, Maybe Adjustment)) + , cachedgitenv :: Maybe (AltIndexFile, FilePath, [(String, String)]) + , 
urloptions :: Maybe UrlOptions + , insmudgecleanfilter :: Bool + , getvectorclock :: IO CandidateVectorClock + , proxyremote :: Maybe (Either ClusterUUID (Types.Remote.RemoteA Annex)) + } + +newAnnexState :: GitConfig -> Git.Repo -> IO AnnexState +newAnnexState c r = do + o <- newMessageState + vc <- startVectorClock + return $ AnnexState + { repo = r + , repoadjustment = return + , gitconfig = c + , gitconfigadjustment = id + , gitconfigoverride = [] + , gitremotes = Nothing + , gitconfiginodecache = Nothing + , backend = Nothing + , remotes = [] + , output = o + , concurrency = ConcurrencyCmdLine NonConcurrent + , daemon = False + , repoqueue = Nothing + , catfilehandles = catFileHandlesNonConcurrent + , hashobjecthandle = Nothing + , checkattrhandle = Nothing + , checkignorehandle = Nothing + , globalnumcopies = Nothing + , globalmincopies = Nothing + , limit = BuildingMatcher [] + , timelimit = Nothing + , sizelimit = Nothing + , uuiddescmap = Nothing + , preferredcontentmap = Nothing + , requiredcontentmap = Nothing + , remoteconfigmap = Nothing + , clusters = Nothing + , forcetrust = M.empty + , trustmap = Nothing + , groupmap = Nothing + , lockcache = M.empty + , fields = M.empty + , cleanupactions = M.empty + , sentinalstatus = Nothing + , errcounter = 0 + , reachedlimit = False + , adjustedbranchrefreshcounter = 0 + , unusedkeys = Nothing + , tempurls = M.empty + , existinghooks = M.empty + , workers = Nothing + , cachedcurrentbranch = Nothing + , cachedgitenv = Nothing + , urloptions = Nothing + , insmudgecleanfilter = False + , getvectorclock = vc + , proxyremote = Nothing + } + +{- Makes an Annex state object for the specified git repo. + - Ensures the config is read, if it was not already, and performs + - any necessary git repo fixups. -} +new :: Git.Repo -> IO (AnnexState, AnnexRead) +new r = do + r' <- Git.Config.read r + let c = extractGitConfig FromGitConfig r' + st <- newAnnexState c =<< fixupRepo r' c + rd <- newAnnexRead c + return (st, rd) + +{- Performs an action in the Annex monad from a starting state, + - returning a new state. -} +run :: (AnnexState, AnnexRead) -> Annex a -> IO (a, (AnnexState, AnnexRead)) +run (st, rd) a = do + mv <- newMVar st + run' mv rd a + +run' :: MVar AnnexState -> AnnexRead -> Annex a -> IO (a, (AnnexState, AnnexRead)) +run' mvar rd a = do + r <- runReaderT (runAnnex a) (mvar, rd) + st <- takeMVar mvar + return (r, (st, rd)) + +{- Performs an action in the Annex monad from a starting state, + - and throws away the changed state. -} +eval :: (AnnexState, AnnexRead) -> Annex a -> IO a +eval v a = fst <$> run v a + +{- Makes a runner action, that allows diving into IO and from inside + - the IO action, running an Annex action. -} +makeRunner :: Annex (Annex a -> IO a) +makeRunner = do + (mvar, rd) <- ask + return $ \a -> do + (r, (s, _rd)) <- run' mvar rd a + putMVar mvar s + return r + +getRead :: (AnnexRead -> v) -> Annex v +getRead selector = selector . snd <$> ask + +getState :: (AnnexState -> v) -> Annex v +getState selector = do + mvar <- fst <$> ask + st <- liftIO $ readMVar mvar + return $ selector st + +changeState :: (AnnexState -> AnnexState) -> Annex () +changeState modifier = do + mvar <- fst <$> ask + liftIO $ modifyMVar_ mvar $ return . 
modifier

+withState :: (AnnexState -> IO (AnnexState, b)) -> Annex b
+withState modifier = do
+	mvar <- fst <$> ask
+	liftIO $ modifyMVar mvar modifier
+
+{- Sets a field to a value -}
+setField :: String -> String -> Annex ()
+setField field value = changeState $ \st ->
+	st { fields = M.insert field value $ fields st }
+
+{- Adds a cleanup action to perform. -}
+addCleanupAction :: CleanupAction -> Annex () -> Annex ()
+addCleanupAction k a = changeState $ \st ->
+	st { cleanupactions = M.insert k a $ cleanupactions st }
+
+{- Sets the type of output to emit. -}
+setOutput :: OutputType -> Annex ()
+setOutput o = changeState $ \st ->
+	let m = output st
+	in st { output = m { outputType = adjustOutputType (outputType m) o } }
+
+{- Gets the value of a field. -}
+getField :: String -> Annex (Maybe String)
+getField field = M.lookup field <$> getState fields
+
+{- Returns the annex's git repository. -}
+gitRepo :: Annex Git.Repo
+gitRepo = getState repo
+
+{- Runs an IO action in the annex's git repository. -}
+inRepo :: (Git.Repo -> IO a) -> Annex a
+inRepo a = liftIO . a =<< gitRepo
+
+{- Extracts a value from the annex's git repository. -}
+fromRepo :: (Git.Repo -> a) -> Annex a
+fromRepo a = a <$> gitRepo
+
+{- Calculates a value from an annex's git repository and its GitConfig. -}
+calcRepo :: (Git.Repo -> GitConfig -> IO a) -> Annex a
+calcRepo a = do
+	s <- getState id
+	liftIO $ a (repo s) (gitconfig s)
+
+calcRepo' :: (Git.Repo -> GitConfig -> a) -> Annex a
+calcRepo' f = do
+	s <- getState id
+	pure $ f (repo s) (gitconfig s)
+
+{- Gets the GitConfig settings. -}
+getGitConfig :: Annex GitConfig
+getGitConfig = getState gitconfig
+
+{- Overrides a GitConfig setting. The modification persists across
+ - reloads of the repo's config. -}
+overrideGitConfig :: (GitConfig -> GitConfig) -> Annex ()
+overrideGitConfig f = changeState $ \st -> st
+	{ gitconfigadjustment = gitconfigadjustment st . f
+	, gitconfig = f (gitconfig st)
+	}
+
+{- Adds an adjustment to the Repo data. Adjustments persist across reloads
+ - of the repo's config.
+ -
+ - Note that the action may run more than once, and should avoid eg,
+ - appending the same value to a repo's config when run repeatedly.
+ -}
+adjustGitRepo :: (Git.Repo -> IO Git.Repo) -> Annex ()
+adjustGitRepo a = do
+	changeState $ \st -> st { repoadjustment = \r -> repoadjustment st r >>= a }
+	changeGitRepo =<< gitRepo
+
+{- Adds a git config setting, like "foo=bar". It will be passed with -c
+ - to git processes. The config setting is also recorded in the Repo,
+ - and the GitConfig is updated. -}
+addGitConfigOverride :: String -> Annex ()
+addGitConfigOverride v = do
+	adjustGitRepo $ \r ->
+		Git.Config.store (encodeBS v) Git.Config.ConfigList $
+			r { Git.gitGlobalOpts = go (Git.gitGlobalOpts r) }
+	changeState $ \st -> st { gitconfigoverride = v : gitconfigoverride st }
+  where
+	-- Remove any prior occurrence of the setting to avoid
+	-- building up many of them when the adjustment is run repeatedly,
+	-- and add the setting to the end.
+	go [] = [Param "-c", Param v]
+	go (Param "-c": Param v':rest) | v' == v = go rest
+	go (c:rest) = c : go rest
+
+{- Values that were passed to addGitConfigOverride. -}
+getGitConfigOverrides :: Annex [String]
+getGitConfigOverrides = reverse <$> getState gitconfigoverride
+
+{- Changing the git Repo data also involves re-extracting its GitConfig.
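+ -
+ - For example, a caller that has re-read the config from disk could
+ - apply it like this (a sketch; it assumes Git.Config.reRead with type
+ - Repo -> IO Repo):
+ -
+ - > reloadConfig :: Annex ()
+ - > reloadConfig = changeGitRepo =<< inRepo Git.Config.reRead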
-}
+changeGitRepo :: Git.Repo -> Annex ()
+changeGitRepo r = do
+	repoadjuster <- getState repoadjustment
+	gitconfigadjuster <- getState gitconfigadjustment
+	r' <- liftIO $ repoadjuster r
+	changeState $ \st -> st
+		{ repo = r'
+		, gitconfig = gitconfigadjuster $
+			extractGitConfig FromGitConfig r'
+		, gitremotes = Nothing
+		}
+
+{- Gets the RemoteGitConfig from a remote, given the Git.Repo for that
+ - remote. -}
+getRemoteGitConfig :: Git.Repo -> Annex RemoteGitConfig
+getRemoteGitConfig r = do
+	g <- gitRepo
+	liftIO $ atomically $ extractRemoteGitConfig g (Git.repoDescribe r)
+
+{- Converts an Annex action into an IO action, that runs with a copy
+ - of the current Annex state.
+ -
+ - Use with caution; the action should not rely on changing the
+ - state, as it will be thrown away. -}
+withCurrentState :: Annex a -> Annex (IO a)
+withCurrentState a = do
+	(mvar, rd) <- ask
+	st <- liftIO $ readMVar mvar
+	return $ eval (st, rd) a
+
+{- It's not safe to use setCurrentDirectory in the Annex monad,
+ - because the git repo paths are stored relative.
+ - Instead, use this.
+ -}
+changeDirectory :: FilePath -> Annex ()
+changeDirectory d = do
+	r <- liftIO . Git.adjustPath absPath =<< gitRepo
+	liftIO $ setCurrentDirectory d
+	r' <- liftIO $ Git.relPath r
+	changeState $ \st -> st { repo = r' }
+
+incError :: Annex ()
+incError = changeState $ \st ->
+	let !c = errcounter st + 1
+	    !st' = st { errcounter = c }
+	in st'
+
+getGitRemotes :: Annex [Git.Repo]
+getGitRemotes = do
+	st <- getState id
+	case gitremotes st of
+		Just rs -> return rs
+		Nothing -> do
+			rs <- liftIO $ Git.Construct.fromRemotes (repo st)
+			changeState $ \st' -> st' { gitremotes = Just rs }
+			return rs
diff --git a/Annex/Action.hs b/Annex/Action.hs
new file mode 100644
index 0000000000..69b92f8240
--- /dev/null
+++ b/Annex/Action.hs
@@ -0,0 +1,69 @@
+{- git-annex actions
+ -
+ - Copyright 2010-2022 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+module Annex.Action (
+	action,
+	verifiedAction,
+	quiesce,
+	stopCoProcesses,
+) where
+
+import qualified Data.Map as M
+
+import Annex.Common
+import qualified Annex
+import Annex.Content
+import Annex.CatFile
+import Annex.CheckAttr
+import Annex.HashObject
+import Annex.CheckIgnore
+import Annex.TransferrerPool
+import qualified Database.Keys
+
+{- Runs an action that may throw exceptions, catching and displaying them. -}
+action :: Annex () -> Annex Bool
+action a = tryNonAsync a >>= \case
+	Right () -> return True
+	Left e -> do
+		warning (UnquotedString (show e))
+		return False
+
+verifiedAction :: Annex Verification -> Annex (Bool, Verification)
+verifiedAction a = tryNonAsync a >>= \case
+	Right v -> return (True, v)
+	Left e -> do
+		warning (UnquotedString (show e))
+		return (False, UnVerified)
+
+{- Run all cleanup actions, save all state, stop all long-running child
+ - processes.
+ -
+ - This can be run repeatedly with other Annex actions run in between,
+ - but usually it is run only once at the end.
+ -
+ - When passed True, avoids making any commits to the git-annex branch,
+ - leaving changes in the journal for later commit.
+ -}
+quiesce :: Bool -> Annex ()
+quiesce nocommit = do
+	cas <- Annex.withState $ \st -> return
+		( st { Annex.cleanupactions = mempty }
+		, Annex.cleanupactions st
+		)
+	sequence_ (M.elems cas)
+	saveState nocommit
+	stopCoProcesses
+	Database.Keys.closeDb
+
+{- Stops all long-running child processes, including git query processes.
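+ -
+ - A sketch of a direct call site (hypothetical helper; quiesce above is
+ - the usual caller, and the stopped processes are restarted on demand
+ - the next time they are needed):
+ -
+ - > withFreshCoProcesses :: Annex a -> Annex a
+ - > withFreshCoProcesses a = stopCoProcesses >> a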
-} +stopCoProcesses :: Annex () +stopCoProcesses = do + catFileStop + checkAttrStop + hashObjectStop + checkIgnoreStop + emptyTransferrerPool diff --git a/Annex/AdjustedBranch.hs b/Annex/AdjustedBranch.hs new file mode 100644 index 0000000000..4ce101d8f9 --- /dev/null +++ b/Annex/AdjustedBranch.hs @@ -0,0 +1,688 @@ +{- adjusted branch + - + - Copyright 2016-2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE BangPatterns, OverloadedStrings #-} + +module Annex.AdjustedBranch ( + Adjustment(..), + LinkAdjustment(..), + PresenceAdjustment(..), + LinkPresentAdjustment(..), + adjustmentHidesFiles, + adjustmentIsStable, + OrigBranch, + AdjBranch(..), + originalToAdjusted, + adjustedToOriginal, + fromAdjustedBranch, + getAdjustment, + enterAdjustedBranch, + adjustedBranchRefresh, + adjustedBranchRefreshFull, + adjustBranch, + adjustTree, + adjustToCrippledFileSystem, + commitForAdjustedBranch, + propigateAdjustedCommits, + propigateAdjustedCommits', + commitAdjustedTree, + commitAdjustedTree', + BasisBranch(..), + basisBranch, + setBasisBranch, + preventCommits, + AdjustedClone(..), + checkAdjustedClone, + checkVersionSupported, + isGitVersionSupported, +) where + +import Annex.Common +import Types.AdjustedBranch +import Annex.AdjustedBranch.Name +import qualified Annex +import Git +import Git.Types +import qualified Git.Branch +import qualified Git.Ref +import qualified Git.Command +import qualified Git.Tree +import qualified Git.DiffTree +import Git.Tree (TreeItem(..)) +import Git.Sha +import Git.Env +import Git.Index +import Git.FilePath +import qualified Git.LockFile +import qualified Git.Version +import Annex.CatFile +import Annex.Link +import Annex.Content.Presence +import Annex.CurrentBranch +import Types.CleanupActions +import qualified Database.Keys +import Config +import Logs.View (is_branchView) +import Logs.AdjustedBranchUpdate +import Utility.FileMode +import qualified Utility.RawFilePath as R + +import Data.Time.Clock.POSIX +import qualified Data.Map as M +import System.PosixCompat.Files (fileMode) + +class AdjustTreeItem t where + -- How to perform various adjustments to a TreeItem. + adjustTreeItem :: t -> TreeItem -> Annex (Maybe TreeItem) + -- Will adjusting a given tree always yield the same adjusted tree? 
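+	-- For example, per the instances below, link adjustments are
+	-- stable, while hiding missing files is not, since content
+	-- presence can change without the tree changing:
+	--
+	-- > adjustmentIsStable (LinkAdjustment UnlockAdjustment) == True
+	-- > adjustmentIsStable (PresenceAdjustment HideMissingAdjustment Nothing) == False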
+ adjustmentIsStable :: t -> Bool + +instance AdjustTreeItem Adjustment where + adjustTreeItem (LinkAdjustment l) t = adjustTreeItem l t + adjustTreeItem (PresenceAdjustment p Nothing) t = adjustTreeItem p t + adjustTreeItem (PresenceAdjustment p (Just l)) t = + adjustTreeItem p t >>= \case + Nothing -> return Nothing + Just t' -> adjustTreeItem l t' + adjustTreeItem (LinkPresentAdjustment l) t = adjustTreeItem l t + + adjustmentIsStable (LinkAdjustment l) = adjustmentIsStable l + adjustmentIsStable (PresenceAdjustment p _) = adjustmentIsStable p + adjustmentIsStable (LinkPresentAdjustment l) = adjustmentIsStable l + +instance AdjustTreeItem LinkAdjustment where + adjustTreeItem UnlockAdjustment = + ifSymlink adjustToPointer noAdjust + adjustTreeItem LockAdjustment = + ifSymlink noAdjust adjustToSymlink + adjustTreeItem FixAdjustment = + ifSymlink adjustToSymlink noAdjust + adjustTreeItem UnFixAdjustment = + ifSymlink (adjustToSymlink' gitAnnexLinkCanonical) noAdjust + + adjustmentIsStable _ = True + +instance AdjustTreeItem PresenceAdjustment where + adjustTreeItem HideMissingAdjustment = + ifPresent noAdjust hideAdjust + adjustTreeItem ShowMissingAdjustment = + noAdjust + + adjustmentIsStable HideMissingAdjustment = False + adjustmentIsStable ShowMissingAdjustment = True + +instance AdjustTreeItem LinkPresentAdjustment where + adjustTreeItem UnlockPresentAdjustment = + ifPresent adjustToPointer adjustToSymlink + adjustTreeItem LockPresentAdjustment = + -- Turn all pointers back to symlinks, whether the content + -- is present or not. This is done because the content + -- availability may have changed and the branch not been + -- re-adjusted to keep up, so there may be pointers whose + -- content is not present. + ifSymlink noAdjust adjustToSymlink + + adjustmentIsStable UnlockPresentAdjustment = False + adjustmentIsStable LockPresentAdjustment = True + +ifSymlink + :: (TreeItem -> Annex a) + -> (TreeItem -> Annex a) + -> TreeItem + -> Annex a +ifSymlink issymlink notsymlink ti@(TreeItem _f m _s) + | toTreeItemType m == Just TreeSymlink = issymlink ti + | otherwise = notsymlink ti + +ifPresent + :: (TreeItem -> Annex (Maybe TreeItem)) + -> (TreeItem -> Annex (Maybe TreeItem)) + -> TreeItem + -> Annex (Maybe TreeItem) +ifPresent ispresent notpresent ti@(TreeItem _ _ s) = + catKey s >>= \case + Just k -> ifM (inAnnex k) (ispresent ti, notpresent ti) + Nothing -> return (Just ti) + +noAdjust :: TreeItem -> Annex (Maybe TreeItem) +noAdjust = return . Just + +hideAdjust :: TreeItem -> Annex (Maybe TreeItem) +hideAdjust _ = return Nothing + +adjustToPointer :: TreeItem -> Annex (Maybe TreeItem) +adjustToPointer ti@(TreeItem f _m s) = catKey s >>= \case + Just k -> do + Database.Keys.addAssociatedFile k f + exe <- catchDefaultIO False $ + (isExecutable . fileMode) <$> + (liftIO . R.getFileStatus + =<< calcRepo (gitAnnexLocation k)) + let mode = fromTreeItemType $ + if exe then TreeExecutable else TreeFile + Just . TreeItem f mode <$> hashPointerFile k + Nothing -> return (Just ti) + +adjustToSymlink :: TreeItem -> Annex (Maybe TreeItem) +adjustToSymlink = adjustToSymlink' gitAnnexLink + +adjustToSymlink' :: (RawFilePath -> Key -> Git.Repo -> GitConfig -> IO RawFilePath) -> TreeItem -> Annex (Maybe TreeItem) +adjustToSymlink' gitannexlink ti@(TreeItem f _m s) = catKey s >>= \case + Just k -> do + absf <- inRepo $ \r -> absPath $ fromTopFilePath f r + linktarget <- calcRepo $ gitannexlink absf k + Just . 
TreeItem f (fromTreeItemType TreeSymlink)
+		<$> hashSymlink linktarget
+	Nothing -> return (Just ti)
+
+-- This is a hidden branch ref, that's used as the basis for the AdjBranch,
+-- since pushes can overwrite the OrigBranch at any time. So, changes
+-- are propagated from the AdjBranch to the head of the BasisBranch.
+newtype BasisBranch = BasisBranch Ref
+
+-- The basis for refs/heads/adjusted/master(unlocked) is
+-- refs/basis/adjusted/master(unlocked).
+basisBranch :: AdjBranch -> BasisBranch
+basisBranch (AdjBranch adjbranch) = BasisBranch $
+	Ref ("refs/basis/" <> fromRef' (Git.Ref.base adjbranch))
+
+getAdjustment :: Branch -> Maybe Adjustment
+getAdjustment = fmap fst . adjustedToOriginal
+
+fromAdjustedBranch :: Branch -> OrigBranch
+fromAdjustedBranch b = maybe b snd (adjustedToOriginal b)
+
+{- Enters an adjusted version of the current branch (or, if already in an
+ - adjusted version of a branch, changes the adjustment of the original
+ - branch).
+ -
+ - Can fail if no branch is checked out, or if the adjusted branch already
+ - exists, or if staged changes prevent a checkout.
+ -}
+enterAdjustedBranch :: Adjustment -> Annex Bool
+enterAdjustedBranch adj = inRepo Git.Branch.current >>= \case
+	Just currbranch -> case getAdjustment currbranch of
+		Just curradj | curradj == adj ->
+			updateAdjustedBranch adj (AdjBranch currbranch)
+				(fromAdjustedBranch currbranch)
+		_ -> go currbranch
+	Nothing -> do
+		warning "not on any branch!"
+		return False
+  where
+	go currbranch = do
+		let origbranch = fromAdjustedBranch currbranch
+		let adjbranch = adjBranch $ originalToAdjusted origbranch adj
+		ifM (inRepo (Git.Ref.exists adjbranch) <&&> (not <$> Annex.getRead Annex.force) <&&> pure (not (is_branchView origbranch)))
+			( do
+				mapM_ (warning . UnquotedString . unwords)
+					[ [ "adjusted branch"
+					  , Git.Ref.describe adjbranch
+					  , "already exists."
+					  ]
+					, [ "Aborting because that branch may have changes that have not yet reached"
+					  , Git.Ref.describe origbranch
+					  ]
+					, [ "You can check out the adjusted branch manually to enter it,"
+					  , "or add the --force option to overwrite the old branch."
+					  ]
+					]
+				return False
+			, do
+				starttime <- liftIO getPOSIXTime
+				b <- preventCommits $ const $
+					adjustBranch adj origbranch
+				ok <- checkoutAdjustedBranch b False
+				when ok $
+					recordAdjustedBranchUpdateFinished starttime
+				return ok
+			)
+
+checkoutAdjustedBranch :: AdjBranch -> Bool -> Annex Bool
+checkoutAdjustedBranch (AdjBranch b) quietcheckout = do
+	-- checkout can have output in large repos
+	unless quietcheckout
+		showOutput
+	inRepo $ Git.Command.runBool $
+		[ Param "checkout"
+		, Param $ fromRef $ Git.Ref.base b
+		, if quietcheckout then Param "--quiet" else Param "--progress"
+		]
+
+{- Already in a branch with this adjustment, but the user asked to enter it
+ - again. This should have the same result as propagating any commits
+ - back to the original branch, checking out the original branch, deleting
+ - and rebuilding the adjusted branch, and then checking it out.
+ - But, it can be implemented more efficiently than that.
+ -}
+updateAdjustedBranch :: Adjustment -> AdjBranch -> OrigBranch -> Annex Bool
+updateAdjustedBranch adj (AdjBranch currbranch) origbranch
+	| not (adjustmentIsStable adj) = do
+		(b, origheadfile, newheadfile) <- preventCommits $ \commitlck -> do
+			-- Avoid losing any commits that the adjusted branch
+			-- has that have not yet been propagated back to the
+			-- origbranch.
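+			-- (Only that propagation side effect matters here;
+			-- the rebase action that propigateAdjustedCommits'
+			-- also returns is discarded.)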
+			_ <- propigateAdjustedCommits' True origbranch adj commitlck
+
+			origheadfile <- inRepo $ readFileStrict . Git.Ref.headFile
+			origheadsha <- inRepo (Git.Ref.sha currbranch)
+
+			b <- adjustBranch adj origbranch
+
+			-- Git normally won't do anything when asked to check
+			-- out the currently checked out branch, even when its
+			-- ref has changed. Work around this by writing a raw
+			-- sha to .git/HEAD.
+			newheadfile <- case origheadsha of
+				Just s -> do
+					inRepo $ \r -> do
+						let newheadfile = fromRef s
+						writeFile (Git.Ref.headFile r) newheadfile
+						return (Just newheadfile)
+				_ -> return Nothing
+
+			return (b, origheadfile, newheadfile)
+
+		-- Make git checkout quiet to avoid warnings about
+		-- disconnected branch tips being lost.
+		ok <- checkoutAdjustedBranch b True
+
+		-- Avoid leaving repo with detached head.
+		unless ok $ case newheadfile of
+			Nothing -> noop
+			Just v -> preventCommits $ \_commitlck -> inRepo $ \r -> do
+				v' <- readFileStrict (Git.Ref.headFile r)
+				when (v == v') $
+					writeFile (Git.Ref.headFile r) origheadfile
+
+		return ok
+	| otherwise = preventCommits $ \commitlck -> do
+		-- Done for consistency.
+		_ <- propigateAdjustedCommits' True origbranch adj commitlck
+		-- No need to actually update the branch because the
+		-- adjustment is stable.
+		return True
+
+{- Passed an action that, if it succeeds, may get or drop the Key associated
+ - with the file. When the adjusted branch needs to be refreshed to reflect
+ - those changes, it's handled here.
+ -
+ - Note that the AssociatedFile must be verified by this to point to the
+ - Key. In some cases, the value was provided by the user and might not
+ - really be an associated file.
+ -}
+adjustedBranchRefresh :: AssociatedFile -> Annex a -> Annex a
+adjustedBranchRefresh _af a = do
+	r <- a
+	go
+	return r
+  where
+	go = getCurrentBranch >>= \case
+		(Just origbranch, Just adj) ->
+			unless (adjustmentIsStable adj) $ do
+				recordAdjustedBranchUpdateNeeded
+				n <- annexAdjustedBranchRefresh <$> Annex.getGitConfig
+				unless (n == 0) $ ifM (checkcounter n)
+					-- This is slow; it would be better to incrementally
+					-- adjust the AssociatedFile, and only call this once
+					-- at shutdown to handle cases where not all
+					-- AssociatedFiles are known.
+					( adjustedBranchRefreshFull' adj origbranch
+					, Annex.addCleanupAction AdjustedBranchUpdate $
+						adjustedBranchRefreshFull' adj origbranch
+					)
+		_ -> return ()
+
+	checkcounter n
+		-- Special case, 1 (or true) refreshes only at shutdown.
+		| n == 1 = pure False
+		| otherwise = Annex.withState $ \s ->
+			let !c = Annex.adjustedbranchrefreshcounter s + 1
+			    !enough = c >= pred n
+			    !c' = if enough then 0 else c
+			    !s' = s { Annex.adjustedbranchrefreshcounter = c' }
+			in pure (s', enough)
+
+{- Slow, but more dependable version of adjustedBranchRefresh that
+ - does not rely on all AssociatedFiles being known. -}
+adjustedBranchRefreshFull :: Adjustment -> OrigBranch -> Annex ()
+adjustedBranchRefreshFull adj origbranch =
+	whenM isAdjustedBranchUpdateNeeded $ do
+		adjustedBranchRefreshFull' adj origbranch
+
+adjustedBranchRefreshFull' :: Adjustment -> OrigBranch -> Annex ()
+adjustedBranchRefreshFull' adj origbranch = do
+	-- Restage pointer files so modifications to them due to get/drop
+	-- do not prevent checking out the updated adjusted branch.
+ restagePointerFiles =<< Annex.gitRepo + starttime <- liftIO getPOSIXTime + let adjbranch = originalToAdjusted origbranch adj + ifM (updateAdjustedBranch adj adjbranch origbranch) + ( recordAdjustedBranchUpdateFinished starttime + , warning "Updating adjusted branch failed." + ) + +adjustToCrippledFileSystem :: Annex () +adjustToCrippledFileSystem = do + warning "Entering an adjusted branch where files are unlocked as this filesystem does not support locked files." + checkVersionSupported + whenM (isNothing <$> inRepo Git.Branch.current) $ + commitForAdjustedBranch [] + inRepo Git.Branch.current >>= \case + Just currbranch -> case getAdjustment currbranch of + Just curradj | curradj == adj -> return () + _ -> do + let adjbranch = originalToAdjusted currbranch adj + ifM (inRepo (Git.Ref.exists $ adjBranch adjbranch)) + ( unlessM (checkoutAdjustedBranch adjbranch False) $ + failedenter + , unlessM (enterAdjustedBranch adj) $ + failedenter + ) + Nothing -> failedenter + where + adj = LinkAdjustment UnlockAdjustment + failedenter = warning "Failed to enter adjusted branch!" + +{- Commit before entering adjusted branch. Only needs to be done + - when the current branch does not have any commits yet. + - + - If something is already staged, it will be committed, but otherwise + - an empty commit will be made. + -} +commitForAdjustedBranch :: [CommandParam] -> Annex () +commitForAdjustedBranch ps = do + cmode <- annexCommitMode <$> Annex.getGitConfig + let cquiet = Git.Branch.CommitQuiet True + void $ inRepo $ Git.Branch.commitCommand cmode cquiet $ + [ Param "--allow-empty" + , Param "-m" + , Param "commit before entering adjusted branch" + ] ++ ps + +setBasisBranch :: BasisBranch -> Ref -> Annex () +setBasisBranch (BasisBranch basis) new = + inRepo $ Git.Branch.update' basis new + +setAdjustedBranch :: String -> AdjBranch -> Ref -> Annex () +setAdjustedBranch msg (AdjBranch b) r = inRepo $ Git.Branch.update msg b r + +adjustBranch :: Adjustment -> OrigBranch -> Annex AdjBranch +adjustBranch adj origbranch = do + -- Start basis off with the current value of the origbranch. + setBasisBranch basis origbranch + sha <- adjustCommit adj basis + setAdjustedBranch "entering adjusted branch" adjbranch sha + return adjbranch + where + adjbranch = originalToAdjusted origbranch adj + basis = basisBranch adjbranch + +adjustCommit :: Adjustment -> BasisBranch -> Annex Sha +adjustCommit adj basis = do + treesha <- adjustTree adj basis + commitAdjustedTree treesha basis + +adjustTree :: Adjustment -> BasisBranch -> Annex Sha +adjustTree adj (BasisBranch basis) = do + let toadj = adjustTreeItem adj + treesha <- Git.Tree.adjustTree + toadj + [] + (\_old new -> new) + [] + basis =<< Annex.gitRepo + return treesha + +type CommitsPrevented = Git.LockFile.LockHandle + +{- Locks git's index file, preventing git from making a commit, merge, + - or otherwise changing the HEAD ref while the action is run. + - + - Throws an IO exception if the index file is already locked. + -} +preventCommits :: (CommitsPrevented -> Annex a) -> Annex a +preventCommits = bracket setup cleanup + where + setup = do + lck <- fromRepo $ indexFileLock . indexFile + liftIO $ Git.LockFile.openLock (fromRawFilePath lck) + cleanup = liftIO . Git.LockFile.closeLock + +{- Commits a given adjusted tree, with the provided parent ref. + - + - This should always yield the same value, even if performed in different + - clones of a repo, at different times. The commit message and other + - metadata is based on the parent. 
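+ -
+ - (This determinism is what lets independent clones arrive at the same
+ - adjusted branch sha for the same basis; the fixed commit message used
+ - below is likewise relied on to recognize such commits later.)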
+ -} +commitAdjustedTree :: Sha -> BasisBranch -> Annex Sha +commitAdjustedTree treesha parent@(BasisBranch b) = + commitAdjustedTree' treesha parent [b] + +commitAdjustedTree' :: Sha -> BasisBranch -> [Ref] -> Annex Sha +commitAdjustedTree' treesha (BasisBranch basis) parents = + go =<< catCommit basis + where + go Nothing = do + cmode <- annexCommitMode <$> Annex.getGitConfig + inRepo $ mkcommit cmode + go (Just basiscommit) = do + cmode <- annexCommitMode <$> Annex.getGitConfig + inRepo $ commitWithMetaData + (commitAuthorMetaData basiscommit) + (commitCommitterMetaData basiscommit) + (mkcommit cmode) + -- Make sure that the exact message is used in the commit, + -- since that message is looked for later. + -- After git-annex 10.20240227, it's possible to use + -- commitTree instead of this, but this is being kept + -- for some time, for compatibility with older versions. + mkcommit cmode = Git.Branch.commitTreeExactMessage cmode + adjustedBranchCommitMessage parents treesha + +{- This message should never be changed. -} +adjustedBranchCommitMessage :: String +adjustedBranchCommitMessage = "git-annex adjusted branch" + +{- Allow for a trailing newline after the message. -} +hasAdjustedBranchCommitMessage :: Commit -> Bool +hasAdjustedBranchCommitMessage c = + dropWhileEnd (\x -> x == '\n' || x == '\r') (commitMessage c) + == adjustedBranchCommitMessage + +findAdjustingCommit :: AdjBranch -> Annex (Maybe Commit) +findAdjustingCommit (AdjBranch b) = go =<< catCommit b + where + go Nothing = return Nothing + go (Just c) + | hasAdjustedBranchCommitMessage c = return (Just c) + | otherwise = case commitParent c of + [p] -> go =<< catCommit p + _ -> return Nothing + +{- Check for any commits present on the adjusted branch that have not yet + - been propagated to the basis branch, and propagate them to the basis + - branch and from there on to the orig branch. + - + - After propagating the commits back to the basis branch, + - rebase the adjusted branch on top of the updated basis branch. + -} +propigateAdjustedCommits :: OrigBranch -> Adjustment -> Annex () +propigateAdjustedCommits origbranch adj = + preventCommits $ \commitsprevented -> + join $ snd <$> propigateAdjustedCommits' True origbranch adj commitsprevented + +{- Returns sha of updated basis branch, and action which will rebase + - the adjusted branch on top of the updated basis branch. -} +propigateAdjustedCommits' + :: Bool + -> OrigBranch + -> Adjustment + -> CommitsPrevented + -> Annex (Maybe Sha, Annex ()) +propigateAdjustedCommits' warnwhendiverged origbranch adj _commitsprevented = + inRepo (Git.Ref.sha basis) >>= \case + Just origsha -> catCommit currbranch >>= \case + Just currcommit -> + newcommits >>= go origsha origsha False >>= \case + Left e -> do + warning (UnquotedString e) + return (Nothing, return ()) + Right newparent -> return + ( Just newparent + , rebase currcommit newparent + ) + Nothing -> return (Nothing, return ()) + Nothing -> do + warning $ UnquotedString $ + "Cannot find basis ref " ++ fromRef basis ++ "; not propagating adjusted commits to original branch " ++ fromRef origbranch + return (Nothing, return ()) + where + (BasisBranch basis) = basisBranch adjbranch + adjbranch@(AdjBranch currbranch) = originalToAdjusted origbranch adj + newcommits = inRepo $ Git.Branch.changedCommits basis currbranch + -- Get commits oldest first, so they can be processed + -- in order made. 
[Param "--reverse"]
+	go origsha parent _ [] = do
+		setBasisBranch (BasisBranch basis) parent
+		inRepo (Git.Ref.sha origbranch) >>= \case
+			Just origbranchsha | origbranchsha /= origsha ->
+				when warnwhendiverged $
+					warning $ UnquotedString $
+						"Original branch " ++ fromRef origbranch ++ " has diverged from current adjusted branch " ++ fromRef currbranch
+			_ -> inRepo $ Git.Branch.update' origbranch parent
+		return (Right parent)
+	go origsha parent pastadjcommit (sha:l) = catCommit sha >>= \case
+		Just c
+			| hasAdjustedBranchCommitMessage c ->
+				go origsha parent True l
+			| pastadjcommit ->
+				reverseAdjustedCommit parent adj (sha, c) origbranch
+					>>= \case
+						Left e -> return (Left e)
+						Right commit -> go origsha commit pastadjcommit l
+		_ -> go origsha parent pastadjcommit l
+	rebase currcommit newparent = do
+		-- Reuse the current adjusted tree, and reparent it
+		-- on top of the newparent.
+		commitAdjustedTree (commitTree currcommit) (BasisBranch newparent)
+			>>= inRepo . Git.Branch.update rebaseOnTopMsg currbranch
+
+rebaseOnTopMsg :: String
+rebaseOnTopMsg = "rebasing adjusted branch on top of updated original branch"
+
+{- Reverses an adjusted commit, and commits with the provided commitparent,
+ - yielding a commit sha.
+ -
+ - Adjusts the tree of the commitparent, changing only the files that the
+ - commit changed, and reverse adjusting those changes.
+ -
+ - The commit message, and the author and committer metadata are
+ - copied over from the basiscommit. However, any gpg signature
+ - will be lost, and any other headers are not copied either. -}
+reverseAdjustedCommit :: Sha -> Adjustment -> (Sha, Commit) -> OrigBranch -> Annex (Either String Sha)
+reverseAdjustedCommit commitparent adj (csha, basiscommit) origbranch
+	| length (commitParent basiscommit) > 1 = return $
+		Left $ "unable to propagate merge commit " ++ show csha ++ " back to " ++ show origbranch
+	| otherwise = do
+		cmode <- annexCommitMode <$> Annex.getGitConfig
+		treesha <- reverseAdjustedTree commitparent adj csha
+		revadjcommit <- inRepo $ commitWithMetaData
+			(commitAuthorMetaData basiscommit)
+			(commitCommitterMetaData basiscommit) $
+			Git.Branch.commitTree cmode
+				[commitMessage basiscommit]
+				[commitparent] treesha
+		return (Right revadjcommit)
+
+{- Adjusts the tree of the basis, changing only the files that the
+ - commit changed, and reverse adjusting those changes.
+ -
+ - commitDiff does not support merge commits, so the csha must not be a
+ - merge commit. -}
+reverseAdjustedTree :: Sha -> Adjustment -> Sha -> Annex Sha
+reverseAdjustedTree basis adj csha = do
+	(diff, cleanup) <- inRepo (Git.DiffTree.commitDiff csha)
+	let (adds, others) = partition (\dti -> Git.DiffTree.srcsha dti `elem` nullShas) diff
+	let (removes, changes) = partition (\dti -> Git.DiffTree.dstsha dti `elem` nullShas) others
+	adds' <- catMaybes <$>
+		mapM (adjustTreeItem reverseadj) (map diffTreeToTreeItem adds)
+	treesha <- Git.Tree.adjustTree
+		(propchanges changes)
+		adds'
+		(\_old new -> new)
+		(map Git.DiffTree.file removes)
+		basis
+		=<< Annex.gitRepo
+	void $ liftIO cleanup
+	return treesha
+  where
+	reverseadj = reverseAdjustment adj
+	propchanges changes ti@(TreeItem f _ _) =
+		case M.lookup (norm f) m of
+			Nothing -> return (Just ti) -- not changed
+			Just change -> adjustTreeItem reverseadj change
+	  where
+		m = M.fromList $ map (\i@(TreeItem f' _ _) -> (norm f', i)) $
+			map diffTreeToTreeItem changes
+		norm = normalise . fromRawFilePath .
getTopFilePath + +diffTreeToTreeItem :: Git.DiffTree.DiffTreeItem -> TreeItem +diffTreeToTreeItem dti = TreeItem + (Git.DiffTree.file dti) + (Git.DiffTree.dstmode dti) + (Git.DiffTree.dstsha dti) + +data AdjustedClone = InAdjustedClone | NotInAdjustedClone + +{- Cloning a repository that has an adjusted branch checked out will + - result in the clone having the same adjusted branch checked out -- but + - the origbranch won't exist in the clone, nor will the basis. So + - to properly set up the adjusted branch, the origbranch and basis need + - to be set. + - + - We can't trust that the origin's origbranch matches up with the currently + - checked out adjusted branch; the origin could have the two branches + - out of sync (eg, due to another branch having been pushed to the origin's + - origbranch), or due to a commit on its adjusted branch not having been + - propagated back to origbranch. + - + - So, find the adjusting commit on the currently checked out adjusted + - branch, and use the parent of that commit as the basis, and set the + - origbranch to it. + -} +checkAdjustedClone :: Annex AdjustedClone +checkAdjustedClone = ifM isBareRepo + ( return NotInAdjustedClone + , go =<< inRepo Git.Branch.current + ) + where + go Nothing = return NotInAdjustedClone + go (Just currbranch) = case adjustedToOriginal currbranch of + Nothing -> return NotInAdjustedClone + Just (adj, origbranch) -> do + let basis@(BasisBranch bb) = basisBranch (originalToAdjusted origbranch adj) + unlessM (inRepo $ Git.Ref.exists bb) $ do + aps <- fmap commitParent <$> findAdjustingCommit (AdjBranch currbranch) + case aps of + Just [p] -> do + unlessM (inRepo $ Git.Ref.exists origbranch) $ + inRepo $ Git.Branch.update' origbranch p + setBasisBranch basis p + _ -> giveup $ "Unable to clean up from clone of adjusted branch; perhaps you should check out " ++ Git.Ref.describe origbranch + return InAdjustedClone + +checkVersionSupported :: Annex () +checkVersionSupported = + unlessM (liftIO isGitVersionSupported) $ + giveup "Your version of git is too old; upgrade it to 2.2.0 or newer to use adjusted branches." + +-- git 2.2.0 needed for GIT_COMMON_DIR which is needed +-- by updateAdjustedBranch to use withWorkTreeRelated. +isGitVersionSupported :: IO Bool +isGitVersionSupported = not <$> Git.Version.older "2.2.0" diff --git a/Annex/AdjustedBranch/Merge.hs b/Annex/AdjustedBranch/Merge.hs new file mode 100644 index 0000000000..904f4ee412 --- /dev/null +++ b/Annex/AdjustedBranch/Merge.hs @@ -0,0 +1,167 @@ +{- adjusted branch merging + - + - Copyright 2016-2023 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE BangPatterns, OverloadedStrings #-} + +module Annex.AdjustedBranch.Merge ( + canMergeToAdjustedBranch, + mergeToAdjustedBranch, +) where + +import Annex.Common +import Annex.AdjustedBranch +import qualified Annex +import Git +import Git.Types +import qualified Git.Branch +import qualified Git.Ref +import qualified Git.Command +import qualified Git.Merge +import Git.Sha +import Annex.CatFile +import Annex.AutoMerge +import Annex.Tmp +import Annex.GitOverlay +import Utility.Tmp.Dir +import Utility.CopyFile +import Utility.Directory.Create + +import qualified Data.ByteString as S +import qualified System.FilePath.ByteString as P + +canMergeToAdjustedBranch :: Branch -> (OrigBranch, Adjustment) -> Annex Bool +canMergeToAdjustedBranch tomerge (origbranch, adj) = + inRepo $ Git.Branch.changed currbranch tomerge + where + AdjBranch currbranch = originalToAdjusted origbranch adj + +{- Update the currently checked out adjusted branch, merging the provided + - branch into it. Note that the provided branch should be a non-adjusted + - branch. -} +mergeToAdjustedBranch :: Branch -> (OrigBranch, Adjustment) -> [Git.Merge.MergeConfig] -> Bool -> Git.Branch.CommitMode -> Annex Bool +mergeToAdjustedBranch tomerge (origbranch, adj) mergeconfig canresolvemerge commitmode = catchBoolIO $ + join $ preventCommits go + where + adjbranch@(AdjBranch currbranch) = originalToAdjusted origbranch adj + basis = basisBranch adjbranch + + go commitsprevented = do + (updatedorig, _) <- propigateAdjustedCommits' + False origbranch adj commitsprevented + changestomerge updatedorig + + {- Since the adjusted branch changes files, merging tomerge + - directly into it would likely result in unnecessary merge + - conflicts. To avoid those conflicts, instead merge tomerge into + - updatedorig. The result of the merge can the be + - adjusted to yield the final adjusted branch. + - + - In order to do a merge into a ref that is not checked out, + - set the work tree to a temp directory, and set GIT_DIR + - to another temp directory, in which HEAD contains the + - updatedorig sha. GIT_COMMON_DIR is set to point to the real + - git directory, and so git can read and write objects from there, + - but will use GIT_DIR for HEAD and index. + - + - (Doing the merge this way also lets it run even though the main + - index file is currently locked.) + -} + changestomerge (Just updatedorig) = withOtherTmp $ \othertmpdir -> do + git_dir <- fromRepo Git.localGitDir + let git_dir' = fromRawFilePath git_dir + tmpwt <- fromRepo gitAnnexMergeDir + withTmpDirIn (fromRawFilePath othertmpdir) "git" $ \tmpgit -> withWorkTreeRelated tmpgit $ + withemptydir git_dir tmpwt $ withWorkTree tmpwt $ do + liftIO $ writeFile (tmpgit "HEAD") (fromRef updatedorig) + -- Copy in refs and packed-refs, to work + -- around bug in git 2.13.0, which + -- causes it not to look in GIT_DIR for refs. + refs <- liftIO $ emptyWhenDoesNotExist $ + dirContentsRecursive $ + git_dir' "refs" + let refs' = (git_dir' "packed-refs") : refs + liftIO $ forM_ refs' $ \src -> do + let src' = toRawFilePath src + whenM (doesFileExist src) $ do + dest <- relPathDirToFile git_dir src' + let dest' = toRawFilePath tmpgit P. dest + createDirectoryUnder [git_dir] + (P.takeDirectory dest') + void $ createLinkOrCopy src' dest' + -- This reset makes git merge not care + -- that the work tree is empty; otherwise + -- it will think that all the files have + -- been staged for deletion, and sometimes + -- the merge includes these deletions + -- (for an unknown reason). 
+ -- http://thread.gmane.org/gmane.comp.version-control.git/297237 + inRepo $ Git.Command.run [Param "reset", Param "HEAD", Param "--quiet"] + when (tomerge /= origbranch) $ + showAction $ UnquotedString $ "Merging into " ++ fromRef (Git.Ref.base origbranch) + merged <- autoMergeFrom' tomerge Nothing mergeconfig commitmode True + (const $ resolveMerge (Just updatedorig) tomerge True) + if merged + then do + !mergecommit <- liftIO $ extractSha + <$> S.readFile (tmpgit </> "HEAD") + -- This is run after the commit lock is dropped. + return $ postmerge mergecommit + else return $ return False + changestomerge Nothing = return $ return False + + withemptydir git_dir d a = bracketIO setup cleanup (const a) + where + setup = do + whenM (doesDirectoryExist d) $ + removeDirectoryRecursive d + createDirectoryUnder [git_dir] (toRawFilePath d) + cleanup _ = removeDirectoryRecursive d + + {- A merge commit has been made between the basisbranch and + - tomerge. Update the basisbranch and origbranch to point + - to that commit, adjust it to get the new adjusted branch, + - and check it out. + - + - But, there may be unstaged work tree changes that conflict, + - so the check out is done by making a normal merge of + - the new adjusted branch. + -} + postmerge (Just mergecommit) = do + setBasisBranch basis mergecommit + inRepo $ Git.Branch.update' origbranch mergecommit + adjtree <- adjustTree adj (BasisBranch mergecommit) + adjmergecommit <- commitAdjustedTree adjtree (BasisBranch mergecommit) + -- Make currbranch be the parent, so that merging + -- this commit will be a fast-forward. + adjmergecommitff <- commitAdjustedTree' adjtree (BasisBranch mergecommit) [currbranch] + showAction "Merging into adjusted branch" + ifM (autoMergeFrom adjmergecommitff (Just currbranch) mergeconfig commitmode canresolvemerge) + ( reparent adjtree adjmergecommit =<< getcurrentcommit + , return False + ) + postmerge Nothing = return False + + -- Now that the merge into the adjusted branch is complete, + -- take the tree from that merge, and attach it on top of the + -- adjmergecommit, if it's different. + reparent adjtree adjmergecommit (Just currentcommit) = do + if (commitTree currentcommit /= adjtree) + then do + cmode <- annexCommitMode <$> Annex.getGitConfig + c <- inRepo $ Git.Branch.commitTree cmode + ["Merged " ++ fromRef tomerge] + [adjmergecommit] + (commitTree currentcommit) + inRepo $ Git.Branch.update "updating adjusted branch" currbranch c + propigateAdjustedCommits origbranch adj + else inRepo $ Git.Branch.update "updating adjusted branch" currbranch adjmergecommit + return True + reparent _ _ Nothing = return False + + getcurrentcommit = inRepo Git.Branch.currentUnsafe >>= \case + Nothing -> return Nothing + Just c -> catCommit c diff --git a/Annex/AdjustedBranch/Name.hs b/Annex/AdjustedBranch/Name.hs new file mode 100644 index 0000000000..7a1b44d54e --- /dev/null +++ b/Annex/AdjustedBranch/Name.hs @@ -0,0 +1,99 @@ +{- adjusted branch names + - + - Copyright 2016-2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher.
+ -} + +{-# LANGUAGE OverloadedStrings #-} + +module Annex.AdjustedBranch.Name ( + originalToAdjusted, + adjustedToOriginal, + AdjBranch(..), + OrigBranch, +) where + +import Types.AdjustedBranch +import Git +import qualified Git.Ref +import Utility.Misc + +import Control.Applicative +import Data.Char +import qualified Data.ByteString as S + +adjustedBranchPrefix :: S.ByteString +adjustedBranchPrefix = "refs/heads/adjusted/" + +class SerializeAdjustment t where + serializeAdjustment :: t -> S.ByteString + deserializeAdjustment :: S.ByteString -> Maybe t + +instance SerializeAdjustment Adjustment where + serializeAdjustment (LinkAdjustment l) = + serializeAdjustment l + serializeAdjustment (PresenceAdjustment p Nothing) = + serializeAdjustment p + serializeAdjustment (PresenceAdjustment p (Just l)) = + serializeAdjustment p <> "-" <> serializeAdjustment l + serializeAdjustment (LinkPresentAdjustment l) = + serializeAdjustment l + deserializeAdjustment s = + (LinkAdjustment <$> deserializeAdjustment s) + <|> + (PresenceAdjustment <$> deserializeAdjustment s1 <*> pure (deserializeAdjustment s2)) + <|> + (PresenceAdjustment <$> deserializeAdjustment s <*> pure Nothing) + <|> + (LinkPresentAdjustment <$> deserializeAdjustment s) + where + (s1, s2) = separate' (== (fromIntegral (ord '-'))) s + +instance SerializeAdjustment LinkAdjustment where + serializeAdjustment UnlockAdjustment = "unlocked" + serializeAdjustment LockAdjustment = "locked" + serializeAdjustment FixAdjustment = "fixed" + serializeAdjustment UnFixAdjustment = "unfixed" + deserializeAdjustment "unlocked" = Just UnlockAdjustment + deserializeAdjustment "locked" = Just LockAdjustment + deserializeAdjustment "fixed" = Just FixAdjustment + deserializeAdjustment "unfixed" = Just UnFixAdjustment + deserializeAdjustment _ = Nothing + +instance SerializeAdjustment PresenceAdjustment where + serializeAdjustment HideMissingAdjustment = "hidemissing" + serializeAdjustment ShowMissingAdjustment = "showmissing" + deserializeAdjustment "hidemissing" = Just HideMissingAdjustment + deserializeAdjustment "showmissing" = Just ShowMissingAdjustment + deserializeAdjustment _ = Nothing + +instance SerializeAdjustment LinkPresentAdjustment where + serializeAdjustment UnlockPresentAdjustment = "unlockpresent" + serializeAdjustment LockPresentAdjustment = "lockpresent" + deserializeAdjustment "unlockpresent" = Just UnlockPresentAdjustment + deserializeAdjustment "lockpresent" = Just LockPresentAdjustment + deserializeAdjustment _ = Nothing + +newtype AdjBranch = AdjBranch { adjBranch :: Branch } + +originalToAdjusted :: OrigBranch -> Adjustment -> AdjBranch +originalToAdjusted orig adj = AdjBranch $ Ref $ + adjustedBranchPrefix <> base <> "(" <> serializeAdjustment adj <> ")" + where + base = fromRef' (Git.Ref.base orig) + +type OrigBranch = Branch + +adjustedToOriginal :: Branch -> Maybe (Adjustment, OrigBranch) +adjustedToOriginal b + | adjustedBranchPrefix `S.isPrefixOf` bs = do + let (base, as) = separateEnd' (== openparen) (S.drop prefixlen bs) + adj <- deserializeAdjustment (S.takeWhile (/= closeparen) as) + Just (adj, Git.Ref.branchRef (Ref base)) + | otherwise = Nothing + where + bs = fromRef' b + prefixlen = S.length adjustedBranchPrefix + openparen = fromIntegral (ord '(') + closeparen = fromIntegral (ord ')') diff --git a/Annex/AutoMerge.hs b/Annex/AutoMerge.hs new file mode 100644 index 0000000000..bb43d0593b --- /dev/null +++ b/Annex/AutoMerge.hs @@ -0,0 +1,391 @@ +{- git-annex automatic merge conflict resolution + - + - Copyright 2012-2020 
Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Annex.AutoMerge + ( autoMergeFrom + , autoMergeFrom' + , resolveMerge + , commitResolvedMerge + ) where + +import Annex.Common +import qualified Annex +import qualified Annex.Queue +import Annex.CatFile +import Annex.Link +import Annex.Content +import qualified Git.LsFiles as LsFiles +import qualified Git.UpdateIndex as UpdateIndex +import qualified Git.Merge +import qualified Git.Ref +import qualified Git +import qualified Git.Branch +import Git.Types (TreeItemType(..), fromTreeItemType) +import Git.FilePath +import Annex.ReplaceFile +import Annex.VariantFile +import qualified Database.Keys +import Annex.InodeSentinal +import Utility.InodeCache +import Utility.FileMode +import qualified Utility.RawFilePath as R + +import qualified Data.Set as S +import qualified Data.Map as M +import qualified Data.ByteString.Lazy as L +import System.PosixCompat.Files (isSymbolicLink) + +{- Merges from a branch into the current branch (which may not exist yet), + - with automatic merge conflict resolution. + - + - Callers should use Git.Branch.changed first, to make sure that + - there are changes from the current branch to the branch being merged in. + -} +autoMergeFrom :: Git.Ref -> Maybe Git.Ref -> [Git.Merge.MergeConfig] -> Git.Branch.CommitMode -> Bool -> Annex Bool +autoMergeFrom branch currbranch mergeconfig commitmode canresolvemerge = + autoMergeFrom' branch currbranch mergeconfig commitmode canresolvemerge resolvemerge + where + resolvemerge old + | canresolvemerge = resolveMerge old branch False + | otherwise = return False + +autoMergeFrom' :: Git.Ref -> Maybe Git.Ref -> [Git.Merge.MergeConfig] -> Git.Branch.CommitMode -> Bool -> (Maybe Git.Ref -> Annex Bool) -> Annex Bool +autoMergeFrom' branch currbranch mergeconfig commitmode willresolvemerge toresolvemerge = do + showOutput + case currbranch of + Nothing -> go Nothing + Just b -> go =<< inRepo (Git.Ref.sha b) + where + go old = do + -- merge.directoryRenames=conflict plus automatic + -- merge conflict resolution results in files in a + -- "renamed" directory getting variant names, + -- so is not a great combination. If the user has + -- explicitly set it, use it, but otherwise when + -- merge conflicts will be resolved, override + -- to merge.directoryRenames=false. + overridedirectoryrenames <- if willresolvemerge + then isNothing . mergeDirectoryRenames + <$> Annex.getGitConfig + else pure False + let f r + | overridedirectoryrenames = r + { Git.gitGlobalOpts = + Param "-c" + : Param "merge.directoryRenames=false" + : Git.gitGlobalOpts r + } + | otherwise = r + r <- inRepo (Git.Merge.merge branch mergeconfig commitmode . f) + <||> (toresolvemerge old <&&> commitResolvedMerge commitmode) + -- Merging can cause new associated files to appear + -- and the smudge filter will add them to the database. + -- To ensure that this process sees those changes, + -- close the database if it was open. + Database.Keys.closeDb + return r + +{- Resolves a conflicted merge. It's important that any conflicts be + - resolved in a way that itself avoids later merge conflicts, since + - multiple repositories may be doing this concurrently. + - + - Only merge conflicts where at least one side is an annexed file + - are resolved. + - + - This uses the Keys pointed to by the files to construct new + - filenames. So when both sides modified annexed file foo, + - it will be deleted, and replaced with files foo.variant-A and + - foo.variant-B.
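+ - + - (For illustration, a hedged sketch of the naming: the suffixes come + - from variantFile, which derives a short hash from each key, so the + - values below are made up but the shape is right: + - + -   variantFile "foo" keyUs   == "foo.variant-cc8f" + -   variantFile "foo" keyThem == "foo.variant-a31f" + - + - Since the names depend only on the keys, concurrent resolvers in + - different repositories produce the same names, avoiding new + - conflicts.)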
+ - + - On the other hand, when one side deleted foo, and the other modified it, + - it will be deleted, and the modified version stored as file + - foo.variant-A (or B). + - + - It's also possible that one side has foo as an annexed file, and + - the other as a directory or non-annexed file. The annexed file + - is renamed to resolve the merge, and the other object is preserved as-is. + - + - The merge is resolved in the work tree and files + - staged, to clean up from a conflicted merge that was run in the work + - tree. + - + - This is complicated by needing to support merges run in an overlay + - work tree, in which case the CWD won't be within the work tree. + - In this mode, there is no need to update the work tree at all, + - as the overlay work tree will get deleted. + - + - Unlocked files remain unlocked after merging, and locked files + - remain locked. When the merge conflict is between locked and unlocked + - files that otherwise point to the same content, the unlocked mode wins. + - This is done because only unlocked files work in filesystems that don't + - support symlinks. + - + - Returns false when there are no merge conflicts to resolve. + - A git merge can fail for other reasons, and this allows detecting + - such failures. + -} +resolveMerge :: Maybe Git.Ref -> Git.Ref -> Bool -> Annex Bool +resolveMerge us them inoverlay = do + top <- if inoverlay + then pure "." + else fromRepo Git.repoPath + (fs, cleanup) <- inRepo (LsFiles.unmerged [top]) + srcmap <- if inoverlay + then pure M.empty + else inodeMap $ pure (concatMap getunmergedfiles fs, return True) + (mergedks, mergedfs) <- unzip <$> mapM (resolveMerge' srcmap us them inoverlay) fs + let mergedks' = concat mergedks + let mergedfs' = catMaybes mergedfs + let merged = not (null mergedfs') + void $ liftIO cleanup + + unless inoverlay $ do + (deleted, cleanup2) <- inRepo (LsFiles.deleted [] [top]) + unless (null deleted) $ + Annex.Queue.addCommand [] "rm" + [Param "--quiet", Param "-f", Param "--"] + (map fromRawFilePath deleted) + void $ liftIO cleanup2 + + when merged $ do + Annex.Queue.flush + unless inoverlay $ do + unstagedmap <- inodeMap $ inRepo $ + LsFiles.notInRepo [] False [top] + cleanConflictCruft mergedks' mergedfs' unstagedmap + showLongNote "Merge conflict was automatically resolved; you may want to examine the result." + return merged + where + getunmergedfiles u = catMaybes + [ Just (LsFiles.unmergedFile u) + , LsFiles.unmergedSiblingFile u + ] + +resolveMerge' :: InodeMap -> Maybe Git.Ref -> Git.Ref -> Bool -> LsFiles.Unmerged -> Annex ([Key], Maybe FilePath) +resolveMerge' _ Nothing _ _ _ = return ([], Nothing) +resolveMerge' unstagedmap (Just us) them inoverlay u = do + kus <- getkey LsFiles.valUs + kthem <- getkey LsFiles.valThem + case (kus, kthem) of + -- Both sides of conflict are annexed files + (Just keyUs, Just keyThem) + | keyUs /= keyThem -> resolveby [keyUs, keyThem] $ do + makevariantannexlink keyUs LsFiles.valUs + makevariantannexlink keyThem LsFiles.valThem + -- cleanConflictCruft can't handle unlocked + -- files, so delete here. + unless inoverlay $ + unless (islocked LsFiles.valUs) $ + liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath file) + | otherwise -> resolveby [keyUs, keyThem] $ + -- Only resolve using symlink when both + -- were locked, otherwise use unlocked + -- pointer. + -- In either case, keep original filename.
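+ -- (combinedmodes below unions the mode bits + -- of both sides, so eg an executable bit set + -- by either side is preserved.)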
+ if islocked LsFiles.valUs && islocked LsFiles.valThem + then makesymlink keyUs file + else makepointer keyUs file (combinedmodes) + -- Our side is annexed file, other side is not. + -- Make the annexed file into a variant file and graft in the + -- other file/directory as it was. + (Just keyUs, Nothing) -> resolveby [keyUs] $ do + graftin them file LsFiles.valThem LsFiles.valThem LsFiles.valUs + makevariantannexlink keyUs LsFiles.valUs + -- Our side is not annexed file, other side is. + (Nothing, Just keyThem) -> resolveby [keyThem] $ do + graftin us file LsFiles.valUs LsFiles.valUs LsFiles.valThem + makevariantannexlink keyThem LsFiles.valThem + -- Neither side is annexed file; cannot resolve. + (Nothing, Nothing) -> return ([], Nothing) + where + file = fromRawFilePath $ LsFiles.unmergedFile u + sibfile = fromRawFilePath <$> LsFiles.unmergedSiblingFile u + + getkey select = + case select (LsFiles.unmergedSha u) of + Just sha -> catKey sha + Nothing -> pure Nothing + + islocked select = select (LsFiles.unmergedTreeItemType u) == Just TreeSymlink + + combinedmodes = case catMaybes [ourmode, theirmode] of + [] -> Nothing + l -> Just (combineModes l) + where + ourmode = fromTreeItemType + <$> LsFiles.valUs (LsFiles.unmergedTreeItemType u) + theirmode = fromTreeItemType + <$> LsFiles.valThem (LsFiles.unmergedTreeItemType u) + + makevariantannexlink key select + | islocked select = makesymlink key dest + | otherwise = makepointer key dest destmode + where + dest = variantFile file key + destmode = fromTreeItemType <$> select (LsFiles.unmergedTreeItemType u) + + stagefile :: FilePath -> Annex FilePath + stagefile f + | inoverlay = (</> f) . fromRawFilePath <$> fromRepo Git.repoPath + | otherwise = pure f + + makesymlink key dest = do + l <- calcRepo $ gitAnnexLink (toRawFilePath dest) key + unless inoverlay $ replacewithsymlink dest l + dest' <- toRawFilePath <$> stagefile dest + stageSymlink dest' =<< hashSymlink l + + replacewithsymlink dest link = replaceWorkTreeFile dest $ + makeGitLink link + + makepointer key dest destmode = do + unless inoverlay $ + unlessM (reuseOldFile unstagedmap key file dest) $ + linkFromAnnex key (toRawFilePath dest) destmode >>= \case + LinkAnnexFailed -> liftIO $ + writePointerFile (toRawFilePath dest) key destmode + _ -> noop + dest' <- toRawFilePath <$> stagefile dest + stagePointerFile dest' destmode =<< hashPointerFile key + unless inoverlay $ + Database.Keys.addAssociatedFile key + =<< inRepo (toTopFilePath (toRawFilePath dest)) + + {- Stage a graft of a directory or file from a branch + - and update the work tree. -} + graftin b item selectwant selectwant' selectunwant = do + Annex.Queue.addUpdateIndex + =<< fromRepo (UpdateIndex.lsSubTree b item) + + let replacefile isexecutable = case selectwant' (LsFiles.unmergedSha u) of + Nothing -> noop + Just sha -> replaceWorkTreeFile item $ \tmp -> do + c <- catObject sha + liftIO $ L.writeFile (decodeBS tmp) c + when isexecutable $ + liftIO $ void $ tryIO $ + modifyFileMode tmp $ + addModes executeModes + + -- Update the work tree to reflect the graft.
+ unless inoverlay $ case (selectwant (LsFiles.unmergedTreeItemType u), selectunwant (LsFiles.unmergedTreeItemType u)) of + (Just TreeSymlink, _) -> do + case selectwant' (LsFiles.unmergedSha u) of + Nothing -> noop + Just sha -> do + link <- catSymLinkTarget sha + replacewithsymlink item link + (Just TreeFile, Just TreeSymlink) -> replacefile False + (Just TreeExecutable, Just TreeSymlink) -> replacefile True + _ -> ifM (liftIO $ doesDirectoryExist item) + -- a conflict between a file and a directory + -- leaves the directory, so since a directory + -- is there, it must be what was wanted + ( noop + -- probably a file with conflict markers is + -- in the work tree; replace with grafted + -- file content (this is needed when + -- the annexed file is unlocked) + , replacefile False + ) + + resolveby ks a = do + {- Remove conflicted file from index so merge can be resolved. + - If there's a sibling conflicted file, remove it too. -} + Annex.Queue.addCommand [] "rm" + [ Param "--quiet" + , Param "-f" + , Param "--cached" + , Param "--" + ] + (catMaybes [Just file, sibfile]) + liftIO $ maybe noop + (removeWhenExistsWith R.removeLink . toRawFilePath) + sibfile + void a + return (ks, Just file) + +{- git-merge moves conflicting files away to files + - named something like f~HEAD or f~branch or just f, but the + - exact name chosen can vary. Once the conflict is resolved, + - this cruft can be deleted. To avoid deleting legitimate + - files that look like this, only delete files that are + - A) not staged in git and + - B) have a name related to the merged files and + - C) are pointers to or have the content of keys that were involved + - in the merge. + -} +cleanConflictCruft :: [Key] -> [FilePath] -> InodeMap -> Annex () +cleanConflictCruft resolvedks resolvedfs unstagedmap = do + is <- S.fromList . map (inodeCacheToKey Strongly) . concat + <$> mapM Database.Keys.getInodeCaches resolvedks + forM_ (M.toList unstagedmap) $ \(i, f) -> + whenM (matchesresolved is i f) $ + liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath f) + where + fs = S.fromList resolvedfs + ks = S.fromList resolvedks + inks = maybe False (flip S.member ks) + matchesresolved is i f + | S.member f fs || S.member (conflictCruftBase f) fs = anyM id + [ pure $ either (const False) (`S.member` is) i + , inks <$> isAnnexLink (toRawFilePath f) + , inks <$> liftIO (isPointerFile (toRawFilePath f)) + ] + | otherwise = return False + +conflictCruftBase :: FilePath -> FilePath +conflictCruftBase f = reverse $ drop 1 $ dropWhile (/= '~') $ reverse f + +{- When possible, reuse an existing file from the srcmap as the + - content of a worktree file in the resolved merge. It must have the + - same name as the origfile, or a name that git would use for conflict + - cruft. And, its inode cache must be a known one for the key. 
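+ - + - For example, conflictCruftBase "foo~HEAD" == "foo", so when git-merge + - left a foo~HEAD behind and its inode cache matches the key, it can + - simply be renamed into place as the variant file, rather than + - extracting the content from the annex again.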
-} +reuseOldFile :: InodeMap -> Key -> FilePath -> FilePath -> Annex Bool +reuseOldFile srcmap key origfile destfile = do + is <- map (inodeCacheToKey Strongly) + <$> Database.Keys.getInodeCaches key + liftIO $ go $ mapMaybe (\i -> M.lookup (Right i) srcmap) is + where + go [] = return False + go (f:fs) + | f == origfile || conflictCruftBase f == origfile = + ifM (doesFileExist f) + ( do + renameFile f destfile + return True + , go fs + ) + | otherwise = go fs + +commitResolvedMerge :: Git.Branch.CommitMode -> Annex Bool +commitResolvedMerge commitmode = do + commitquiet <- Git.Branch.CommitQuiet <$> commandProgressDisabled + inRepo $ Git.Branch.commitCommand commitmode commitquiet + [ Param "--no-verify" + , Param "-m" + , Param "git-annex automatic merge conflict fix" + ] + +type InodeMap = M.Map (Either FilePath InodeCacheKey) FilePath + +inodeMap :: Annex ([RawFilePath], IO Bool) -> Annex InodeMap +inodeMap getfiles = do + (fs, cleanup) <- getfiles + fsis <- forM fs $ \f -> do + s <- liftIO $ R.getSymbolicLinkStatus f + let f' = fromRawFilePath f + if isSymbolicLink s + then pure $ Just (Left f', f') + else withTSDelta (\d -> liftIO $ toInodeCache d f s) + >>= return . \case + Just i -> Just (Right (inodeCacheToKey Strongly i), f') + Nothing -> Nothing + void $ liftIO cleanup + return $ M.fromList $ catMaybes fsis diff --git a/Annex/BloomFilter.hs b/Annex/BloomFilter.hs new file mode 100644 index 0000000000..571f1c6c17 --- /dev/null +++ b/Annex/BloomFilter.hs @@ -0,0 +1,54 @@ +{- git-annex bloom filter + - + - Copyright 2010-2015 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.BloomFilter where + +import Annex.Common +import qualified Annex +import Utility.Bloom + +import Control.Monad.ST + +{- A bloom filter capable of holding half a million keys with a + - false positive rate of 1 in 10000000 uses around 16 mb of memory, + - so will easily fit on even my lowest memory systems. + -} +bloomCapacity :: Annex Int +bloomCapacity = fromMaybe 500000 . annexBloomCapacity <$> Annex.getGitConfig +bloomAccuracy :: Annex Int +bloomAccuracy = fromMaybe 10000000 . annexBloomAccuracy <$> Annex.getGitConfig +bloomBitsHashes :: Annex (Int, Int) +bloomBitsHashes = do + capacity <- bloomCapacity + accuracy <- bloomAccuracy + case safeSuggestSizing capacity (1 / fromIntegral accuracy) of + Left e -> do + warning $ UnquotedString $ + "bloomfilter " ++ e ++ "; falling back to sane value" + -- precalculated value for 500000 (1/10000000) + return (16777216,23) + Right v -> return v + +{- Creates a bloom filter, and runs an action to populate it. + - + - The action is passed a callback that it can use to feed values into the + - bloom filter. + - + - Once the action completes, the mutable filter is frozen + - for later use. + -} +genBloomFilter :: Hashable v => ((v -> Annex ()) -> Annex ()) -> Annex (Bloom v) +genBloomFilter populate = do + (numbits, numhashes) <- bloomBitsHashes + bloom <- lift $ newMB (cheapHashes numhashes) numbits + populate $ \v -> lift $ insertMB bloom v + lift $ unsafeFreezeMB bloom + where + lift = liftIO . stToIO + +bloomFilter :: [v] -> Bloom v -> [v] +bloomFilter l bloom = filter (\v -> v `notElemB` bloom) l diff --git a/Annex/Branch.hs b/Annex/Branch.hs new file mode 100644 index 0000000000..49225592b2 --- /dev/null +++ b/Annex/Branch.hs @@ -0,0 +1,1084 @@ +{- management of the git-annex branch + - + - Copyright 2011-2023 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher.
+ -} + +{-# LANGUAGE OverloadedStrings #-} + +module Annex.Branch ( + fullname, + name, + hasOrigin, + hasSibling, + siblingBranches, + create, + getBranch, + UpdateMade(..), + update, + forceUpdate, + updateTo, + get, + getHistorical, + getUnmergedRefs, + RegardingUUID(..), + change, + ChangeOrAppend(..), + changeOrAppend, + maybeChange, + commitMessage, + createMessage, + commit, + forceCommit, + files, + rememberTreeish, + performTransitions, + withIndex, + precache, + overBranchFileContents, + updatedFromTree, +) where + +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as L +import qualified Data.Set as S +import qualified Data.Map as M +import Data.Function +import Data.Char +import Data.ByteString.Builder +import Control.Concurrent (threadDelay) +import Control.Concurrent.MVar +import qualified System.FilePath.ByteString as P +import System.PosixCompat.Files (isRegularFile) + +import Annex.Common +import Types.BranchState +import Annex.BranchState +import Annex.Journal +import Annex.GitOverlay +import Annex.Tmp +import qualified Git +import qualified Git.Command +import qualified Git.Ref +import qualified Git.RefLog +import qualified Git.Sha +import qualified Git.Branch +import qualified Git.UnionMerge +import qualified Git.UpdateIndex +import qualified Git.Tree +import qualified Git.LsTree +import Git.LsTree (lsTreeParams) +import qualified Git.HashObject +import Annex.HashObject +import Git.Types (Ref(..), fromRef, fromRef', RefDate, TreeItemType(..)) +import Git.FilePath +import Annex.CatFile +import Git.CatFile (catObjectStreamLsTree) +import Annex.Perms +import Logs +import Logs.Transitions +import Logs.File +import Logs.Trust.Pure +import Logs.Remote.Pure +import Logs.Export.Pure +import Logs.Difference.Pure +import qualified Annex.Queue +import Types.Transitions +import Annex.Branch.Transitions +import qualified Annex +import Annex.Hook +import Utility.Directory.Stream +import Utility.Tmp +import qualified Utility.RawFilePath as R + +{- Name of the branch that is used to store git-annex's information. -} +name :: Git.Ref +name = Git.Ref "git-annex" + +{- Fully qualified name of the branch. -} +fullname :: Git.Ref +fullname = Git.Ref $ "refs/heads/" <> fromRef' name + +{- Branch's name in origin. -} +originname :: Git.Ref +originname = Git.Ref $ "refs/remotes/origin/" <> fromRef' name + +{- Does origin/git-annex exist? -} +hasOrigin :: Annex Bool +hasOrigin = inRepo $ Git.Ref.exists originname + +{- Does the git-annex branch or a sibling foo/git-annex branch exist? -} +hasSibling :: Annex Bool +hasSibling = not . null <$> siblingBranches + +{- List of git-annex (shas, branches), including the main one and any + - from remotes. Duplicates are filtered out. -} +siblingBranches :: Annex [(Git.Sha, Git.Branch)] +siblingBranches = inRepo $ Git.Ref.matchingUniq [name] + +{- Creates the branch, if it does not already exist. -} +create :: Annex () +create = void getBranch + +{- Returns the sha of the branch, creating it first if necessary. -} +getBranch :: Annex Git.Ref +getBranch = maybe (hasOrigin >>= go >>= use) return =<< branchsha + where + go True = do + inRepo $ Git.Command.run + [ Param "branch" + , Param "--no-track" + , Param $ fromRef name + , Param $ fromRef originname + ] + fromMaybe (giveup $ "failed to create " ++ fromRef name) + <$> branchsha + go False = withIndex' True $ do + -- Create the index file. 
This is not necessary, + -- except to avoid a bug in git 2.37 that causes + -- git write-tree to segfault when the index file does not + -- exist. + inRepo $ flip Git.UpdateIndex.streamUpdateIndex [] + cmode <- annexCommitMode <$> Annex.getGitConfig + cmessage <- createMessage + inRepo $ Git.Branch.commitAlways cmode cmessage fullname [] + use sha = do + setIndexSha sha + return sha + branchsha = inRepo $ Git.Ref.sha fullname + +{- Ensures that the branch and index are up-to-date; should be + - called before data is read from it. Runs only once per git-annex run. -} +update :: Annex BranchState +update = runUpdateOnce $ updateTo =<< siblingBranches + +{- Forces an update even if one has already been run. -} +forceUpdate :: Annex UpdateMade +forceUpdate = updateTo =<< siblingBranches + +{- Merges the specified Refs into the index, if they have any changes not + - already in it. The Branch names are only used in the commit message; + - it's even possible that the provided Branches have not been updated to + - point to the Refs yet. + - + - The branch is fast-forwarded if possible, otherwise a merge commit is + - made. + - + - Before Refs are merged into the index, it's important to first stage the + - journal into the index. Otherwise, any changes in the journal would + - later get staged, and might overwrite changes made during the merge. + - This is only done if some of the Refs do need to be merged. + - + - Also handles performing any Transitions that have not yet been + - performed, in either the local branch, or the Refs. + -} +updateTo :: [(Git.Sha, Git.Branch)] -> Annex UpdateMade +updateTo pairs = ifM (annexMergeAnnexBranches <$> Annex.getGitConfig) + ( updateTo' pairs + , return (UpdateMade False False) + ) + +updateTo' :: [(Git.Sha, Git.Branch)] -> Annex UpdateMade +updateTo' pairs = do + -- ensure branch exists, and get its current ref + branchref <- getBranch + ignoredrefs <- getIgnoredRefs + let unignoredrefs = excludeset ignoredrefs pairs + (tomerge, notnewer) <- if null unignoredrefs + then return ([], []) + else do + mergedrefs <- getMergedRefs + partitionM isnewer $ + excludeset mergedrefs unignoredrefs + {- In a read-only repository, catching permission denied lets + - query operations still work, although they will need to do + - additional work since the refs are not merged. -} + catchPermissionDenied + (const (updatefailedperms tomerge)) + (go branchref tomerge notnewer) + where + excludeset s = filter (\(r, _) -> S.notMember r s) + + isnewer (r, _) = inRepo $ Git.Branch.changed fullname r + + go branchref tomerge notnewer = do + dirty <- journalDirty gitAnnexJournalDir + journalcleaned <- if null tomerge + {- Even when no refs need to be merged, the index + - may still be updated if the branch has gotten ahead + - of the index, or just if the journal is dirty. -} + then ifM (needUpdateIndex branchref) + ( lockJournal $ \jl -> do + forceUpdateIndex jl branchref + {- When there are journalled changes + - as well as the branch being updated, + - a commit needs to be done. 
-} + when dirty $ + go' branchref dirty [] jl + return True + , if dirty + then ifM (annexAlwaysCommit <$> Annex.getGitConfig) + ( lockJournal $ \jl -> do + go' branchref dirty [] jl + return True + , return False + ) + else return True + ) + else lockJournal $ \jl -> do + go' branchref dirty tomerge jl + return True + journalclean <- if journalcleaned + then not <$> privateUUIDsKnown + else pure False + addMergedRefs notnewer + return $ UpdateMade + { refsWereMerged = not (null tomerge) + , journalClean = journalclean + } + + go' branchref dirty tomerge jl = stagejournalwhen dirty jl $ do + let (refs, branches) = unzip tomerge + merge_desc <- if null tomerge + then commitMessage + else return $ "merging " ++ + unwords (map Git.Ref.describe branches) ++ + " into " ++ fromRef name + localtransitions <- getLocalTransitions + unless (null tomerge) $ do + showSideAction (UnquotedString merge_desc) + mapM_ checkBranchDifferences refs + mergeIndex jl refs + let commitrefs = nub $ fullname:refs + ifM (handleTransitions jl localtransitions commitrefs) + ( runAnnexHook postUpdateAnnexHook + , do + ff <- if dirty + then return False + else inRepo $ Git.Branch.fastForward fullname refs + if ff + then updateIndex jl branchref + else commitIndex jl branchref merge_desc commitrefs + ) + addMergedRefs tomerge + invalidateCacheAll + + stagejournalwhen dirty jl a + | dirty = stageJournal jl a + | otherwise = withIndex a + + -- Preparing for read-only branch access with unmerged remote refs. + updatefailedperms tomerge = do + let refs = map fst tomerge + -- Gather any transitions that are new to either the + -- local branch or a remote ref, which will need to be + -- applied on the fly. + localts <- getLocalTransitions + remotets <- mapM getRefTransitions refs + ts <- if all (localts ==) remotets + then return [] + else + let tcs = mapMaybe getTransitionCalculator $ + knownTransitionList $ + combineTransitions (localts:remotets) + in if null tcs + then return [] + else do + config <- Annex.getGitConfig + trustmap <- calcTrustMap <$> getStaged trustLog + remoteconfigmap <- calcRemoteConfigMap <$> getStaged remoteLog + return $ map (\c -> c trustmap remoteconfigmap config) tcs + return $ UpdateFailedPermissions + { refsUnmerged = refs + , newTransitions = ts + } + +{- Gets the content of a file, which may be in the journal, or in the index + - (and committed to the branch). + - + - Returns an empty string if the file doesn't exist yet. + - + - Updates the branch if necessary, to ensure the most up-to-date available + - content is returned. + - + - When permissions prevented updating the branch, reads the content from the + - journal, plus the branch, plus all unmerged refs. In this case, any + - transitions that have not been applied to all refs will be applied on + - the fly. 
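+ - + - An illustrative call (the hashed log path below is made up; real + - paths are derived from the key): + - + -   content <- Annex.Branch.get "abc/def/KEY.log"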
+ -} +get :: RawFilePath -> Annex L.ByteString +get file = do + st <- update + case getCache file st of + Just content -> return content + Nothing -> do + content <- if journalIgnorable st + then getRef fullname file + else if null (unmergedRefs st) + then getLocal file + else unmergedbranchfallback st + setCache file content + return content + where + unmergedbranchfallback st = do + l <- getLocal file + bs <- forM (unmergedRefs st) $ \ref -> getRef ref file + let content = l <> mconcat bs + return $ applytransitions (unhandledTransitions st) content + applytransitions [] content = content + applytransitions (changer:rest) content = case changer file content of + PreserveFile -> applytransitions rest content + ChangeFile builder -> do + let content' = toLazyByteString builder + if L.null content' + -- File is deleted, can't run any other + -- transitions on it. + then content' + else applytransitions rest content' + +{- When the git-annex branch is unable to be updated due to permissions, + - and there are other git-annex branches that have not been merged into + - it, this gets the refs of those branches. -} +getUnmergedRefs :: Annex [Git.Ref] +getUnmergedRefs = unmergedRefs <$> update + +{- Used to cache the value of a file, which has been read from the branch + - using some optimised method. The journal has to be checked, in case + - it has a newer version of the file that has not reached the branch yet. + -} +precache :: RawFilePath -> L.ByteString -> Annex () +precache file branchcontent = do + st <- getState + content <- if journalIgnorable st + then pure branchcontent + else getJournalFileStale (GetPrivate True) file >>= return . \case + NoJournalledContent -> branchcontent + JournalledContent journalcontent -> journalcontent + PossiblyStaleJournalledContent journalcontent -> + branchcontent <> journalcontent + setCache file content + +{- Like get, but does not merge the branch, so the info returned may not + - reflect changes in remotes. + - (Changing the value this returns, and then merging is always the + - same as using get, and then changing its value.) -} +getLocal :: RawFilePath -> Annex L.ByteString +getLocal = getLocal' (GetPrivate True) + +getLocal' :: GetPrivate -> RawFilePath -> Annex L.ByteString +getLocal' getprivate file = do + fastDebug "Annex.Branch" ("read " ++ fromRawFilePath file) + go =<< getJournalFileStale getprivate file + where + go NoJournalledContent = getRef fullname file + go (JournalledContent journalcontent) = return journalcontent + go (PossiblyStaleJournalledContent journalcontent) = do + v <- getRef fullname file + return (v <> journalcontent) + +{- Gets the content of a file as staged in the branch's index. -} +getStaged :: RawFilePath -> Annex L.ByteString +getStaged = getRef indexref + where + -- This makes git cat-file be run with ":file", + -- so it looks at the index. + indexref = Ref "" + +getHistorical :: RefDate -> RawFilePath -> Annex L.ByteString +getHistorical date file = + -- This check avoids some ugly error messages when the reflog + -- is empty. + ifM (null <$> inRepo (Git.RefLog.get' [Param (fromRef fullname), Param "-n1"])) + ( giveup ("No reflog for " ++ fromRef fullname) + , getRef (Git.Ref.dateRef fullname date) file + ) + +getRef :: Ref -> RawFilePath -> Annex L.ByteString +getRef ref file = withIndex $ catFile ref file + +{- Applies a function to modify the content of a file. + - + - Note that this does not cause the branch to be merged, it only + - modifies the current content of the file on the branch. 
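+ - + - A minimal usage sketch (the log path is made up, and u is assumed to + - be a UUID that the change regards): + - + -   change (RegardingUUID [u]) "abc/def/KEY.log" $ \old -> + -     old <> "new log line\n"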
+ -} +change :: Journalable content => RegardingUUID -> RawFilePath -> (L.ByteString -> content) -> Annex () +change ru file f = lockJournal $ \jl -> f <$> getToChange ru file >>= set jl ru file + +{- Applies a function which can modify the content of a file, or not. -} +maybeChange :: Journalable content => RegardingUUID -> RawFilePath -> (L.ByteString -> Maybe content) -> Annex () +maybeChange ru file f = lockJournal $ \jl -> do + v <- getToChange ru file + case f v of + Just jv -> + let b = journalableByteString jv + in when (v /= b) $ set jl ru file b + _ -> noop + +data ChangeOrAppend t = Change t | Append t + +{- Applies a function that can either modify the content of the file, + - or append to the file. Appending can be more efficient when several + - lines are written to a file in succession. + - + - When annex.alwayscompact=false, the function is not passed the content + - of the journal file when the journal file already exists, and whatever + - value it provides is always appended to the journal file. That avoids + - reading the journal file, and so can be faster when many lines are being + - written to it. The information that is recorded will be effectively the + - same, only obsolete log lines will not get compacted. + - + - Currently, only appends when annex.alwayscompact=false. That is to + - avoid appending when an older version of git-annex is also in use in the + - same repository. An interrupted append could leave the journal file in a + - state that would confuse the older version. This is planned to be + - changed in a future repository version. + -} +changeOrAppend :: Journalable content => RegardingUUID -> RawFilePath -> (L.ByteString -> ChangeOrAppend content) -> Annex () +changeOrAppend ru file f = lockJournal $ \jl -> + checkCanAppendJournalFile jl ru file >>= \case + Just appendable -> ifM (annexAlwaysCompact <$> Annex.getGitConfig) + ( do + oldc <- getToChange ru file + case f oldc of + Change newc -> set jl ru file newc + Append toappend -> + set jl ru file $ + oldc <> journalableByteString toappend + -- Use this instead in v11 + -- or whatever. + -- append jl file appendable toappend + , case f mempty of + -- Append even though a change was + -- requested; since mempty was passed in, + -- the lines requested to change are + -- minimized. + Change newc -> append jl file appendable newc + Append toappend -> append jl file appendable toappend + ) + Nothing -> do + oldc <- getToChange ru file + case f oldc of + Change newc -> set jl ru file newc + -- Journal file does not exist yet, so + -- cannot append and have to write it all. + Append toappend -> set jl ru file $ + oldc <> journalableByteString toappend + +{- Only get private information when the RegardingUUID is itself private. -} +getToChange :: RegardingUUID -> RawFilePath -> Annex L.ByteString +getToChange ru f = flip getLocal' f . GetPrivate =<< regardingPrivateUUID ru + +{- Records new content of a file into the journal. + - + - This is not exported; all changes have to be made via change. This + - ensures that information that was written to the branch is not + - overwritten. Also, it avoids a get followed by a set without taking into + - account whether private information was gotten from the private + - git-annex index, and should not be written to the public git-annex + - branch. 
+ -} +set :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex () +set jl ru f c = do + journalChanged + setJournalFile jl ru f c + fastDebug "Annex.Branch" ("set " ++ fromRawFilePath f) + -- Could cache the new content, but it would involve + -- evaluating a Journalable Builder twice, which is not very + -- efficient. Instead, assume that it's not common to need to read + -- a log file immediately after writing it. + invalidateCache f + +{- Appends content to the journal file. -} +append :: Journalable content => JournalLocked -> RawFilePath -> AppendableJournalFile -> content -> Annex () +append jl f appendable toappend = do + journalChanged + appendJournalFile jl appendable toappend + fastDebug "Annex.Branch" ("append " ++ fromRawFilePath f) + invalidateCache f + +{- Commit message used when making a commit of whatever data has changed + - to the git-annex branch. -} +commitMessage :: Annex String +commitMessage = fromMaybe "update" <$> getCommitMessage + +{- Commit message used when creating the branch. -} +createMessage :: Annex String +createMessage = fromMaybe "branch created" <$> getCommitMessage + +getCommitMessage :: Annex (Maybe String) +getCommitMessage = do + config <- Annex.getGitConfig + case annexCommitMessageCommand config of + Nothing -> return (annexCommitMessage config) + Just cmd -> catchDefaultIO (annexCommitMessage config) $ + Just <$> liftIO (readProcess "sh" ["-c", cmd]) + +{- Stages the journal, and commits staged changes to the branch. -} +commit :: String -> Annex () +commit = whenM (journalDirty gitAnnexJournalDir) . forceCommit + +{- Commits the current index to the branch even without any journalled + - changes. -} +forceCommit :: String -> Annex () +forceCommit message = lockJournal $ \jl -> + stageJournal jl $ do + ref <- getBranch + commitIndex jl ref message [fullname] + +{- Commits the staged changes in the index to the branch. + - + - Ensures that the branch's index file is first updated to merge the state + - of the branch at branchref, before running the commit action. This + - is needed because the branch may have had changes pushed to it, that + - are not yet reflected in the index. + - + - The branchref value can have been obtained using getBranch at any + - previous point, though getting it a long time ago makes the race + - more likely to occur. + - + - Note that changes may be pushed to the branch at any point in time! + - So, there's a race. If the commit is made using the newly pushed tip of + - the branch as its parent, and that ref has not yet been merged into the + - index, then the result is that the commit will revert the pushed + - changes, since they have not been merged into the index. This race + - is detected and another commit made to fix it. + - + - (It's also possible for the branch to be overwritten, + - losing the commit made here. But that's ok; the data is still in the + - index and will get committed again later.) 
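+ - + - An illustrative timeline of the race: + - + -   1. The index is updated to match branchref A. + -   2. Another process pushes commit B to the branch. + -   3. The commit is made with the branch tip, now B, as a parent, but + -      B was never merged into the index, so the commit would revert + -      B's changes. racedetected sees that A is not among the commit's + -      parents, and fixrace union merges the lost refs and commits + -      again.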
+ -} +commitIndex :: JournalLocked -> Git.Ref -> String -> [Git.Ref] -> Annex () +commitIndex jl branchref message parents = do + showStoringStateAction + commitIndex' jl branchref message message 0 parents +commitIndex' :: JournalLocked -> Git.Ref -> String -> String -> Integer -> [Git.Ref] -> Annex () +commitIndex' jl branchref message basemessage retrynum parents = do + updateIndex jl branchref + cmode <- annexCommitMode <$> Annex.getGitConfig + committedref <- inRepo $ Git.Branch.commitAlways cmode message fullname parents + setIndexSha committedref + parentrefs <- commitparents <$> catObject committedref + when (racedetected branchref parentrefs) $ + fixrace committedref parentrefs + where + -- look for "parent ref" lines and return the refs + commitparents = map (Git.Ref . snd) . filter isparent . + map (toassoc . L.toStrict) . L.split newline + newline = fromIntegral (ord '\n') + toassoc = separate' (== (fromIntegral (ord ' '))) + isparent (k,_) = k == "parent" + + {- The race can be detected by checking the commit's + - parent, which will be the newly pushed branch, + - instead of the expected ref that the index was updated to. -} + racedetected expectedref parentrefs + | expectedref `elem` parentrefs = False -- good parent + | otherwise = True -- race! + + {- To recover from the race, union merge the lost refs + - into the index. -} + fixrace committedref lostrefs = do + showSideAction "recovering from race" + let retrynum' = retrynum+1 + -- small sleep to let any activity that caused + -- the race settle down + liftIO $ threadDelay (100000 + fromInteger retrynum') + mergeIndex jl lostrefs + let racemessage = basemessage ++ " (recovery from race #" ++ show retrynum' ++ "; expected commit parent " ++ show branchref ++ " but found " ++ show lostrefs ++ " )" + commitIndex' jl committedref racemessage basemessage retrynum' [committedref] + +{- Lists all files on the branch, including ones in the journal + - that have not been committed yet. + - + - There may be duplicates in the list, when the journal has files that + - have not been written to the branch yet. + - + - In a read-only repository that has other git-annex branches that have + - not been merged in, returns Nothing, because it's not possible to + - efficiently handle that. + -} +files :: Annex (Maybe ([RawFilePath], IO Bool)) +files = do + st <- update + if not (null (unmergedRefs st)) + then return Nothing + else do + (bfs, cleanup) <- branchFiles + jfs <- journalledFiles + pjfs <- journalledFilesPrivate + -- ++ forces the content of the first list to be + -- buffered in memory, so use journalledFiles, + -- which should be much smaller most of the time. + -- branchFiles will stream as the list is consumed. + let l = jfs ++ pjfs ++ bfs + return (Just (l, cleanup)) + +{- Lists all files currently in the journal, but not files in the private + - journal. -} +journalledFiles :: Annex [RawFilePath] +journalledFiles = getJournalledFilesStale gitAnnexJournalDir + +journalledFilesPrivate :: Annex [RawFilePath] +journalledFilesPrivate = ifM privateUUIDsKnown + ( getJournalledFilesStale gitAnnexPrivateJournalDir + , return [] + ) + +{- Files in the branch, not including any from journalled changes, + - and without updating the branch.
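+ - + - (This streams the output of git ls-tree --name-only -z, so the list + - is produced lazily; the cleanup action should only be run once the + - list has been fully consumed.)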
-} +branchFiles :: Annex ([RawFilePath], IO Bool) +branchFiles = withIndex $ inRepo branchFiles' + +branchFiles' :: Git.Repo -> IO ([RawFilePath], IO Bool) +branchFiles' = Git.Command.pipeNullSplit' $ + lsTreeParams Git.LsTree.LsTreeRecursive (Git.LsTree.LsTreeLong False) + fullname + [Param "--name-only"] + +{- Populates the branch's index file with the current branch contents. + - + - This is only done when the index doesn't yet exist, and the index + - is used to build up changes to be committed to the branch, and merge + - in changes from other branches. + -} +genIndex :: Git.Repo -> IO () +genIndex g = Git.UpdateIndex.streamUpdateIndex g + [Git.UpdateIndex.lsTree fullname g] + +{- Merges the specified refs into the index. + - Any changes staged in the index will be preserved. -} +mergeIndex :: JournalLocked -> [Git.Ref] -> Annex () +mergeIndex jl branches = do + prepareModifyIndex jl + withHashObjectHandle $ \hashhandle -> + withCatFileHandle $ \ch -> + inRepo $ \g -> Git.UnionMerge.mergeIndex hashhandle ch g branches + +{- Removes any stale git lock file, to avoid git falling over when + - updating the index. + - + - Since all modifications of the index are performed inside this module, + - and only when the journal is locked, the fact that the journal has to be + - locked when this is called ensures that no other process is currently + - modifying the index. So any index.lock file must be stale, caused + - by git running when the system crashed, or the repository's disk was + - removed, etc. + -} +prepareModifyIndex :: JournalLocked -> Annex () +prepareModifyIndex _jl = do + index <- fromRepo gitAnnexIndex + void $ liftIO $ tryIO $ R.removeLink (index <> ".lock") + +{- Runs an action using the branch's index file. -} +withIndex :: Annex a -> Annex a +withIndex = withIndex' False +withIndex' :: Bool -> Annex a -> Annex a +withIndex' bootstrapping a = withIndexFile AnnexIndexFile $ \f -> do + checkIndexOnce $ unlessM (liftIO $ doesFileExist f) $ do + unless bootstrapping create + createAnnexDirectory $ toRawFilePath $ takeDirectory f + unless bootstrapping $ inRepo genIndex + a + +{- Updates the branch's index to reflect the current contents of the branch. + - Any changes staged in the index will be preserved. + - + - Compares the ref stored in the lock file with the current + - ref of the branch to see if an update is needed. + -} +updateIndex :: JournalLocked -> Git.Ref -> Annex () +updateIndex jl branchref = whenM (needUpdateIndex branchref) $ + forceUpdateIndex jl branchref + +forceUpdateIndex :: JournalLocked -> Git.Ref -> Annex () +forceUpdateIndex jl branchref = do + withIndex $ mergeIndex jl [fullname] + setIndexSha branchref + +{- Checks if the index needs to be updated. -} +needUpdateIndex :: Git.Ref -> Annex Bool +needUpdateIndex branchref = do + f <- fromRawFilePath <$> fromRepo gitAnnexIndexStatus + committedref <- Git.Ref . firstLine' <$> + liftIO (catchDefaultIO mempty $ B.readFile f) + return (committedref /= branchref) + +{- Record that the branch's index has been updated to correspond to a + - given ref of the branch. -} +setIndexSha :: Git.Ref -> Annex () +setIndexSha ref = do + f <- fromRepo gitAnnexIndexStatus + writeLogFile f $ fromRef ref ++ "\n" + runAnnexHook postUpdateAnnexHook + +{- Stages the journal into the index, and runs an action that + - commits the index to the branch. Note that the action is run + - inside withIndex so will automatically use the branch's index. + - + - Before staging, this removes any existing git index file lock. 
+ - This is safe to do because stageJournal is the only thing that + - modifies this index file, and only one can run at a time, because + - the journal is locked. So any existing git index file lock must be + - stale, and the journal must contain any data that was in the process + - of being written to the index file when it crashed. + -} +stageJournal :: JournalLocked -> Annex () -> Annex () +stageJournal jl commitindex = withIndex $ withOtherTmp $ \tmpdir -> do + prepareModifyIndex jl + g <- gitRepo + st <- getState + let dir = gitAnnexJournalDir st g + (jlogf, jlogh) <- openjlog (fromRawFilePath tmpdir) + withHashObjectHandle $ \h -> + withJournalHandle gitAnnexJournalDir $ \jh -> + Git.UpdateIndex.streamUpdateIndex g + [genstream dir h jh jlogh] + commitindex + liftIO $ cleanup (fromRawFilePath dir) jlogh jlogf + where + genstream dir h jh jlogh streamer = readDirectory jh >>= \case + Nothing -> return () + Just file -> do + let path = dir P.</> toRawFilePath file + unless (dirCruft file) $ whenM (isfile path) $ do + sha <- Git.HashObject.hashFile h path + hPutStrLn jlogh file + streamer $ Git.UpdateIndex.updateIndexLine + sha TreeFile (asTopFilePath $ fileJournal $ toRawFilePath file) + genstream dir h jh jlogh streamer + isfile file = isRegularFile <$> R.getFileStatus file + -- Clean up the staged files, as listed in the temp log file. + -- The temp file is used to avoid needing to buffer all the + -- filenames in memory. + cleanup dir jlogh jlogf = do + hFlush jlogh + hSeek jlogh AbsoluteSeek 0 + stagedfs <- lines <$> hGetContents jlogh + mapM_ (removeFile . (dir </>)) stagedfs + hClose jlogh + removeWhenExistsWith (R.removeLink) (toRawFilePath jlogf) + openjlog tmpdir = liftIO $ openTmpFileIn tmpdir "jlog" + +getLocalTransitions :: Annex Transitions +getLocalTransitions = + parseTransitionsStrictly "local" + <$> getLocal transitionsLog + +{- This is run after the refs have been merged into the index, + - but before the result is committed to the branch. + - (Which is why it's passed the contents of the local branch's + - transition log before that merge took place.) + - + - When the refs contain transitions that have not yet been done locally, + - the transitions are performed on the index, and a new branch + - is created from the result. + - + - When there are transitions recorded locally that have not been done + - to the remote refs, the transitions are performed in the index, + - and committed to the existing branch. In this case, the untransitioned + - remote refs cannot be merged into the branch (since transitions + - throw away history), so they are added to the list of refs to ignore, + - to avoid re-merging content from them again. + -} +handleTransitions :: JournalLocked -> Transitions -> [Git.Ref] -> Annex Bool +handleTransitions jl localts refs = do + remotets <- mapM getRefTransitions refs + if all (localts ==) remotets + then return False + else do + let m = M.fromList (zip refs remotets) + let allts = combineTransitions (localts:remotets) + let (transitionedrefs, untransitionedrefs) = + partition (\r -> M.lookup r m == Just allts) refs + performTransitionsLocked jl allts (localts /= allts) transitionedrefs + ignoreRefs untransitionedrefs + return True + +{- Performs the specified transitions on the contents of the index file, + - commits it to the branch, or creates a new branch.
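+ - + - (neednewlocalbranch is set when the local branch itself has not had + - all the transitions applied. Since transitions throw away history, + - the untransitioned local branch cannot be a parent of the result; + - instead a new branch is committed whose only parents are the + - already-transitioned refs.)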
+ -} +performTransitions :: Transitions -> Bool -> [Ref] -> Annex () +performTransitions ts neednewlocalbranch transitionedrefs = lockJournal $ \jl -> + performTransitionsLocked jl ts neednewlocalbranch transitionedrefs +performTransitionsLocked :: JournalLocked -> Transitions -> Bool -> [Ref] -> Annex () +performTransitionsLocked jl ts neednewlocalbranch transitionedrefs = do + -- For simplicity & speed, we're going to use the Annex.Queue to + -- update the git-annex branch, while it usually holds changes + -- for the head branch. Flush any such changes. + Annex.Queue.flush + -- Stop any running git cat-files, to ensure that the + -- getStaged calls below use the current index, and not some older + -- one. + catFileStop + withIndex $ do + prepareModifyIndex jl + run $ mapMaybe getTransitionCalculator tlist + Annex.Queue.flush + if neednewlocalbranch + then do + cmode <- annexCommitMode <$> Annex.getGitConfig + -- Creating a new empty branch must happen + -- atomically, so if this is interrupted, + -- it will not leave the new branch created + -- but without exports grafted in. + c <- inRepo $ Git.Branch.commitShaAlways + cmode message transitionedrefs + void $ regraftexports c + else do + ref <- getBranch + ref' <- regraftexports ref + commitIndex jl ref' message + (nub $ fullname:transitionedrefs) + where + message + | neednewlocalbranch && null transitionedrefs = "new branch for transition " ++ tdesc + | otherwise = "continuing transition " ++ tdesc + tdesc = show $ map describeTransition tlist + tlist = knownTransitionList ts + + {- The changes to make to the branch are calculated and applied to + - the branch directly, rather than going through the journal, + - which would be inefficient. (And the journal is not designed + - to hold changes to every file in the branch at once.) + - + - When a file in the branch is changed by transition code, + - its new content is remembered and fed into the code for subsequent + - transitions. + -} + run [] = noop + run changers = do + config <- Annex.getGitConfig + trustmap <- calcTrustMap <$> getStaged trustLog + remoteconfigmap <- calcRemoteConfigMap <$> getStaged remoteLog + -- partially apply, improves performance + let changers' = map (\c -> c trustmap remoteconfigmap config) changers + (fs, cleanup) <- branchFiles + forM_ fs $ \f -> do + content <- getStaged f + apply changers' f content + liftIO $ void cleanup + + apply [] _ _ = return () + apply (changer:rest) file content = case changer file content of + PreserveFile -> apply rest file content + ChangeFile builder -> do + let content' = toLazyByteString builder + if L.null content' + then do + Annex.Queue.addUpdateIndex + =<< inRepo (Git.UpdateIndex.unstageFile file) + -- File is deleted; can't run any other + -- transitions on it. + return () + else do + sha <- hashBlob content' + Annex.Queue.addUpdateIndex $ Git.UpdateIndex.pureStreamer $ + Git.UpdateIndex.updateIndexLine sha TreeFile (asTopFilePath file) + apply rest file content' + + -- Trees mentioned in export.log were grafted into the old + -- git-annex branch to make sure they remain available. + -- Re-graft the trees. + regraftexports parent = do + l <- exportedTreeishes . M.elems . parseExportLogMap + <$> getStaged exportLog + c <- regraft l parent + inRepo $ Git.Branch.update' fullname c + setIndexSha c + return c + where + regraft [] c = pure c + regraft (et:ets) c = + -- Verify that the tree object exists.
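+ -- (If it is missing, eg lost to garbage + -- collection in a clone, the graft is skipped + -- rather than failing the whole transition.)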
+ catObjectDetails et >>= \case + Just _ -> + prepRememberTreeish et graftpoint c + >>= regraft ets + Nothing -> regraft ets c + graftpoint = asTopFilePath exportTreeGraftPoint + +checkBranchDifferences :: Git.Ref -> Annex () +checkBranchDifferences ref = do + theirdiffs <- allDifferences . parseDifferencesLog + <$> catFile ref differenceLog + mydiffs <- annexDifferences <$> Annex.getGitConfig + when (theirdiffs /= mydiffs) $ + giveup "Remote repository is tuned in incompatible way; cannot be merged with local repository." + +ignoreRefs :: [Git.Sha] -> Annex () +ignoreRefs rs = do + old <- getIgnoredRefs + let s = S.unions [old, S.fromList rs] + f <- fromRepo gitAnnexIgnoredRefs + writeLogFile f $ + unlines $ map fromRef $ S.elems s + +getIgnoredRefs :: Annex (S.Set Git.Sha) +getIgnoredRefs = + S.fromList . mapMaybe Git.Sha.extractSha . fileLines' <$> content + where + content = do + f <- fromRawFilePath <$> fromRepo gitAnnexIgnoredRefs + liftIO $ catchDefaultIO mempty $ B.readFile f + +addMergedRefs :: [(Git.Sha, Git.Branch)] -> Annex () +addMergedRefs [] = return () +addMergedRefs new = do + old <- getMergedRefs' + -- Keep only the newest sha for each branch. + let l = nubBy ((==) `on` snd) (new ++ old) + f <- fromRepo gitAnnexMergedRefs + writeLogFile f $ + unlines $ map (\(s, b) -> fromRef s ++ '\t' : fromRef b) l + +getMergedRefs :: Annex (S.Set Git.Sha) +getMergedRefs = S.fromList . map fst <$> getMergedRefs' + +getMergedRefs' :: Annex [(Git.Sha, Git.Branch)] +getMergedRefs' = do + f <- fromRawFilePath <$> fromRepo gitAnnexMergedRefs + s <- liftIO $ catchDefaultIO mempty $ B.readFile f + return $ map parse $ fileLines' s + where + parse l = + let (s, b) = separate' (== (fromIntegral (ord '\t'))) l + in (Ref s, Ref b) + +{- Grafts a treeish into the branch at the specified location, + - and then removes it. This ensures that the treeish won't get garbage + - collected, and will always be available as long as the git-annex branch + - is available. + - + - Returns the sha of the git commit made to the git-annex branch. + -} +rememberTreeish :: Git.Ref -> TopFilePath -> Annex Git.Sha +rememberTreeish treeish graftpoint = lockJournal $ \jl -> do + branchref <- getBranch + updateIndex jl branchref + c <- prepRememberTreeish treeish graftpoint branchref + inRepo $ Git.Branch.update' fullname c + -- The tree in c is the same as the tree in branchref, + -- and the index was updated to that above, so it's safe to + -- say that the index contains c. + setIndexSha c + return c + +{- Create a series of commits that graft a tree onto the parent commit, + - and then remove it. -} +prepRememberTreeish :: Git.Ref -> TopFilePath -> Git.Ref -> Annex Git.Sha +prepRememberTreeish treeish graftpoint parent = do + origtree <- fromMaybe (giveup "unable to determine git-annex branch tree") <$> + inRepo (Git.Ref.tree parent) + addedt <- inRepo $ Git.Tree.graftTree treeish graftpoint origtree + cmode <- annexCommitMode <$> Annex.getGitConfig + c <- inRepo $ Git.Branch.commitTree cmode + ["graft"] [parent] addedt + inRepo $ Git.Branch.commitTree cmode + ["graft cleanup"] [c] origtree + +{- Runs an action on the content of selected files from the branch. + - This is much faster than reading the content of each file in turn, + - because it lets git cat-file stream content without blocking. + - + - The action is passed a callback that it can repeatedly call to read + - the next file and its contents. When there are no more files, the + - callback will return Nothing. 
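+ - + - An illustrative use (wanted and process here are hypothetical): + - + -   overBranchFileContents wanted $ \getnext -> + -     let loop = getnext >>= \case + -           Just (v, f, mcontent) -> process v f mcontent >> loop + -           Nothing -> return () + -     in loop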
+ - + - In some cases the callback may return the same file more than once, + - with different content. This happens rarely, only when the journal + - contains additional information, and the last version of the + - file it returns is the most current one. + - + - In a read-only repository that has other git-annex branches that have + - not been merged in, returns Nothing, because it's not possible to + - efficiently handle that. + -} +overBranchFileContents + :: (RawFilePath -> Maybe v) + -> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex a) + -> Annex (Maybe a) +overBranchFileContents select go = do + st <- update + if not (null (unmergedRefs st)) + then return Nothing + else Just <$> overBranchFileContents' select go st + +overBranchFileContents' + :: (RawFilePath -> Maybe v) + -> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex a) + -> BranchState + -> Annex a +overBranchFileContents' select go st = do + g <- Annex.gitRepo + (l, cleanup) <- inRepo $ Git.LsTree.lsTree + Git.LsTree.LsTreeRecursive + (Git.LsTree.LsTreeLong False) + fullname + let select' f = fmap (\v -> (v, f)) (select f) + buf <- liftIO newEmptyMVar + let go' reader = go $ liftIO reader >>= \case + Just ((v, f), content) -> do + content' <- checkjournal f content + return (Just (v, f, content')) + Nothing + | journalIgnorable st -> return Nothing + -- The journal did not get committed to the + -- branch, and may contain files that + -- are not present in the branch, which + -- need to be provided to the action still. + -- This can cause the action to be run a + -- second time with a file it already ran on. + | otherwise -> liftIO (tryTakeMVar buf) >>= \case + Nothing -> do + jfs <- journalledFiles + pjfs <- journalledFilesPrivate + drain buf jfs pjfs + Just (jfs, pjfs) -> drain buf jfs pjfs + catObjectStreamLsTree l (select' . getTopFilePath . Git.LsTree.file) g go' + `finally` liftIO (void cleanup) + where + -- Check the journal, in case it did not get committed to the branch + checkjournal f branchcontent + | journalIgnorable st = return branchcontent + | otherwise = getJournalFileStale (GetPrivate True) f >>= return . \case + NoJournalledContent -> branchcontent + JournalledContent journalledcontent -> + Just journalledcontent + PossiblyStaleJournalledContent journalledcontent -> + Just (fromMaybe mempty branchcontent <> journalledcontent) + + drain buf fs pfs = case getnext fs pfs of + Just (v, f, fs', pfs') -> do + liftIO $ putMVar buf (fs', pfs') + content <- getJournalFileStale (GetPrivate True) f >>= \case + NoJournalledContent -> return Nothing + JournalledContent journalledcontent -> + return (Just journalledcontent) + PossiblyStaleJournalledContent journalledcontent -> do + -- This is expensive, but happens + -- only when there is a private + -- journal file. + content <- getRef fullname f + return (Just (content <> journalledcontent)) + return (Just (v, f, content)) + Nothing -> do + liftIO $ putMVar buf ([], []) + return Nothing + + getnext [] [] = Nothing + getnext (f:fs) pfs = case select f of + Nothing -> getnext fs pfs + Just v -> Just (v, f, fs, pfs) + getnext [] (pf:pfs) = case select pf of + Nothing -> getnext [] pfs + Just v -> Just (v, pf, [], pfs) + +{- Check if the git-annex branch has been updated from the oldtree. + - If so, returns the tuple of the old and new trees. 
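+ -
+ - Illustrative use (hypothetical caller):
+ -
+ - > updatedFromTree oldtree >>= \case
+ - >         Just (old, new) -> noop -- e.g. diff old against new here
+ - >         Nothing -> noop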
-} +updatedFromTree :: Git.Sha -> Annex (Maybe (Git.Sha, Git.Sha)) +updatedFromTree oldtree = + inRepo (Git.Ref.tree fullname) >>= \case + Just currtree | currtree /= oldtree -> + return $ Just (oldtree, currtree) + _ -> return Nothing diff --git a/Annex/Branch/Transitions.hs b/Annex/Branch/Transitions.hs new file mode 100644 index 0000000000..d7f45cb067 --- /dev/null +++ b/Annex/Branch/Transitions.hs @@ -0,0 +1,108 @@ +{- git-annex branch transitions + - + - Copyright 2013-2021 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.Branch.Transitions ( + getTransitionCalculator, + filterBranch, +) where + +import Common +import Logs +import Logs.Transitions +import qualified Logs.UUIDBased as UUIDBased +import qualified Logs.Presence.Pure as Presence +import qualified Logs.Chunk.Pure as Chunk +import qualified Logs.MetaData.Pure as MetaData +import qualified Logs.Remote.Pure as Remote +import Logs.MapLog +import Types.TrustLevel +import Types.UUID +import Types.MetaData +import Types.Remote +import Types.Transitions +import Types.GitConfig (GitConfig) +import Types.ProposedAccepted +import Annex.SpecialRemote.Config + +import qualified Data.Map as M +import qualified Data.Set as S +import qualified Data.Attoparsec.ByteString.Lazy as A +import Data.ByteString.Builder + +getTransitionCalculator :: Transition -> Maybe (TrustMap -> M.Map UUID RemoteConfig -> GitConfig -> TransitionCalculator) +getTransitionCalculator ForgetGitHistory = Nothing +getTransitionCalculator ForgetDeadRemotes = Just dropDead + +-- Removes data about all dead repos. +-- +-- The trust log is not changed, because other, unmerged clones +-- may contain other data about the dead repos. So we need to remember +-- which are dead to later remove that. +-- +-- When the remote log contains a sameas-uuid pointing to a dead uuid, +-- the uuid of that remote configuration is also effectively dead, +-- though not in the trust log. There may be per-remote state stored using +-- the latter uuid, that also needs to be removed. The sameas-uuid +-- is not removed from the remote log, for the same reason the trust log +-- is not changed. +dropDead :: TrustMap -> M.Map UUID RemoteConfig -> GitConfig -> TransitionCalculator +dropDead trustmap remoteconfigmap gc f content + | f == trustLog = PreserveFile + | f == remoteLog = ChangeFile $ + Remote.buildRemoteConfigLog $ + mapLogWithKey minimizesameasdead $ + filterMapLog (notdead trustmap) id $ + Remote.parseRemoteConfigLog content + | otherwise = filterBranch (notdead trustmap') gc f content + where + notdead m u = M.findWithDefault def u m /= DeadTrusted + trustmap' = trustmap `M.union` + M.map (const DeadTrusted) (M.filter sameasdead remoteconfigmap) + sameasdead cm = + case toUUID . 
fromProposedAccepted <$> M.lookup sameasUUIDField cm of + Nothing -> False + Just u' -> M.lookup u' trustmap == Just DeadTrusted + minimizesameasdead u l + | M.lookup u trustmap' == Just DeadTrusted = + l { UUIDBased.value = minimizesameasdead' (UUIDBased.value l) } + | otherwise = l + minimizesameasdead' c = M.restrictKeys c (S.singleton sameasUUIDField) + +filterBranch :: (UUID -> Bool) -> GitConfig -> TransitionCalculator +filterBranch wantuuid gc f content = case getLogVariety gc f of + Just OldUUIDBasedLog -> ChangeFile $ + UUIDBased.buildLogOld byteString $ + filterMapLog wantuuid id $ + UUIDBased.parseLogOld A.takeByteString content + Just NewUUIDBasedLog -> ChangeFile $ + UUIDBased.buildLogNew byteString $ + filterMapLog wantuuid id $ + UUIDBased.parseLogNew A.takeByteString content + Just (ChunkLog _) -> ChangeFile $ + Chunk.buildLog $ filterMapLog wantuuid fst $ + Chunk.parseLog content + Just (LocationLog _) -> ChangeFile $ Presence.buildLog $ + Presence.compactLog $ + filterLocationLog wantuuid $ + Presence.parseLog content + Just (UrlLog _) -> PreserveFile + Just RemoteMetaDataLog -> ChangeFile $ MetaData.buildLog $ + filterRemoteMetaDataLog wantuuid $ + MetaData.simplifyLog $ MetaData.parseLog content + Just OtherLog -> PreserveFile + Nothing -> PreserveFile + +filterMapLog :: (UUID -> Bool) -> (k -> UUID) -> MapLog k v -> MapLog k v +filterMapLog wantuuid getuuid = filterMapLogWith (\k _v -> wantuuid (getuuid k)) + +filterLocationLog :: (UUID -> Bool) -> [Presence.LogLine] -> [Presence.LogLine] +filterLocationLog wantuuid = filter $ + wantuuid . toUUID . Presence.fromLogInfo . Presence.info + +filterRemoteMetaDataLog :: (UUID -> Bool) -> MetaData.Log MetaData -> MetaData.Log MetaData +filterRemoteMetaDataLog wantuuid = + MetaData.filterOutEmpty . MetaData.filterRemoteMetaData wantuuid diff --git a/Annex/BranchState.hs b/Annex/BranchState.hs new file mode 100644 index 0000000000..0f0e553259 --- /dev/null +++ b/Annex/BranchState.hs @@ -0,0 +1,144 @@ +{- git-annex branch state management + - + - Runtime state about the git-annex branch, and a small cache. + - + - Copyright 2011-2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.BranchState where + +import Annex.Common +import Types.BranchState +import Types.Transitions +import qualified Annex +import Logs +import qualified Git + +import Control.Concurrent +import qualified Data.ByteString.Lazy as L + +getState :: Annex BranchState +getState = do + v <- Annex.getRead Annex.branchstate + liftIO $ readMVar v + +changeState :: (BranchState -> BranchState) -> Annex () +changeState changer = do + v <- Annex.getRead Annex.branchstate + liftIO $ modifyMVar_ v $ return . changer + +{- Runs an action to check that the index file exists, if it's not been + - checked before in this run of git-annex. -} +checkIndexOnce :: Annex () -> Annex () +checkIndexOnce a = unlessM (indexChecked <$> getState) $ do + a + changeState $ \s -> s { indexChecked = True } + +data UpdateMade + = UpdateMade + { refsWereMerged :: Bool + , journalClean :: Bool + } + | UpdateFailedPermissions + { refsUnmerged :: [Git.Sha] + , newTransitions :: [TransitionCalculator] + } + +{- Runs an action to update the branch, if it's not been updated before + - in this run of git-annex. + - + - When interactive access is enabled, the journal is always checked when + - reading values from the branch, and so this does not need to update + - the branch. 
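+ -
+ - A sketch of a caller (hypothetical; a real update action also
+ - merges refs and handles permission problems):
+ -
+ - > st <- runUpdateOnce $ do
+ - >         -- ... update the branch, stage the journal ...
+ - >         return $ UpdateMade
+ - >                 { refsWereMerged = True
+ - >                 , journalClean = True
+ - >                 }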
+ -
+ - When the action leaves the journal clean, by staging anything that
+ - was in it, an optimisation is enabled: The journal does not need to
+ - be checked going forward, until new information gets written to it.
+ -
+ - When the action is unable to update the branch due to a permissions
+ - problem, the journal is still read every time.
+ -}
+runUpdateOnce :: Annex UpdateMade -> Annex BranchState
+runUpdateOnce update = do
+ st <- getState
+ if branchUpdated st || needInteractiveAccess st
+ then return st
+ else do
+ um <- update
+ let stf = case um of
+ UpdateMade {} -> \st' -> st'
+ { branchUpdated = True
+ , journalIgnorable = journalClean um
+ }
+ UpdateFailedPermissions {} -> \st' -> st'
+ { branchUpdated = True
+ , journalIgnorable = False
+ , unmergedRefs = refsUnmerged um
+ , unhandledTransitions = newTransitions um
+ , cachedFileContents = []
+ }
+ changeState stf
+ return (stf st)
+
+{- Avoids updating the branch. A useful optimisation when the branch
+ - is known to have not changed, or git-annex won't be relying on info
+ - queried from it being as up-to-date as possible. -}
+disableUpdate :: Annex ()
+disableUpdate = changeState $ \s -> s { branchUpdated = True }
+
+{- Called when a change is made to the journal. -}
+journalChanged :: Annex ()
+journalChanged = do
+ -- Optimisation: Typically journalIgnorable will already be True
+ -- (when one thing gets journalled, often other things do too),
+ -- so avoid an unnecessary write to the MVar that changeState
+ -- would do.
+ --
+ -- This assumes that another thread is not setting journalIgnorable
+ -- at the same time, but since runUpdateOnce is the only
+ -- thing that sets it, and it only runs once, that
+ -- should not happen.
+ st <- getState
+ when (journalIgnorable st) $
+ changeState $ \st' -> st' { journalIgnorable = False }
+
+{- When git-annex is somehow interactive, eg in --batch mode,
+ - and needs to always notice changes made to the journal by other
+ - processes, this disables the optimisations that normally avoid
+ - reading the journal.
+ -
+ - It also avoids using the cache, so changes committed by other processes
+ - will be seen.
+ -}
+enableInteractiveBranchAccess :: Annex ()
+enableInteractiveBranchAccess = changeState $ \s -> s
+ { needInteractiveAccess = True
+ , journalIgnorable = False
+ }
+
+setCache :: RawFilePath -> L.ByteString -> Annex ()
+setCache file content = changeState $ \s -> s
+ { cachedFileContents = add (cachedFileContents s) }
+ where
+ add l
+ | length l < logFilesToCache = (file, content) : l
+ | otherwise = (file, content) : Prelude.init l
+
+getCache :: RawFilePath -> BranchState -> Maybe L.ByteString
+getCache file state = go (cachedFileContents state)
+ where
+ go [] = Nothing
+ go ((f,c):rest)
+ | f == file && not (needInteractiveAccess state) = Just c
+ | otherwise = go rest
+
+invalidateCache :: RawFilePath -> Annex ()
+invalidateCache f = changeState $ \s -> s
+ { cachedFileContents = filter (\(f', _) -> f' /= f)
+ (cachedFileContents s)
+ }
+
+invalidateCacheAll :: Annex ()
+invalidateCacheAll = changeState $ \s -> s { cachedFileContents = [] }
diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs
new file mode 100644
index 0000000000..35162b91a1
--- /dev/null
+++ b/Annex/CatFile.hs
@@ -0,0 +1,221 @@
+{- git cat-file interface, with handle automatically stored in the Annex monad
+ -
+ - Copyright 2011-2021 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -} + +{-# LANGUAGE BangPatterns #-} + +module Annex.CatFile ( + catFile, + catFileDetails, + catObject, + catTree, + catCommit, + catObjectDetails, + withCatFileHandle, + catObjectMetaData, + catFileStop, + catKey, + catKey', + catSymLinkTarget, + catKeyFile, + catKeyFileHEAD, + catKeyFileHidden, + catObjectMetaDataHidden, +) where + +import qualified Data.ByteString.Lazy as L +import qualified Data.Map as M +import System.PosixCompat.Types +import Control.Concurrent.STM + +import Annex.Common +import qualified Git +import qualified Git.CatFile +import qualified Annex +import Git.Types +import Git.FilePath +import Git.Index +import qualified Git.Ref +import Annex.Link +import Annex.CurrentBranch +import Types.AdjustedBranch +import Types.CatFileHandles +import Utility.ResourcePool + +catFile :: Git.Branch -> RawFilePath -> Annex L.ByteString +catFile branch file = withCatFileHandle $ \h -> + liftIO $ Git.CatFile.catFile h branch file + +catFileDetails :: Git.Branch -> RawFilePath -> Annex (Maybe (L.ByteString, Sha, ObjectType)) +catFileDetails branch file = withCatFileHandle $ \h -> + liftIO $ Git.CatFile.catFileDetails h branch file + +catObject :: Git.Ref -> Annex L.ByteString +catObject ref = withCatFileHandle $ \h -> + liftIO $ Git.CatFile.catObject h ref + +catObjectMetaData :: Git.Ref -> Annex (Maybe (Sha, Integer, ObjectType)) +catObjectMetaData ref = withCatFileMetaDataHandle $ \h -> + liftIO $ Git.CatFile.catObjectMetaData h ref + +catTree :: Git.Ref -> Annex [(FilePath, FileMode)] +catTree ref = withCatFileHandle $ \h -> + liftIO $ Git.CatFile.catTree h ref + +catCommit :: Git.Ref -> Annex (Maybe Commit) +catCommit ref = withCatFileHandle $ \h -> + liftIO $ Git.CatFile.catCommit h ref + +catObjectDetails :: Git.Ref -> Annex (Maybe (L.ByteString, Sha, ObjectType)) +catObjectDetails ref = withCatFileHandle $ \h -> + liftIO $ Git.CatFile.catObjectDetails h ref + +{- There can be multiple index files, and a different cat-file is needed + - for each. That is selected by setting GIT_INDEX_FILE in the gitEnv + - before running this. -} +withCatFileHandle :: (Git.CatFile.CatFileHandle -> Annex a) -> Annex a +withCatFileHandle = withCatFileHandle' + Git.CatFile.catFileStart + catFileMap + (\v m -> v { catFileMap = m }) + +withCatFileMetaDataHandle :: (Git.CatFile.CatFileMetaDataHandle -> Annex a) -> Annex a +withCatFileMetaDataHandle = withCatFileHandle' + Git.CatFile.catFileMetaDataStart + catFileMetaDataMap + (\v m -> v { catFileMetaDataMap = m }) + +withCatFileHandle' + :: (Repo -> IO hdl) + -> (CatMap -> M.Map FilePath (ResourcePool hdl)) + -> (CatMap -> M.Map FilePath (ResourcePool hdl) -> CatMap) + -> (hdl -> Annex a) + -> Annex a +withCatFileHandle' startcat get set a = do + cfh <- Annex.getState Annex.catfilehandles + indexfile <- fromMaybe "" . 
maybe Nothing (lookup indexEnv) + <$> fromRepo gitEnv + p <- case cfh of + CatFileHandlesNonConcurrent m -> case M.lookup indexfile (get m) of + Just p -> return p + Nothing -> do + p <- mkResourcePoolNonConcurrent startcatfile + let !m' = set m (M.insert indexfile p (get m)) + Annex.changeState $ \s -> s + { Annex.catfilehandles = CatFileHandlesNonConcurrent m' } + return p + CatFileHandlesPool tm -> do + m <- liftIO $ atomically $ takeTMVar tm + case M.lookup indexfile (get m) of + Just p -> do + liftIO $ atomically $ putTMVar tm m + return p + Nothing -> do + p <- mkResourcePool maxCatFiles + let !m' = set m (M.insert indexfile p (get m)) + liftIO $ atomically $ putTMVar tm m' + return p + withResourcePool p startcatfile a + where + startcatfile = inRepo startcat + +{- A lot of git cat-file processes are unlikely to improve concurrency, + - because a query to them takes only a little bit of CPU, and tends to be + - bottlenecked on disk. Also, they each open a number of files, so + - using too many might run out of file handles. So, only start a maximum + - of 2. + - + - Note that each different index file gets its own pool of cat-files; + - this is the size of each pool. In all, 4 times this many cat-files + - may end up running. + -} +maxCatFiles :: Int +maxCatFiles = 2 + +{- Stops all running cat-files. Should only be run when it's known that + - nothing is using the handles, eg at shutdown. -} +catFileStop :: Annex () +catFileStop = do + cfh <- Annex.getState Annex.catfilehandles + m <- case cfh of + CatFileHandlesNonConcurrent m -> do + Annex.changeState $ \s -> s { Annex.catfilehandles = CatFileHandlesNonConcurrent emptyCatMap } + return m + CatFileHandlesPool tm -> + liftIO $ atomically $ swapTMVar tm emptyCatMap + liftIO $ forM_ (M.elems (catFileMap m)) $ \p -> + freeResourcePool p Git.CatFile.catFileStop + liftIO $ forM_ (M.elems (catFileMetaDataMap m)) $ \p -> + freeResourcePool p Git.CatFile.catFileMetaDataStop + +{- From ref to a symlink or a pointer file, get the key. -} +catKey :: Ref -> Annex (Maybe Key) +catKey ref = catObjectMetaData ref >>= \case + Just (_, sz, _) -> catKey' ref sz + Nothing -> return Nothing + +catKey' :: Ref -> FileSize -> Annex (Maybe Key) +catKey' ref sz + -- Avoid catting large files, that cannot be symlinks or + -- pointer files, which would require buffering their + -- content in memory, as well as a lot of IO. + | sz <= fromIntegral maxPointerSz = + parseLinkTargetOrPointer . L.toStrict <$> catObject ref +catKey' _ _ = return Nothing + +{- Gets a symlink target. -} +catSymLinkTarget :: Sha -> Annex RawFilePath +catSymLinkTarget sha = fromInternalGitPath . L.toStrict <$> get + where + -- Avoid buffering the whole file content, which might be large. + -- 8192 is enough if it really is a symlink. + get = L.take 8192 <$> catObject sha + +{- From a file in the repository back to the key. + - + - Ideally, this should reflect the key that's staged in the index, + - not the key that's committed to HEAD. Unfortunately, git cat-file + - does not refresh the index file after it's started up, so things + - newly staged in the index won't show up. It does, however, notice + - when branches change. + - + - For command-line git-annex use, that doesn't matter. It's perfectly + - reasonable for things staged in the index after the currently running + - git-annex process to not be noticed by it. 
However, we do want to see + - what's in the index, since it may have uncommitted changes not in HEAD + - + - For the assistant, this is much more of a problem, since it commits + - files and then needs to be able to immediately look up their keys. + - OTOH, the assistant doesn't keep changes staged in the index for very + - long at all before committing them -- and it won't look at the keys + - of files until after committing them. + - + - So, this gets info from the index, unless running as a daemon. + -} +catKeyFile :: RawFilePath -> Annex (Maybe Key) +catKeyFile f = ifM (Annex.getState Annex.daemon) + ( catKeyFileHEAD f + , maybe (pure Nothing) catKey =<< inRepo (Git.Ref.fileRef f) + ) + +catKeyFileHEAD :: RawFilePath -> Annex (Maybe Key) +catKeyFileHEAD f = maybe (pure Nothing) catKey + =<< inRepo (Git.Ref.fileFromRef Git.Ref.headRef f) + +{- Look in the original branch from whence an adjusted branch is based + - to find the file. But only when the adjustment hides some files. -} +catKeyFileHidden :: RawFilePath -> CurrBranch -> Annex (Maybe Key) +catKeyFileHidden = hiddenCat catKey + +catObjectMetaDataHidden :: RawFilePath -> CurrBranch -> Annex (Maybe (Sha, Integer, ObjectType)) +catObjectMetaDataHidden = hiddenCat catObjectMetaData + +hiddenCat :: (Ref -> Annex (Maybe a)) -> RawFilePath -> CurrBranch -> Annex (Maybe a) +hiddenCat a f (Just origbranch, Just adj) + | adjustmentHidesFiles adj = + maybe (pure Nothing) a + =<< inRepo (Git.Ref.fileFromRef origbranch f) +hiddenCat _ _ _ = return Nothing diff --git a/Annex/ChangedRefs.hs b/Annex/ChangedRefs.hs new file mode 100644 index 0000000000..7a9ce8a34f --- /dev/null +++ b/Annex/ChangedRefs.hs @@ -0,0 +1,111 @@ +{- Waiting for changed git refs + - + - Copyright 2014-2016 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Annex.ChangedRefs + ( ChangedRefs(..) + , ChangedRefsHandle + , waitChangedRefs + , drainChangedRefs + , stopWatchingChangedRefs + , watchChangedRefs + ) where + +import Annex.Common +import Utility.DirWatcher +import Utility.DirWatcher.Types +import Utility.Directory.Create +import qualified Git +import Git.Sha +import qualified Utility.SimpleProtocol as Proto + +import Control.Concurrent +import Control.Concurrent.STM +import Control.Concurrent.STM.TBMChan +import qualified Data.ByteString as S +import qualified System.FilePath.ByteString as P + +newtype ChangedRefs = ChangedRefs [Git.Ref] + deriving (Show) + +instance Proto.Serializable ChangedRefs where + serialize (ChangedRefs l) = unwords $ map Git.fromRef l + deserialize = Just . ChangedRefs . map (Git.Ref . encodeBS) . words + +data ChangedRefsHandle = ChangedRefsHandle DirWatcherHandle (TBMChan Git.Sha) + +-- | Wait for one or more git refs to change. +-- +-- When possible, coalesce ref writes that occur closely together +-- in time. Delay up to 0.05 seconds to get more ref writes. +waitChangedRefs :: ChangedRefsHandle -> IO ChangedRefs +waitChangedRefs (ChangedRefsHandle _ chan) = + atomically (readTBMChan chan) >>= \case + Nothing -> return $ ChangedRefs [] + Just r -> do + threadDelay 50000 + rs <- atomically $ loop [] + return $ ChangedRefs (r:rs) + where + loop rs = tryReadTBMChan chan >>= \case + Just (Just r) -> loop (r:rs) + _ -> return rs + +-- | Remove any changes that might be buffered in the channel, +-- without waiting for any new changes. 
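+--
+-- A hypothetical use, clearing stale notifications before waiting for
+-- fresh ones:
+--
+-- > drainChangedRefs h
+-- > ChangedRefs rs <- waitChangedRefs h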
+drainChangedRefs :: ChangedRefsHandle -> IO ()
+drainChangedRefs (ChangedRefsHandle _ chan) = atomically go
+ where
+ go = tryReadTBMChan chan >>= \case
+ Just (Just _) -> go
+ _ -> return ()
+
+stopWatchingChangedRefs :: ChangedRefsHandle -> IO ()
+stopWatchingChangedRefs h@(ChangedRefsHandle wh chan) = do
+ stopWatchDir wh
+ atomically $ closeTBMChan chan
+ drainChangedRefs h
+
+watchChangedRefs :: Annex (Maybe ChangedRefsHandle)
+watchChangedRefs = do
+ -- This channel is used to accumulate notifications,
+ -- because the DirWatcher might have multiple threads that find
+ -- changes at the same time. It is bounded to allow a watcher
+ -- to be started once and reused, without too many changes being
+ -- buffered in memory.
+ chan <- liftIO $ newTBMChanIO 100
+
+ g <- gitRepo
+ let gittop = Git.localGitDir g
+ let refdir = gittop P.</> "refs"
+ liftIO $ createDirectoryUnder [gittop] refdir
+
+ let notifyhook = Just $ notifyHook chan
+ let hooks = mkWatchHooks
+ { addHook = notifyhook
+ , modifyHook = notifyhook
+ }
+
+ if canWatch
+ then do
+ h <- liftIO $ watchDir
+ (fromRawFilePath refdir)
+ (const False) True hooks id
+ return $ Just $ ChangedRefsHandle h chan
+ else return Nothing
+
+notifyHook :: TBMChan Git.Sha -> FilePath -> Maybe FileStatus -> IO ()
+notifyHook chan reffile _
+ | ".lock" `isSuffixOf` reffile = noop
+ | otherwise = void $ do
+ sha <- catchDefaultIO Nothing $
+ extractSha <$> S.readFile reffile
+ -- When the channel is full, there is probably no reader
+ -- running, or ref changes have been occurring very fast,
+ -- so it's ok to not write the change to it.
+ maybe noop (void . atomically . tryWriteTBMChan chan) sha
diff --git a/Annex/CheckAttr.hs b/Annex/CheckAttr.hs
new file mode 100644
index 0000000000..6ad8fafce6
--- /dev/null
+++ b/Annex/CheckAttr.hs
@@ -0,0 +1,74 @@
+{- git check-attr interface
+ -
+ - Copyright 2012-2020 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+module Annex.CheckAttr (
+ annexAttrs,
+ checkAttr,
+ checkAttrs,
+ checkAttrStop,
+ mkConcurrentCheckAttrHandle,
+) where
+
+import Annex.Common
+import qualified Git.CheckAttr as Git
+import qualified Annex
+import Utility.ResourcePool
+import Types.Concurrency
+import Annex.Concurrent.Utility
+
+{- All gitattributes used by git-annex. -}
+annexAttrs :: [Git.Attr]
+annexAttrs =
+ [ "annex.backend"
+ , "annex.largefiles"
+ , "annex.numcopies"
+ , "annex.mincopies"
+ ]
+
+checkAttr :: Git.Attr -> RawFilePath -> Annex String
+checkAttr attr file = withCheckAttrHandle $ \h -> do
+ r <- liftIO $ Git.checkAttr h attr file
+ if r == Git.unspecifiedAttr
+ then return ""
+ else return r
+
+checkAttrs :: [Git.Attr] -> RawFilePath -> Annex [String]
+checkAttrs attrs file = withCheckAttrHandle $ \h ->
+ liftIO $ Git.checkAttrs h attrs file
+
+withCheckAttrHandle :: (Git.CheckAttrHandle -> Annex a) -> Annex a
+withCheckAttrHandle a =
+ maybe mkpool go =<< Annex.getState Annex.checkattrhandle
+ where
+ go p = withResourcePool p start a
+ start = inRepo $ Git.checkAttrStart annexAttrs
+ mkpool = do
+ -- This only runs in non-concurrent code paths;
+ -- a concurrent pool is set up earlier when needed.
+ p <- mkResourcePoolNonConcurrent start + Annex.changeState $ \s -> s { Annex.checkattrhandle = Just p } + go p + +mkConcurrentCheckAttrHandle :: Concurrency -> Annex (ResourcePool Git.CheckAttrHandle) +mkConcurrentCheckAttrHandle c = + Annex.getState Annex.checkattrhandle >>= \case + Just p@(ResourcePool {}) -> return p + _ -> mkResourcePool =<< liftIO (maxCheckAttrs c) + +{- git check-attr is typically CPU bound, and is not likely to be the main + - bottleneck for any command. So limit to the number of CPU cores, maximum, + - while respecting the -Jn value. + -} +maxCheckAttrs :: Concurrency -> IO Int +maxCheckAttrs = concurrencyUpToCpus + +checkAttrStop :: Annex () +checkAttrStop = maybe noop stop =<< Annex.getState Annex.checkattrhandle + where + stop p = do + liftIO $ freeResourcePool p Git.checkAttrStop + Annex.changeState $ \s -> s { Annex.checkattrhandle = Nothing } diff --git a/Annex/CheckIgnore.hs b/Annex/CheckIgnore.hs new file mode 100644 index 0000000000..d3c03f210a --- /dev/null +++ b/Annex/CheckIgnore.hs @@ -0,0 +1,64 @@ +{- git check-ignore interface, with handle automatically stored in + - the Annex monad + - + - Copyright 2013-2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.CheckIgnore ( + CheckGitIgnore(..), + checkIgnored, + checkIgnoreStop, + mkConcurrentCheckIgnoreHandle, +) where + +import Annex.Common +import qualified Git.CheckIgnore as Git +import qualified Annex +import Utility.ResourcePool +import Types.Concurrency +import Annex.Concurrent.Utility + +newtype CheckGitIgnore = CheckGitIgnore Bool + +checkIgnored :: CheckGitIgnore -> RawFilePath -> Annex Bool +checkIgnored (CheckGitIgnore False) _ = pure False +checkIgnored (CheckGitIgnore True) file = + ifM (Annex.getRead Annex.force) + ( pure False + , withCheckIgnoreHandle $ \h -> liftIO $ Git.checkIgnored h file + ) + +withCheckIgnoreHandle :: (Git.CheckIgnoreHandle -> Annex a) -> Annex a +withCheckIgnoreHandle a = + maybe mkpool go =<< Annex.getState Annex.checkignorehandle + where + go p = withResourcePool p start a + start = inRepo Git.checkIgnoreStart + mkpool = do + -- This only runs in non-concurrent code paths; + -- a concurrent pool is set up earlier when needed. + p <- mkResourcePoolNonConcurrent start + Annex.changeState $ \s -> s { Annex.checkignorehandle = Just p } + go p + +mkConcurrentCheckIgnoreHandle :: Concurrency -> Annex (ResourcePool Git.CheckIgnoreHandle) +mkConcurrentCheckIgnoreHandle c = + Annex.getState Annex.checkignorehandle >>= \case + Just p@(ResourcePool {}) -> return p + _ -> mkResourcePool =<< liftIO (maxCheckIgnores c) + +{- git check-ignore is typically CPU bound, and is not likely to be the main + - bottleneck for any command. So limit to the number of CPU cores, maximum, + - while respecting the -Jn value. + -} +maxCheckIgnores :: Concurrency -> IO Int +maxCheckIgnores = concurrencyUpToCpus + +checkIgnoreStop :: Annex () +checkIgnoreStop = maybe noop stop =<< Annex.getState Annex.checkignorehandle + where + stop p = do + liftIO $ freeResourcePool p Git.checkIgnoreStop + Annex.changeState $ \s -> s { Annex.checkignorehandle = Nothing } diff --git a/Annex/Cluster.hs b/Annex/Cluster.hs new file mode 100644 index 0000000000..f3283094d3 --- /dev/null +++ b/Annex/Cluster.hs @@ -0,0 +1,180 @@ +{- clusters + - + - Copyright 2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE RankNTypes, OverloadedStrings #-} + +module Annex.Cluster where + +import Annex.Common +import qualified Annex +import Types.Cluster +import Logs.Cluster +import P2P.Proxy +import P2P.Protocol +import P2P.IO +import Annex.Proxy +import Annex.UUID +import Annex.BranchState +import Logs.Location +import Logs.PreferredContent +import Types.Command +import Remote.List +import qualified Remote +import qualified Types.Remote as Remote + +import qualified Data.Map as M +import qualified Data.Set as S +import System.Random + +{- Proxy to a cluster. -} +proxyCluster + :: ClusterUUID + -> CommandPerform + -> ServerMode + -> ClientSide + -> (forall a. Annex () -> ((a -> CommandPerform) -> Annex (Either ProtoFailure a) -> CommandPerform)) + -> CommandPerform +proxyCluster clusteruuid proxydone servermode clientside protoerrhandler = do + enableInteractiveBranchAccess + getClientProtocolVersion (fromClusterUUID clusteruuid) clientside + withclientversion (protoerrhandler noop) + where + withclientversion (Just (clientmaxversion, othermsg)) = do + -- The protocol versions supported by the nodes are not + -- known at this point, and would be too expensive to + -- determine. Instead, pick the newest protocol version + -- that we and the client both speak. The proxy code + -- checks protocol versions of remotes, so nodes can + -- have different protocol versions. + let protocolversion = min maxProtocolVersion clientmaxversion + sendClientProtocolVersion clientside othermsg protocolversion + (getclientbypass protocolversion) (protoerrhandler noop) + withclientversion Nothing = proxydone + + getclientbypass protocolversion othermsg = + getClientBypass clientside protocolversion othermsg + (withclientbypass protocolversion) (protoerrhandler noop) + + withclientbypass protocolversion (bypassuuids, othermsg) = do + (selectnode, closenodes) <- + clusterProxySelector clusteruuid + protocolversion bypassuuids + proxystate <- liftIO mkProxyState + concurrencyconfig <- concurrencyConfigJobs + let proxyparams = ProxyParams + { proxyMethods = mkProxyMethods + , proxyState = proxystate + , proxyServerMode = servermode + , proxyClientSide = clientside + , proxyUUID = fromClusterUUID clusteruuid + , proxySelector = selectnode + , proxyConcurrencyConfig = concurrencyconfig + , proxyClientProtocolVersion = protocolversion + } + proxy proxydone proxyparams othermsg + (protoerrhandler closenodes) + +clusterProxySelector + :: ClusterUUID + -> ProtocolVersion + -> Bypass + -> Annex (ProxySelector, Annex ()) +clusterProxySelector clusteruuid protocolversion (Bypass bypass) = do + nodeuuids <- (fromMaybe S.empty . M.lookup clusteruuid . clusterUUIDs) + <$> getClusters + myclusters <- annexClusters <$> Annex.getGitConfig + allremotes <- concat . 
Remote.byCost <$> remoteList + hereu <- getUUID + let bypass' = S.insert hereu bypass + let clusterremotes = filter (isnode bypass' allremotes nodeuuids myclusters) allremotes + fastDebug "Annex.Cluster" $ unwords + [ "cluster gateway at", fromUUID hereu + , "connecting to", show (map Remote.name clusterremotes) + , "bypass", show (S.toList bypass) + ] + nodes <- mapM (proxyRemoteSide protocolversion (Bypass bypass')) clusterremotes + let closenodes = mapM_ closeRemoteSide nodes + let proxyselector = ProxySelector + { proxyCHECKPRESENT = nodecontaining nodes + , proxyGET = nodecontaining nodes + -- The key is sent to multiple nodes at the same time, + -- skipping nodes where it's known/expected to already be + -- present to avoid needing to connect to those, and + -- skipping nodes where it's not preferred content. + , proxyPUT = \af k -> do + locs <- S.fromList <$> loggedLocations k + let l = filter (flip S.notMember locs . Remote.uuid . remote) nodes + l' <- filterM (\n -> isPreferredContent (Just (Remote.uuid (remote n))) mempty (Just k) af True) l + -- PUT to no nodes doesn't work, so fall + -- back to all nodes. + return $ nonempty [l', l] nodes + -- Remove the key from every node that contains it. + -- But, since it's possible the location log for some nodes + -- could be out of date, actually try to remove from every + -- node. + , proxyREMOVE = const (pure nodes) + , proxyGETTIMESTAMP = pure nodes + -- Content is not locked on the cluster as a whole, + -- instead it can be locked on individual nodes that are + -- proxied to the client. + , proxyLOCKCONTENT = const (pure Nothing) + } + return (proxyselector, closenodes) + where + -- Nodes of the cluster have remote.name.annex-cluster-node + -- containing its name. + -- + -- Or, a node can be the cluster proxied by another gateway. + isnode bypass' rs nodeuuids myclusters r = + case remoteAnnexClusterNode (Remote.gitconfig r) of + Just names + | any (isclustername myclusters) names -> + flip S.member nodeuuids $ + ClusterNodeUUID $ Remote.uuid r + | otherwise -> False + Nothing -> isclusterviagateway bypass' rs r + + -- Is this remote the same cluster, proxied via another gateway? + -- + -- Must avoid bypassed gateways to prevent cycles. + isclusterviagateway bypass' rs r = + case mkClusterUUID (Remote.uuid r) of + Just cu | cu == clusteruuid -> + case remoteAnnexProxiedBy (Remote.gitconfig r) of + Just proxyuuid | proxyuuid `S.notMember` bypass' -> + not $ null $ + filter isclustergateway $ + filter (\p -> Remote.uuid p == proxyuuid) rs + _ -> False + _ -> False + + isclustergateway r = any (== clusteruuid) $ + remoteAnnexClusterGateway $ Remote.gitconfig r + + isclustername myclusters name = + M.lookup name myclusters == Just clusteruuid + + nodecontaining nodes k = do + locs <- S.fromList <$> loggedLocations k + case filter (flip S.member locs . Remote.uuid . remote) nodes of + [] -> return Nothing + (node:[]) -> return (Just node) + (node:rest) -> + -- The list of nodes is ordered by cost. + -- Use any of the ones with equally low + -- cost. + let lowestcost = Remote.cost (remote node) + samecost = node : takeWhile (\n -> Remote.cost (remote n) == lowestcost) rest + in do + n <- liftIO $ getStdRandom $ + randomR (0, length samecost - 1) + return (Just (samecost !! 
n)) + + nonempty (l:ls) fallback + | null l = nonempty ls fallback + | otherwise = l + nonempty [] fallback = fallback diff --git a/Annex/Common.hs b/Annex/Common.hs new file mode 100644 index 0000000000..0fc602205a --- /dev/null +++ b/Annex/Common.hs @@ -0,0 +1,16 @@ +{-# LANGUAGE CPP #-} + +module Annex.Common (module X) where + +import Common as X +import Types as X +import Key as X +import Types.UUID as X +import Annex as X (gitRepo, inRepo, fromRepo, calcRepo, calcRepo') +import Annex.Locations as X +import Annex.Debug as X (fastDebug, debug) +import Messages as X +import Git.Quote as X +#ifndef mingw32_HOST_OS +import System.Posix.IO as X hiding (createPipe, append) +#endif diff --git a/Annex/Concurrent.hs b/Annex/Concurrent.hs new file mode 100644 index 0000000000..72ea40318f --- /dev/null +++ b/Annex/Concurrent.hs @@ -0,0 +1,113 @@ +{- git-annex concurrent state + - + - Copyright 2015-2022 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.Concurrent ( + module Annex.Concurrent, + module Annex.Concurrent.Utility +) where + +import Annex +import Annex.Common +import Annex.Concurrent.Utility +import qualified Annex.Queue +import Types.Concurrency +import Types.CatFileHandles +import Annex.CatFile +import Annex.CheckAttr +import Annex.HashObject +import Annex.CheckIgnore + +import qualified Data.Map as M + +setConcurrency :: ConcurrencySetting -> Annex () +setConcurrency (ConcurrencyCmdLine s) = setConcurrency' s ConcurrencyCmdLine +setConcurrency (ConcurrencyGitConfig s) = setConcurrency' s ConcurrencyGitConfig + +setConcurrency' :: Concurrency -> (Concurrency -> ConcurrencySetting) -> Annex () +setConcurrency' NonConcurrent f = + Annex.changeState $ \s -> s + { Annex.concurrency = f NonConcurrent + } +setConcurrency' c f = do + oldc <- Annex.getState Annex.concurrency + case oldc of + ConcurrencyCmdLine NonConcurrent -> fromnonconcurrent + ConcurrencyGitConfig NonConcurrent -> fromnonconcurrent + _ + | oldc == newc -> return () + | otherwise -> + Annex.changeState $ \s -> s + { Annex.concurrency = newc + } + where + newc = f c + fromnonconcurrent = do + catFileStop + checkAttrStop + hashObjectStop + checkIgnoreStop + cfh <- liftIO catFileHandlesPool + cah <- mkConcurrentCheckAttrHandle c + hoh <- mkConcurrentHashObjectHandle c + cih <- mkConcurrentCheckIgnoreHandle c + Annex.changeState $ \s -> s + { Annex.concurrency = newc + , Annex.catfilehandles = cfh + , Annex.checkattrhandle = Just cah + , Annex.hashobjecthandle = Just hoh + , Annex.checkignorehandle = Just cih + } + +{- Allows forking off a thread that uses a copy of the current AnnexState + - to run an Annex action. + - + - The returned IO action can be used to start the thread. + - It returns an Annex action that must be run in the original + - calling context to merge the forked AnnexState back into the + - current AnnexState. + -} +forkState :: Annex a -> Annex (IO (Annex a)) +forkState a = do + rd <- Annex.getRead id + st <- dupState + return $ do + (ret, (newst, _rd)) <- run (st, rd) a + return $ do + mergeState newst + return ret + +{- Returns a copy of the current AnnexState that is safe to be + - used when forking off a thread. + - + - After an Annex action is run using this AnnexState, it + - should be merged back into the current Annex's state, + - by calling mergeState. + -} +dupState :: Annex AnnexState +dupState = do + st <- Annex.getState id + -- Make sure that concurrency is enabled, if it was not already, + -- so the concurrency-safe resource pools are set up. 
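+ -- A sketch of the intended pattern (illustrative; see forkState
+ -- above for the canonical use):
+ --
+ -- > st <- dupState
+ -- > (r, (newst, _rd)) <- liftIO $ run (st, rd) a
+ -- > mergeState newst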
+ st' <- case getConcurrency' (Annex.concurrency st) of + NonConcurrent -> do + setConcurrency (ConcurrencyCmdLine (Concurrent 1)) + Annex.getState id + _ -> return st + return $ st' + -- each thread has its own repoqueue + { Annex.repoqueue = Nothing + -- no errors from this thread yet + , Annex.errcounter = 0 + } + +{- Merges the passed AnnexState into the current Annex state. -} +mergeState :: AnnexState -> Annex () +mergeState st = do + forM_ (M.toList $ Annex.cleanupactions st) $ + uncurry addCleanupAction + Annex.Queue.mergeFrom st + changeState $ \s -> s { errcounter = errcounter s + errcounter st } diff --git a/Annex/Concurrent/Utility.hs b/Annex/Concurrent/Utility.hs new file mode 100644 index 0000000000..2810f6da6b --- /dev/null +++ b/Annex/Concurrent/Utility.hs @@ -0,0 +1,31 @@ +{- git-annex concurrency utilities + - + - Copyright 2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.Concurrent.Utility where + +import Annex +import Types.Concurrency + +import GHC.Conc + +getConcurrency :: Annex Concurrency +getConcurrency = getConcurrency' <$> getState concurrency + +getConcurrency' :: ConcurrencySetting -> Concurrency +getConcurrency' (ConcurrencyCmdLine c) = c +getConcurrency' (ConcurrencyGitConfig c) = c + +{- Honor the requested level of concurrency, but only up to the number of + - CPU cores. Useful for things that are known to be CPU bound. -} +concurrencyUpToCpus :: Concurrency -> IO Int +concurrencyUpToCpus c = do + let cn = case c of + Concurrent n -> n + NonConcurrent -> 1 + ConcurrentPerCpu -> 1 + pn <- getNumProcessors + return (min cn pn) diff --git a/Annex/Content.hs b/Annex/Content.hs new file mode 100644 index 0000000000..4ad045d763 --- /dev/null +++ b/Annex/Content.hs @@ -0,0 +1,1116 @@ +{- git-annex file content managing + - + - Copyright 2010-2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} + +module Annex.Content ( + inAnnex, + inAnnex', + inAnnexSafe, + inAnnexCheck, + objectFileExists, + lockContentShared, + lockContentForRemoval, + ContentRemovalLock, + RetrievalSecurityPolicy(..), + getViaTmp, + getViaTmpFromDisk, + verificationOfContentFailed, + checkDiskSpaceToGet, + checkSecureHashes, + prepTmp, + withTmp, + checkDiskSpace, + needMoreDiskSpace, + moveAnnex, + populatePointerFile, + linkToAnnex, + linkFromAnnex, + linkFromAnnex', + LinkAnnexResult(..), + unlinkAnnex, + checkedCopyFile, + linkOrCopy, + linkOrCopy', + sendAnnex, + prepSendAnnex, + prepSendAnnex', + removeAnnex, + moveBad, + KeyLocation(..), + listKeys, + listKeys', + saveState, + downloadUrl, + preseedTmp, + dirKeys, + withObjectLoc, + staleKeysPrune, + pruneTmpWorkDirBefore, + isUnmodified, + isUnmodifiedCheap, + verifyKeyContentPostRetrieval, + verifyKeyContent, + VerifyConfig, + VerifyConfigA(..), + Verification(..), + unVerified, + withTmpWorkDir, + KeyStatus(..), + isKeyUnlockedThin, + getKeyStatus, + getKeyFileStatus, + cleanObjectDirs, + contentSize, +) where + +import System.IO.Unsafe (unsafeInterleaveIO) +import qualified Data.Set as S + +import Annex.Common +import Annex.Content.Presence +import Annex.Content.LowLevel +import Annex.Content.PointerFile +import Annex.Verify +import qualified Git +import qualified Annex +import qualified Annex.Queue +import qualified Annex.Branch +import qualified Annex.Url as Url +import qualified Backend +import qualified Database.Keys +import Git.FilePath +import Annex.Perms +import Annex.Link +import Annex.LockPool +import Annex.LockFile +import Annex.UUID +import Annex.InodeSentinal +import Annex.ReplaceFile +import Annex.AdjustedBranch (adjustedBranchRefresh) +import Annex.DirHashes +import Messages.Progress +import Types.Remote (RetrievalSecurityPolicy(..), VerifyConfigA(..)) +import Types.NumCopies +import Types.Key +import Types.Transfer +import Logs.Transfer +import Logs.Location +import Utility.InodeCache +import Utility.CopyFile +import Utility.Metered +import Utility.HumanTime +import Utility.TimeStamp +#ifndef mingw32_HOST_OS +import Utility.FileMode +#endif +import qualified Utility.RawFilePath as R + +import qualified System.FilePath.ByteString as P +import System.PosixCompat.Files (isSymbolicLink, linkCount) +import Data.Time.Clock.POSIX + +{- Prevents the content from being removed while the action is running. + - Uses a shared lock. + - + - If locking fails, or the content is not present, throws an exception + - rather than running the action. + - + - When a Duration is provided, the content is prevented from being removed + - for that amount of time, even if the current process is terminated. + - (This is only done when using a separate lock file from the content + - file eg in v10 and higher repositories.) 
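+ -
+ - For example (hypothetical caller; assumes Duration's seconds
+ - constructor), to retain the content for at least an hour even if
+ - this process is killed:
+ -
+ - > lockContentShared key (Just (Duration 3600)) $ \_vc ->
+ - >         doSomethingWithContent key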
+ -}
+lockContentShared :: Key -> Maybe Duration -> (VerifiedCopy -> Annex a) -> Annex a
+lockContentShared key mduration a = do
+ retention <- case mduration of
+ Nothing -> pure Nothing
+ Just duration -> do
+ rt <- calcRepo (gitAnnexContentRetentionTimestamp key)
+ now <- liftIO getPOSIXTime
+ pure $ Just
+ ( rt
+ , now + fromIntegral (durationSeconds duration)
+ )
+ lockContentUsing (lock retention) key notpresent $
+ ifM (inAnnex key)
+ ( do
+ u <- getUUID
+ withVerifiedCopy LockedCopy u (return (Right True)) a
+ , notpresent
+ )
+ where
+ notpresent = giveup $ "failed to lock content: not present"
+#ifndef mingw32_HOST_OS
+ lock retention _ (Just lockfile) =
+ ( posixLocker tryLockShared lockfile >>= \case
+ Just lck -> do
+ writeretention retention
+ return (Just lck)
+ Nothing -> return Nothing
+ , Just $ posixLocker tryLockExclusive lockfile >>= \case
+ Just lck -> do
+ dropretention retention
+ return (Just lck)
+ Nothing -> return Nothing
+ )
+ lock _ contentfile Nothing =
+ ( tryLockShared Nothing contentfile
+ , Nothing
+ )
+#else
+ lock retention obj lckf =
+ let (locker, postunlock) = winLocker lockShared obj lckf
+ in
+ ( locker >>= \case
+ Just lck -> do
+ writeretention retention
+ return (Just lck)
+ Nothing -> return Nothing
+ , Just $ \lckfile -> do
+ maybe noop (\pu -> pu lckfile) postunlock
+ lockdropretention obj lckf retention
+ )
+
+ lockdropretention _ _ Nothing = noop
+ lockdropretention obj lckf retention = do
+ -- In order to dropretention, have to
+ -- take an exclusive lock.
+ let (exlocker, expostunlock) =
+ winLocker lockExclusive obj lckf
+ exlocker >>= \case
+ Nothing -> noop
+ Just lck -> do
+ dropretention retention
+ liftIO $ dropLock lck
+ case (expostunlock, lckf) of
+ (Just pu, Just f) -> pu f
+ _ -> noop
+#endif
+
+ writeretention Nothing = noop
+ writeretention (Just (rt, retentionts)) =
+ writeContentRetentionTimestamp key rt retentionts
+
+ -- When this is called, an exclusive lock has been taken, so no other
+ -- processes can be writing to the retention time stamp file.
+ -- The timestamp in the file may have been written by this
+ -- call to lockContentShared or a later call. Only delete the file
+ -- in the former case.
+ dropretention Nothing = noop
+ dropretention (Just (rt, retentionts)) =
+ readContentRetentionTimestamp rt >>= \case
+ Just ts | ts == retentionts ->
+ removeRetentionTimeStamp key rt
+ _ -> noop
+
+{- Exclusively locks content, including checking the retention timestamp,
+ - while performing an action that might remove it.
+ -
+ - If locking fails, throws an exception rather than running the action.
+ -
+ - When the content file itself is used as the lock file,
+ - and locking fails because the content is not present, runs the
+ - fallback action instead. However, the content is not guaranteed to be
+ - present when this succeeds.
+ -}
+lockContentForRemoval :: Key -> Annex a -> (ContentRemovalLock -> Annex a) -> Annex a
+lockContentForRemoval key fallback a = lockContentUsing lock key fallback $
+ a (ContentRemovalLock key)
+ where
+#ifndef mingw32_HOST_OS
+ lock _ (Just lockfile) =
+ ( checkRetentionTimestamp key
+ (posixLocker tryLockExclusive lockfile)
+ , Nothing
+ )
+ {- No lock file, so the content file itself is locked.
+ - Since content files are stored with the write bit
+ - disabled, have to fiddle with permissions to open
+ - for an exclusive lock.
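+ - The bracket_ below re-freezes the content even if taking the
+ - lock throws, so a failed attempt cannot leave the object file
+ - thawed.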
-} + lock contentfile Nothing = + let lck = bracket_ + (thawContent contentfile) + (freezeContent contentfile) + (tryLockExclusive Nothing contentfile) + in (lck, Nothing) +#else + lock obj lckf = + let (exlocker, expostunlock) = + winLocker lockExclusive obj lckf + in (checkRetentionTimestamp key exlocker, expostunlock) +#endif + +{- Passed the object content file, and maybe a separate lock file to use, + - when the content file itself should not be locked. -} +type ContentLocker + = RawFilePath + -> Maybe LockFile + -> + ( Annex (Maybe LockHandle) + -- ^ Takes the lock, which may be shared or exclusive. +#ifndef mingw32_HOST_OS + , Maybe (Annex (Maybe LockHandle)) + -- ^ When the above takes a shared lock, this is used + -- to take an exclusive lock, after dropping the shared lock, + -- and prior to deleting the lock file, in order to + -- ensure that no other processes also have a shared lock. +#else + , Maybe (RawFilePath -> Annex ()) + -- ^ On Windows, this is called after the lock is dropped, + -- but before the lock file is cleaned up. +#endif + ) + +#ifndef mingw32_HOST_OS +posixLocker :: (Maybe ModeSetter -> LockFile -> Annex (Maybe LockHandle)) -> LockFile -> Annex (Maybe LockHandle) +posixLocker takelock lockfile = do + mode <- annexFileMode + modifyContentDirWhenExists lockfile $ + takelock (Just mode) lockfile +#else +winLocker :: (LockFile -> IO (Maybe LockHandle)) -> ContentLocker +winLocker takelock _ (Just lockfile) = + let lck = do + modifyContentDir lockfile $ + void $ liftIO $ tryIO $ + writeFile (fromRawFilePath lockfile) "" + liftIO $ takelock lockfile + in (lck, Nothing) +-- never reached; windows always uses a separate lock file +winLocker _ _ Nothing = (return Nothing, Nothing) +#endif + +{- The fallback action is run if the ContentLocker throws an IO exception + - and the content is not present. It's not guaranteed to always run when + - the content is not present, because the content file is not always + - the file that is locked. -} +lockContentUsing :: ContentLocker -> Key -> Annex a -> Annex a -> Annex a +lockContentUsing contentlocker key fallback a = withContentLockFile key $ \mlockfile -> do + contentfile <- calcRepo (gitAnnexLocation key) + let (locker, sharedtoexclusive) = contentlocker contentfile mlockfile + bracket + (lock locker mlockfile) + (either (const noop) (unlock sharedtoexclusive mlockfile)) + go + where + alreadylocked = giveup "content is locked" + failedtolock e = giveup $ "failed to lock content: " ++ show e + +#ifndef mingw32_HOST_OS + lock locker mlockfile = +#else + lock locker _mlockfile = +#endif + tryIO $ locker >>= \case + Nothing -> alreadylocked + Just h -> +#ifndef mingw32_HOST_OS + case mlockfile of + Nothing -> return h + Just lockfile -> + ifM (checkSaneLock lockfile h) + ( return h + , alreadylocked + ) +#else + return h +#endif + + go (Right _) = a + go (Left e) = ifM (inAnnex key) + ( failedtolock e + , fallback + ) + +#ifndef mingw32_HOST_OS + unlock sharedtoexclusive mlockfile lck = case (sharedtoexclusive, mlockfile) of + -- We have a shared lock, so other processes may also + -- have shared locks of the same lock file. To avoid + -- deleting the lock file when there are other shared + -- locks, try to convert to an exclusive lock, and only + -- delete it when that succeeds. + -- + -- Since other processes might be doing the same, + -- a race is possible where we open the lock file + -- and then another process takes the exclusive lock and + -- deletes it, leaving us with an invalid lock. 
To avoid
+ -- that race, checkSaneLock is used after taking the lock
+ -- here, and above.
+ (Just exclusivelocker, Just lockfile) -> do
+ liftIO $ dropLock lck
+ exclusivelocker >>= \case
+ Nothing -> return ()
+ Just h -> do
+ whenM (checkSaneLock lockfile h) $ do
+ cleanuplockfile lockfile
+ liftIO $ dropLock h
+ -- We have an exclusive lock, so no other process can have
+ -- the lock file locked, and so it's safe to remove it, as
+ -- long as all lock attempts use checkSaneLock.
+ _ -> do
+ maybe noop cleanuplockfile mlockfile
+ liftIO $ dropLock lck
+#else
+ unlock postunlock mlockfile lck = do
+ -- Can't delete a locked file on Windows,
+ -- so close our lock first. If there are other shared
+ -- locks, they will prevent the lock file deletion from
+ -- happening.
+ liftIO $ dropLock lck
+ case mlockfile of
+ Nothing -> noop -- never reached
+ Just lockfile -> do
+ maybe noop (\pu -> pu lockfile) postunlock
+ cleanuplockfile lockfile
+#endif
+
+ cleanuplockfile lockfile = void $ tryNonAsync $ do
+ thawContentDir lockfile
+ liftIO $ removeWhenExistsWith R.removeLink lockfile
+ cleanObjectDirs lockfile
+
+{- Runs an action, passing it the temp file to get,
+ - and if the action succeeds, verifies the file matches
+ - the key and moves the file into the annex as a key's content. -}
+getViaTmp :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> AssociatedFile -> Maybe FileSize -> (RawFilePath -> Annex (Bool, Verification)) -> Annex Bool
+getViaTmp rsp v key af sz action =
+ checkDiskSpaceToGet key sz False $
+ getViaTmpFromDisk rsp v key af action
+
+{- Like getViaTmp, but does not check that there is enough disk space
+ - for the incoming key. For use when the key content is already on disk
+ - and not being copied into place. -}
+getViaTmpFromDisk :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> AssociatedFile -> (RawFilePath -> Annex (Bool, Verification)) -> Annex Bool
+getViaTmpFromDisk rsp v key af action = checkallowed $ do
+ tmpfile <- prepTmp key
+ resuming <- liftIO $ R.doesPathExist tmpfile
+ (ok, verification) <- action tmpfile
+ -- When the temp file already had content, we don't know if
+ -- that content is good or not, so only trust it if the action
+ -- Verified it in passing. Otherwise, force verification even
+ -- if the VerifyConfig normally disables it.
+ let verification' = if resuming
+ then case verification of
+ Verified -> Verified
+ _ -> MustVerify
+ else verification
+ if ok
+ then ifM (verifyKeyContentPostRetrieval rsp v verification' key tmpfile)
+ ( pruneTmpWorkDirBefore tmpfile (moveAnnex key af)
+ , do
+ verificationOfContentFailed tmpfile
+ return False
+ )
+ -- On transfer failure, the tmp file is left behind, in case
+ -- caller wants to resume its transfer
+ else return False
+ where
+ -- Avoid running the action to get the content when the
+ -- RetrievalSecurityPolicy would cause verification to always fail.
+ checkallowed a = case rsp of
+ RetrievalAllKeysSecure -> a
+ RetrievalVerifiableKeysSecure -> ifM (isVerifiable key)
+ ( a
+ , ifM (annexAllowUnverifiedDownloads <$> Annex.getGitConfig)
+ ( a
+ , warnUnverifiableInsecure key >> return False
+ )
+ )
+
+{- When the content of a file that was successfully transferred from a remote
+ - fails to verify, use this to display a message so the user knows why it
+ - failed, and to clean up the corrupted content.
+ -
+ - The bad content is not retained, because the transfer of it succeeded.
+ - So it's not incomplete and a resume using it will not work.
While
+ - some protocols like rsync could recover such a bad content file,
+ - they are assumed to not write out bad data to a file in the first place.
+ - Most protocols, including the P2P protocol, pick up downloads where they
+ - left off, and so if the bad content were not deleted, repeated downloads
+ - would continue to fail.
+ -}
+verificationOfContentFailed :: RawFilePath -> Annex ()
+verificationOfContentFailed tmpfile = do
+ warning "Verification of content failed"
+ pruneTmpWorkDirBefore tmpfile
+ (liftIO . removeWhenExistsWith R.removeLink)
+
+{- Checks if there is enough free disk space to download a key
+ - to its temp file.
+ -
+ - When the temp file already exists, count the space it is using as
+ - free, since the download will overwrite it or resume.
+ -
+ - When there's enough free space, runs the download action.
+ -}
+checkDiskSpaceToGet :: Key -> Maybe FileSize -> a -> Annex a -> Annex a
+checkDiskSpaceToGet key sz unabletoget getkey = do
+ tmp <- fromRepo (gitAnnexTmpObjectLocation key)
+ e <- liftIO $ doesFileExist (fromRawFilePath tmp)
+ alreadythere <- liftIO $ if e
+ then getFileSize tmp
+ else return 0
+ ifM (checkDiskSpace sz Nothing key alreadythere True)
+ ( do
+ -- The tmp file may not have been left writable
+ when e $ thawContent tmp
+ getkey
+ , return unabletoget
+ )
+
+prepTmp :: Key -> Annex RawFilePath
+prepTmp key = do
+ tmp <- fromRepo $ gitAnnexTmpObjectLocation key
+ createAnnexDirectory (parentDir tmp)
+ return tmp
+
+{- Prepares a temp file for a key, runs an action on it, and cleans up
+ - the temp file. If the action throws an exception, the temp file is
+ - left behind, which allows for resuming.
+ -}
+withTmp :: Key -> (RawFilePath -> Annex a) -> Annex a
+withTmp key action = do
+ tmp <- prepTmp key
+ res <- action tmp
+ pruneTmpWorkDirBefore tmp (liftIO . removeWhenExistsWith R.removeLink)
+ return res
+
+{- Moves a key's content into .git/annex/objects/
+ -
+ - When a key has associated pointer files, the object is hard
+ - linked (or copied) to the files, and the object file is left thawed.
+ -
+ - What if the key there already has content? This could happen for
+ - various reasons; perhaps the same content is being annexed again.
+ - Perhaps there has been a hash collision generating the keys.
+ -
+ - The current strategy is to assume that in this case it's safe to delete
+ - one of the two copies of the content; and the one already in the annex
+ - is left there, assuming it's the original, canonical copy.
+ -
+ - I considered being more paranoid, and checking that both files had
+ - the same content. Decided against it because A) users explicitly choose
+ - a backend based on its hashing properties and so if they're dealing
+ - with colliding files it's their own fault and B) adding such a check
+ - would not catch all cases of colliding keys. For example, perhaps
+ - a remote has a key; if it's then added again with different content then
+ - the overall system now has two different pieces of content for that
+ - key, and one of them will probably get deleted later. So, adding the
+ - check here would only raise expectations that git-annex cannot truly
+ - meet.
+ -
+ - May return False, when a particular variety of key is not being
+ - accepted into the repository. Will display a warning message in this
+ - case. May also throw exceptions in some cases.
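+ -
+ - A hypothetical call site, moving a verified download into place:
+ -
+ - > ok <- moveAnnex key af tmpfile
+ - > unless ok $
+ - >         warning "this key was not accepted into the annex"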
+ -} +moveAnnex :: Key -> AssociatedFile -> RawFilePath -> Annex Bool +moveAnnex key af src = ifM (checkSecureHashes' key) + ( do + withObjectLoc key storeobject + return True + , return False + ) + where + storeobject dest = ifM (liftIO $ R.doesPathExist dest) + ( alreadyhave + , adjustedBranchRefresh af $ modifyContentDir dest $ do + liftIO $ moveFile src dest + -- Freeze the object file now that it is in place. + -- Waiting until now to freeze it allows for freeze + -- hooks that prevent moving the file. + freezeContent dest + g <- Annex.gitRepo + fs <- map (`fromTopFilePath` g) + <$> Database.Keys.getAssociatedFiles key + unless (null fs) $ do + destic <- withTSDelta $ + liftIO . genInodeCache dest + ics <- mapM (populatePointerFile (Restage True) key dest) fs + Database.Keys.addInodeCaches key + (catMaybes (destic:ics)) + ) + alreadyhave = liftIO $ R.removeLink src + +checkSecureHashes :: Key -> Annex (Maybe String) +checkSecureHashes key = ifM (Backend.isCryptographicallySecureKey key) + ( return Nothing + , ifM (annexSecureHashesOnly <$> Annex.getGitConfig) + ( return $ Just $ "annex.securehashesonly blocked adding " ++ decodeBS (formatKeyVariety (fromKey keyVariety key)) ++ " key" + , return Nothing + ) + ) + +checkSecureHashes' :: Key -> Annex Bool +checkSecureHashes' key = checkSecureHashes key >>= \case + Nothing -> return True + Just msg -> do + warning $ UnquotedString $ msg ++ " to annex objects" + return False + +data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop + deriving (Eq) + +{- Populates the annex object file by hard linking or copying a source + - file to it. -} +linkToAnnex :: Key -> RawFilePath -> Maybe InodeCache -> Annex LinkAnnexResult +linkToAnnex key src srcic = ifM (checkSecureHashes' key) + ( do + dest <- calcRepo (gitAnnexLocation key) + modifyContentDir dest $ linkAnnex To key src srcic dest Nothing + , return LinkAnnexFailed + ) + +{- Makes a destination file be a link or copy from the annex object. + - + - linkAnnex stats the file after copying it to add to the inode + - cache. But dest may be a file in the working tree, which could + - get modified immediately after being populated. To avoid such a + - race, call linkAnnex on a temporary file and move it into place + - afterwards. Note that a consequence of this is that, if the file + - already exists, it will be overwritten. + -} +linkFromAnnex :: Key -> RawFilePath -> Maybe FileMode -> Annex LinkAnnexResult +linkFromAnnex key dest destmode = + replaceFile' (const noop) (fromRawFilePath dest) (== LinkAnnexOk) $ \tmp -> + linkFromAnnex' key tmp destmode + +{- This is only safe to use when dest is not a worktree file. -} +linkFromAnnex' :: Key -> RawFilePath -> Maybe FileMode -> Annex LinkAnnexResult +linkFromAnnex' key dest destmode = do + src <- calcRepo (gitAnnexLocation key) + srcic <- withTSDelta (liftIO . genInodeCache src) + linkAnnex From key src srcic dest destmode + +data FromTo = From | To + +{- Hard links or copies from or to the annex object location. + - Updates inode cache. + - + - Freezes or thaws the destination appropriately. + - + - When a hard link is made, the annex object necessarily has to be thawed + - too. So, adding an object to the annex with a hard link can prevent + - losing the content if the source file is deleted, but does not + - guard against modifications. + - + - Nothing is done if the destination file already exists. 
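+ - + - A hedged sketch of interpreting the result (illustrative only): + - + - linkAnnex To key src (Just srcic) dest destmode >>= \case + - LinkAnnexOk -> ... -- dest populated, inode cache recorded + - LinkAnnexNoop -> ... -- dest already existed, nothing done + - LinkAnnexFailed -> ... -- link/copy failed, or src was modified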
+ -} +linkAnnex :: FromTo -> Key -> RawFilePath -> Maybe InodeCache -> RawFilePath -> Maybe FileMode -> Annex LinkAnnexResult +linkAnnex _ _ _ Nothing _ _ = return LinkAnnexFailed +linkAnnex fromto key src (Just srcic) dest destmode = + withTSDelta (liftIO . genInodeCache dest) >>= \case + Just destic -> do + cs <- Database.Keys.getInodeCaches key + if null cs + then Database.Keys.addInodeCaches key [srcic, destic] + else Database.Keys.addInodeCaches key [srcic] + return LinkAnnexNoop + Nothing -> linkOrCopy key src dest destmode >>= \case + Nothing -> failed + Just r -> do + case fromto of + From -> thawContent dest + To -> case r of + Copied -> freezeContent dest + Linked -> noop + checksrcunchanged + where + failed = do + Database.Keys.addInodeCaches key [srcic] + return LinkAnnexFailed + checksrcunchanged = withTSDelta (liftIO . genInodeCache src) >>= \case + Just srcic' | compareStrong srcic srcic' -> do + destic <- withTSDelta (liftIO . genInodeCache dest) + Database.Keys.addInodeCaches key $ + catMaybes [destic, Just srcic] + return LinkAnnexOk + _ -> do + liftIO $ removeWhenExistsWith R.removeLink dest + failed + +{- Removes the annex object file for a key. Lowlevel. -} +unlinkAnnex :: Key -> Annex () +unlinkAnnex key = do + obj <- calcRepo (gitAnnexLocation key) + modifyContentDir obj $ do + secureErase obj + liftIO $ removeWhenExistsWith R.removeLink obj + +{- Runs an action to transfer an object's content. The action is also + - passed the size of the object. + - + - In some cases, it's possible for the file to change as it's being sent. + - If this happens, runs the rollback action and throws an exception. + - The rollback action should remove the data that was transferred. + -} +sendAnnex :: Key -> Maybe FilePath -> Annex () -> (FilePath -> FileSize -> Annex a) -> Annex a +sendAnnex key o rollback sendobject = go =<< prepSendAnnex' key o + where + go (Just (f, sz, check)) = do + r <- sendobject f sz + check >>= \case + Nothing -> return r + Just err -> do + rollback + giveup err + go Nothing = giveup "content not available to send" + +{- Returns a file that contains an object's content, + - and a check to run after the transfer is complete. + - + - When a file is unlocked, it's possible for its content to + - change as it's being sent. The check detects this case + - and returns False. + - + - Note that the returned check action is, in some cases, run in the + - Annex monad of the remote that is receiving the object, rather than + - the sender. So it cannot rely on Annex state. + -} +prepSendAnnex :: Key -> Maybe FilePath -> Annex (Maybe (FilePath, FileSize, Annex Bool)) +prepSendAnnex key Nothing = withObjectLoc key $ \f -> do + let retval c cs = return $ Just + ( fromRawFilePath f + , inodeCacheFileSize c + , sameInodeCache f cs + ) + cache <- Database.Keys.getInodeCaches key + if null cache + -- Since no inode cache is in the database, this + -- object is not currently unlocked. But that could + -- change while the transfer is in progress, so + -- generate an inode cache for the starting + -- content. + then maybe (return Nothing) (\fc -> retval fc [fc]) + =<< withTSDelta (liftIO . genInodeCache f) + -- Verify that the object is not modified. Usually this + -- only has to check the inode cache, but if the cache + -- is somehow stale, it will fall back to verifying its + -- content. + else withTSDelta (liftIO . 
genInodeCache f) >>= \case + Just fc -> ifM (isUnmodified' key f fc cache) + ( retval fc (fc:cache) + , return Nothing + ) + Nothing -> return Nothing +-- If the provided object file is the annex object file, handle as above. +prepSendAnnex key (Just o) = withObjectLoc key $ \aof -> + let o' = toRawFilePath o + in if aof == o' + then prepSendAnnex key Nothing + else do + withTSDelta (liftIO . genInodeCache o') >>= \case + Nothing -> return Nothing + Just c -> return $ Just + ( o + , inodeCacheFileSize c + , sameInodeCache o' [c] + ) +
+prepSendAnnex' :: Key -> Maybe FilePath -> Annex (Maybe (FilePath, FileSize, Annex (Maybe String))) +prepSendAnnex' key o = prepSendAnnex key o >>= \case + Just (f, sz, checksuccess) -> + let checksuccess' = ifM checksuccess + ( return Nothing + , return (Just "content changed while it was being sent") + ) + in return (Just (f, sz, checksuccess')) + Nothing -> return Nothing +
+cleanObjectLoc :: Key -> Annex () -> Annex () +cleanObjectLoc key cleaner = do + file <- calcRepo (gitAnnexLocation key) + void $ tryIO $ thawContentDir file + {- Thawing is not necessary when the file was frozen only + - by removing write perms. But if there is a thaw hook, it may do + - something else that is necessary to allow the file to be + - deleted. + -} + whenM hasThawHook $ + void $ tryIO $ thawContent file + + cleaner + cleanObjectDirs file +
+{- Given a filename inside the object directory, tries to remove the object + - directory, as well as the object hash directories. + - + - Does nothing if the object directory is not empty, and does not + - throw an exception if it's unable to remove a directory. -} +cleanObjectDirs :: RawFilePath -> Annex () +cleanObjectDirs f = do + HashLevels n <- objectHashLevels <$> Annex.getGitConfig + liftIO $ go f (succ n) + where + go _ 0 = noop + go file n = do + let dir = parentDir file + maybe noop (const $ go dir (n-1)) + <=< catchMaybeIO $ tryWhenExists $ + removeDirectory (fromRawFilePath dir) +
+{- Removes a key's file from .git/annex/objects/ -} +removeAnnex :: ContentRemovalLock -> Annex () +removeAnnex (ContentRemovalLock key) = withObjectLoc key $ \file -> + cleanObjectLoc key $ do + secureErase file + liftIO $ removeWhenExistsWith R.removeLink file + g <- Annex.gitRepo + mapM_ (\f -> void $ tryIO $ resetpointer $ fromTopFilePath f g) + =<< Database.Keys.getAssociatedFiles key + Database.Keys.removeInodeCaches key + where + -- Check associated pointer file for modifications, and reset if + -- it's unmodified. + resetpointer file = unlessM (liftIO $ isSymbolicLink <$> R.getSymbolicLinkStatus file) $ + ifM (isUnmodified key file) + ( adjustedBranchRefresh (AssociatedFile (Just file)) $ + depopulatePointerFile key file + -- Modified file, so leave it alone. + -- If it was a hard link to the annex object, + -- that object might have been frozen as part of the + -- removal process, so thaw it. + , void $ tryIO $ thawContent file + ) +
+{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and + - returns the file it was moved to. -} +moveBad :: Key -> Annex RawFilePath +moveBad key = do + src <- calcRepo (gitAnnexLocation key) + bad <- fromRepo gitAnnexBadDir + let dest = bad P.</> P.takeFileName src + createAnnexDirectory (parentDir dest) + cleanObjectLoc key $ + liftIO $ moveFile src dest + logStatus key InfoMissing + return dest +
+data KeyLocation = InAnnex | InAnywhere +
+{- InAnnex only lists keys with content in .git/annex/objects.
+ - InAnywhere lists all keys that have directories in + - .git/annex/objects, whether or not the content is present. + -} +listKeys :: KeyLocation -> Annex [Key] +listKeys keyloc = listKeys' keyloc (const (pure True)) +
+{- Due to use of unsafeInterleaveIO, the passed filter action + - will be run in a copy of the Annex state, so any changes it + - makes to the state will not be preserved. -} +listKeys' :: KeyLocation -> (Key -> Annex Bool) -> Annex [Key] +listKeys' keyloc want = do + dir <- fromRepo gitAnnexObjectDir + s <- Annex.getState id + r <- Annex.getRead id + depth <- gitAnnexLocationDepth <$> Annex.getGitConfig + liftIO $ walk (s, r) depth (fromRawFilePath dir) + where + walk s depth dir = do + contents <- catchDefaultIO [] (dirContents dir) + if depth < 2 + then do + contents' <- filterM present contents + keys <- filterM (Annex.eval s . want) $ + mapMaybe (fileKey . P.takeFileName . toRawFilePath) contents' + continue keys [] + else do + let deeper = walk s (depth - 1) + continue [] (map deeper contents) + continue keys [] = return keys + continue keys (a:as) = do + {- Force lazy traversal with unsafeInterleaveIO. -} + morekeys <- unsafeInterleaveIO a + continue (morekeys++keys) as + + inanywhere = case keyloc of + InAnywhere -> True + _ -> False + + present _ | inanywhere = pure True + present d = presentInAnnex d + + presentInAnnex = doesFileExist . contentfile + contentfile d = d </> takeFileName d +
+{- Things to do to record changes to content when shutting down. + - + - It's acceptable to avoid committing changes to the branch, + - especially if performing a short-lived action. + -} +saveState :: Bool -> Annex () +saveState nocommit = doSideAction $ do + Annex.Queue.flush + Database.Keys.flushDb + unless nocommit $ + whenM (annexAlwaysCommit <$> Annex.getGitConfig) $ + Annex.Branch.commit =<< Annex.Branch.commitMessage +
+{- Downloads content from any of a list of urls, displaying a progress + - meter. + - + - Only displays error message if all the urls fail to download. + - When listfailedurls is set, lists each url and why it failed. + - Otherwise, only displays one error message, from one of the urls + - that failed. + -} +downloadUrl :: Bool -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> [Url.URLString] -> FilePath -> Url.UrlOptions -> Annex Bool +downloadUrl listfailedurls k p iv urls file uo = + -- Poll the file to handle configurations where an external + -- download command is used. + meteredFile (toRawFilePath file) (Just p) k (go urls []) + where + go (u:us) errs p' = Url.download' p' iv u file uo >>= \case + Right () -> return True + Left err -> do + -- If the incremental verifier was fed anything + -- while the download that failed ran, it's unable + -- to be used for the other urls. + case iv of + Just iv' -> + liftIO $ positionIncrementalVerifier iv' >>= \case + Just n | n > 0 -> unableIncrementalVerifier iv' + _ -> noop + Nothing -> noop + go us ((u, err) : errs) p' + go [] [] _ = return False + go [] errs@((_, err):_) _ = do + if listfailedurls + then warning $ UnquotedString $ + unlines $ flip map errs $ \(u, err') -> + u ++ " " ++ err' + else warning $ UnquotedString err + return False +
+{- Copies a key's content, when present, to a temp file. + - This is used to speed up some rsyncs.
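+ - + - A hedged sketch (rsyncFromRemote is hypothetical): seeding the temp + - file first means the subsequent rsync only sends the differences: + - + - void $ preseedTmp key file + - rsyncFromRemote key file + -}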
+preseedTmp :: Key -> FilePath -> Annex Bool +preseedTmp key file = go =<< inAnnex key + where + go False = return False + go True = do + ok <- copy + when ok $ thawContent (toRawFilePath file) + return ok + copy = ifM (liftIO $ doesFileExist file) + ( return True + , do + s <- fromRawFilePath <$> (calcRepo $ gitAnnexLocation key) + liftIO $ ifM (doesFileExist s) + ( copyFileExternal CopyTimeStamps s file + , return False + ) + ) +
+{- Finds files directly inside a directory like gitAnnexBadDir + - (not in subdirectories) and returns the corresponding keys. -} +dirKeys :: (Git.Repo -> RawFilePath) -> Annex [Key] +dirKeys dirspec = do + dir <- fromRawFilePath <$> fromRepo dirspec + ifM (liftIO $ doesDirectoryExist dir) + ( do + contents <- liftIO $ getDirectoryContents dir + files <- liftIO $ filterM doesFileExist $ + map (dir </>) contents + return $ mapMaybe (fileKey . P.takeFileName . toRawFilePath) files + , return [] + ) +
+{- Looks in the specified directory for bad/tmp keys, and returns a list + - of those that might still have value, or might be stale and removable. + - + - Also, stale keys that can be proven to have no value + - (ie, their content is already present) are deleted. + -} +staleKeysPrune :: (Git.Repo -> RawFilePath) -> Bool -> Annex [Key] +staleKeysPrune dirspec nottransferred = do + contents <- dirKeys dirspec + + dups <- filterM inAnnex contents + let stale = contents `exclude` dups + + dir <- fromRepo dirspec + forM_ dups $ \k -> + pruneTmpWorkDirBefore (dir P.</> keyFile k) + (liftIO . R.removeLink) + + if nottransferred + then do + inprogress <- S.fromList . map (transferKey . fst) + <$> getTransfers + return $ filter (`S.notMember` inprogress) stale + else return stale +
+{- Prune the work dir associated with the specified content file, + - before performing an action that deletes the file, or moves it away. + - + - This preserves the invariant that the workdir never exists without + - the content file. + -} +pruneTmpWorkDirBefore :: RawFilePath -> (RawFilePath -> Annex a) -> Annex a +pruneTmpWorkDirBefore f action = do + let workdir = fromRawFilePath $ gitAnnexTmpWorkDir f + liftIO $ whenM (doesDirectoryExist workdir) $ + removeDirectoryRecursive workdir + action f +
+{- Runs an action, passing it a temporary work directory where + - it can write files while receiving the content of a key. + - + - Preserves the invariant that the workdir never exists without the + - content file, by creating an empty content file first. + - + - On exception, or when the action returns Nothing, + - the temporary work directory is retained (unless + - empty), so anything in it can be used on resume. + -} +withTmpWorkDir :: Key -> (RawFilePath -> Annex (Maybe a)) -> Annex (Maybe a) +withTmpWorkDir key action = do + -- Create the object file if it does not exist. This way, + -- staleKeysPrune only has to look for object files, and can + -- clean up gitAnnexTmpWorkDir for those it finds. + obj <- prepTmp key + let obj' = fromRawFilePath obj + unlessM (liftIO $ doesFileExist obj') $ do + liftIO $ writeFile obj' "" + setAnnexFilePerm obj + let tmpdir = gitAnnexTmpWorkDir obj + createAnnexDirectory tmpdir + res <- action tmpdir + case res of + Just _ -> liftIO $ removeDirectoryRecursive (fromRawFilePath tmpdir) + Nothing -> liftIO $ void $ tryIO $ removeDirectory (fromRawFilePath tmpdir) + return res +
+{- Finds items in the first, smaller list, that are not + - present in the second, larger list.
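+ - For example, [1,3] `exclude` [2,3,4] == [1].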
+ - + - Constructing a single set, of the list that tends to be + - smaller, appears more efficient in both memory and CPU + - than constructing and taking the S.difference of two sets. -} +exclude :: Ord a => [a] -> [a] -> [a] +exclude [] _ = [] -- optimisation +exclude smaller larger = S.toList $ remove larger $ S.fromList smaller + where + remove a b = foldl (flip S.delete) b a + +data KeyStatus + = KeyMissing + | KeyPresent + | KeyUnlockedThin + -- ^ An annex.thin worktree file is hard linked to the object. + | KeyLockedThin + -- ^ The object has hard links, but the file being fscked + -- is not the one that hard links to it. + deriving (Show) + +isKeyUnlockedThin :: KeyStatus -> Bool +isKeyUnlockedThin KeyUnlockedThin = True +isKeyUnlockedThin KeyLockedThin = False +isKeyUnlockedThin KeyPresent = False +isKeyUnlockedThin KeyMissing = False + +getKeyStatus :: Key -> Annex KeyStatus +getKeyStatus key = catchDefaultIO KeyMissing $ do + afs <- not . null <$> Database.Keys.getAssociatedFiles key + obj <- calcRepo (gitAnnexLocation key) + multilink <- ((> 1) . linkCount <$> liftIO (R.getFileStatus obj)) + return $ if multilink && afs + then KeyUnlockedThin + else KeyPresent + +getKeyFileStatus :: Key -> RawFilePath -> Annex KeyStatus +getKeyFileStatus key file = do + s <- getKeyStatus key + case s of + KeyUnlockedThin -> catchDefaultIO KeyUnlockedThin $ + ifM (isJust <$> isAnnexLink file) + ( return KeyLockedThin + , return KeyUnlockedThin + ) + _ -> return s + +{- Gets the size of the content of a key when it is present. + - Useful when the key does not have keySize set. + - + - When the object file appears possibly modified with annex.thin set, does + - not do an expensive verification that the content is good, just returns + - Nothing. + -} +contentSize :: Key -> Annex (Maybe FileSize) +contentSize key = catchDefaultIO Nothing $ + withObjectLoc key $ \loc -> + withTSDelta (liftIO . genInodeCache loc) >>= \case + Just ic -> ifM (unmodified ic) + ( return (Just (inodeCacheFileSize ic)) + , return Nothing + ) + Nothing -> return Nothing + where + unmodified ic = + ifM (annexThin <$> Annex.getGitConfig) + ( isUnmodifiedCheap' key ic + , return True + ) + +{- Avoids writing a timestamp when the file already contains a later + - timestamp. The file is written atomically, so when it contained an + - earlier timestamp, a reader will always see one or the other timestamp. + -} +writeContentRetentionTimestamp :: Key -> RawFilePath -> POSIXTime -> Annex () +writeContentRetentionTimestamp key rt t = do + lckfile <- calcRepo (gitAnnexContentRetentionTimestampLock key) + modifyContentDirWhenExists lckfile $ bracket (lock lckfile) unlock $ \_ -> + readContentRetentionTimestamp rt >>= \case + Just ts | ts >= t -> return () + _ -> replaceFile (const noop) (fromRawFilePath rt) $ \tmp -> + liftIO $ writeFile (fromRawFilePath tmp) $ show t + where + lock = takeExclusiveLock + unlock = liftIO . dropLock + +{- Does not need locking because the file is written atomically. -} +readContentRetentionTimestamp :: RawFilePath -> Annex (Maybe POSIXTime) +readContentRetentionTimestamp rt = + liftIO $ join <$> tryWhenExists + (parsePOSIXTime <$> readFile (fromRawFilePath rt)) + +{- Checks if the retention timestamp is in the future, if so returns + - Nothing. + - + - If the retention timestamp is in the past, the retention timestamp file + - is deleted. This cleans up stale retention timestamps. + - + - The locker should take a lock that prevents any other processes from + - writing to the retention timestamp. 
So the retention timestamp lock + - is not used here and can also be deleted when deleting the retention + - timestamp file. + -} +checkRetentionTimestamp :: Key -> Annex (Maybe LockHandle) -> Annex (Maybe LockHandle) +checkRetentionTimestamp key locker = do + rt <- calcRepo (gitAnnexContentRetentionTimestamp key) + readContentRetentionTimestamp rt >>= \case + Nothing -> locker + Just ts -> do + now <- liftIO getPOSIXTime + if now > ts + then locker >>= \case + Nothing -> return Nothing + Just lock -> do + removeRetentionTimeStamp key rt + return (Just lock) + else return Nothing + +{- Remove the retention timestamp and its lock file. Another lock must + - be held, that prevents anything else writing to the file at the same + - time. -} +removeRetentionTimeStamp :: Key -> RawFilePath -> Annex () +removeRetentionTimeStamp key rt = modifyContentDirWhenExists rt $ do + liftIO $ removeWhenExistsWith R.removeLink rt + rtl <- calcRepo (gitAnnexContentRetentionTimestampLock key) + liftIO $ removeWhenExistsWith R.removeLink rtl diff --git a/Annex/Content/LowLevel.hs b/Annex/Content/LowLevel.hs new file mode 100644 index 0000000000..9d732f6a6e --- /dev/null +++ b/Annex/Content/LowLevel.hs @@ -0,0 +1,141 @@ +{- git-annex low-level content functions + - + - Copyright 2010-2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.Content.LowLevel where + +import Annex.Common +import Logs.Transfer +import qualified Annex +import Utility.DiskFree +import Utility.FileMode +import Utility.DataUnits +import Utility.CopyFile +import qualified Utility.RawFilePath as R + +import qualified System.FilePath.ByteString as P +import System.PosixCompat.Files (linkCount) + +{- Runs the secure erase command if set, otherwise does nothing. + - File may or may not be deleted at the end; caller is responsible for + - making sure it's deleted. -} +secureErase :: RawFilePath -> Annex () +secureErase file = maybe noop go =<< annexSecureEraseCommand <$> Annex.getGitConfig + where + go basecmd = void $ liftIO $ + boolSystem "sh" [Param "-c", Param $ gencmd basecmd] + gencmd = massReplace [ ("%file", shellEscape (fromRawFilePath file)) ] + +data LinkedOrCopied = Linked | Copied + +{- Hard links or copies src to dest, which must not already exist. + - + - Only uses a hard link when annex.thin is enabled and when src is + - not already hardlinked to elsewhere. + - + - Checks disk reserve before copying against the size of the key, + - and will fail if not enough space, or if the dest file already exists. + - + - The FileMode, if provided, influences the mode of the dest file. + - In particular, if it has an execute bit set, the dest file's + - execute bit will be set. The mode is not fully copied over because + - git doesn't support file modes beyond execute. 
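+ - + - A hedged sketch (illustrative): the annex.thin probe can be bypassed + - by calling linkOrCopy' directly, eg to force the copy code path: + - + - linkOrCopy' (pure False) key src dest destmode + - + - which yields Just Copied on success, and Nothing on failure.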
+ -} +linkOrCopy :: Key -> RawFilePath -> RawFilePath -> Maybe FileMode -> Annex (Maybe LinkedOrCopied) +linkOrCopy = linkOrCopy' (annexThin <$> Annex.getGitConfig) + +linkOrCopy' :: Annex Bool -> Key -> RawFilePath -> RawFilePath -> Maybe FileMode -> Annex (Maybe LinkedOrCopied) +linkOrCopy' canhardlink key src dest destmode = catchDefaultIO Nothing $ + ifM canhardlink + ( hardlink + , copy =<< getstat + ) + where + hardlink = do + s <- getstat + if linkCount s > 1 + then copy s + else liftIO (R.createLink src dest >> preserveGitMode dest destmode >> return (Just Linked)) + `catchIO` const (copy s) + copy s = ifM (checkedCopyFile' key src dest destmode s) + ( return (Just Copied) + , return Nothing + ) + getstat = liftIO $ R.getFileStatus src + +{- Checks disk space before copying. -} +checkedCopyFile :: Key -> RawFilePath -> RawFilePath -> Maybe FileMode -> Annex Bool +checkedCopyFile key src dest destmode = catchBoolIO $ + checkedCopyFile' key src dest destmode + =<< liftIO (R.getFileStatus src) + +checkedCopyFile' :: Key -> RawFilePath -> RawFilePath -> Maybe FileMode -> FileStatus -> Annex Bool +checkedCopyFile' key src dest destmode s = catchBoolIO $ do + sz <- liftIO $ getFileSize' src s + ifM (checkDiskSpace' sz (Just $ P.takeDirectory dest) key 0 True) + ( liftIO $ + copyFileExternal CopyAllMetaData (fromRawFilePath src) (fromRawFilePath dest) + <&&> preserveGitMode dest destmode + , return False + ) + +preserveGitMode :: RawFilePath -> Maybe FileMode -> IO Bool +preserveGitMode f (Just mode) + | isExecutable mode = catchBoolIO $ do + modifyFileMode f $ addModes executeModes + return True + | otherwise = catchBoolIO $ do + modifyFileMode f $ removeModes executeModes + return True +preserveGitMode _ _ = return True + +{- Checks that there is disk space available to store a given key, + - in a destination directory (or the annex) printing a warning if not. + - + - If the destination is on the same filesystem as the annex, + - checks for any other running downloads, removing the amount of data still + - to be downloaded from the free space. This way, we avoid overcommitting + - when doing concurrent downloads. + -} +checkDiskSpace :: Maybe FileSize -> Maybe RawFilePath -> Key -> Integer -> Bool -> Annex Bool +checkDiskSpace msz destdir key = checkDiskSpace' sz destdir key + where + sz = fromMaybe 1 (fromKey keySize key <|> msz) + +checkDiskSpace' :: FileSize -> Maybe RawFilePath -> Key -> Integer -> Bool -> Annex Bool +checkDiskSpace' sz destdir key alreadythere samefilesystem = ifM (Annex.getRead Annex.force) + ( return True + , do + -- We can't get inprogress and free at the same + -- time, and both can be changing, so there's a + -- small race here. Err on the side of caution + -- by getting inprogress first, so if it takes + -- a while, we'll see any decrease in the free + -- disk space. + inprogress <- if samefilesystem + then sizeOfDownloadsInProgress (/= key) + else pure 0 + dir >>= liftIO . getDiskFree . 
fromRawFilePath >>= \case + Just have -> do + reserve <- annexDiskReserve <$> Annex.getGitConfig + let delta = sz + reserve - have - alreadythere + inprogress + let ok = delta <= 0 + unless ok $ + warning $ UnquotedString $ + needMoreDiskSpace delta + return ok + _ -> return True + ) + where + dir = maybe (fromRepo gitAnnexDir) return destdir + +needMoreDiskSpace :: Integer -> String +needMoreDiskSpace n = "not enough free space, need " ++ + roughSize storageUnits True n ++ " more" ++ forcemsg + where + forcemsg = " (use --force to override this check or adjust annex.diskreserve)" diff --git a/Annex/Content/PointerFile.hs b/Annex/Content/PointerFile.hs new file mode 100644 index 0000000000..c2acc9ab93 --- /dev/null +++ b/Annex/Content/PointerFile.hs @@ -0,0 +1,71 @@ +{- git-annex pointer files + - + - Copyright 2010-2018 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.Content.PointerFile where + +import Annex.Common +import Annex.Perms +import Annex.Link +import Annex.ReplaceFile +import Annex.InodeSentinal +import Annex.Content.LowLevel +import Utility.InodeCache +import qualified Utility.RawFilePath as R +#if ! defined(mingw32_HOST_OS) +import Utility.Touch +import qualified System.Posix.Files as Posix +#endif + +import System.PosixCompat.Files (fileMode) + +{- Populates a pointer file with the content of a key. + - + - If the file already has some other content, it is not modified. + - + - Returns an InodeCache if it populated the pointer file. + -} +populatePointerFile :: Restage -> Key -> RawFilePath -> RawFilePath -> Annex (Maybe InodeCache) +populatePointerFile restage k obj f = go =<< liftIO (isPointerFile f) + where + go (Just k') | k == k' = do + let f' = fromRawFilePath f + destmode <- liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus f + liftIO $ removeWhenExistsWith R.removeLink f + (ic, populated) <- replaceWorkTreeFile f' $ \tmp -> do + ok <- linkOrCopy k obj tmp destmode >>= \case + Just _ -> thawContent tmp >> return True + Nothing -> liftIO (writePointerFile tmp k destmode) >> return False + ic <- withTSDelta (liftIO . genInodeCache tmp) + return (ic, ok) + maybe noop (restagePointerFile restage f) ic + if populated + then return ic + else return Nothing + go _ = return Nothing + +{- Removes the content from a pointer file, replacing it with a pointer. + - + - Does not check if the pointer file is modified. -} +depopulatePointerFile :: Key -> RawFilePath -> Annex () +depopulatePointerFile key file = do + st <- liftIO $ catchMaybeIO $ R.getFileStatus file + let mode = fmap fileMode st + secureErase file + liftIO $ removeWhenExistsWith R.removeLink file + ic <- replaceWorkTreeFile (fromRawFilePath file) $ \tmp -> do + liftIO $ writePointerFile tmp key mode +#if ! defined(mingw32_HOST_OS) + -- Don't advance mtime; this avoids unnecessary re-smudging + -- by git in some cases. + liftIO $ maybe noop + (\t -> touch tmp t False) + (fmap Posix.modificationTimeHiRes st) +#endif + withTSDelta (liftIO . genInodeCache tmp) + maybe noop (restagePointerFile (Restage True) file) ic diff --git a/Annex/Content/Presence.hs b/Annex/Content/Presence.hs new file mode 100644 index 0000000000..2eb0016ddd --- /dev/null +++ b/Annex/Content/Presence.hs @@ -0,0 +1,215 @@ +{- git-annex object content presence + - + - Copyright 2010-2022 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} +
+{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} +
+module Annex.Content.Presence ( + inAnnex, + inAnnex', + inAnnexSafe, + inAnnexCheck, + objectFileExists, + withObjectLoc, + isUnmodified, + isUnmodified', + isUnmodifiedCheap, + isUnmodifiedCheap', + withContentLockFile, + contentLockFile, +) where +
+import Annex.Content.Presence.LowLevel +import Annex.Common +import qualified Annex +import Annex.LockPool +import Annex.LockFile +import Annex.Version +import Types.RepoVersion +import qualified Database.Keys +import Annex.InodeSentinal +import Utility.InodeCache +import qualified Utility.RawFilePath as R +import qualified Git +import Config +
+#ifdef mingw32_HOST_OS +import Annex.Perms +#endif +
+import qualified System.FilePath.ByteString as P +
+{- Checks if a given key's content is currently present. -} +inAnnex :: Key -> Annex Bool +inAnnex key = inAnnexCheck key $ liftIO . R.doesPathExist +
+{- Runs an arbitrary check on a key's content. -} +inAnnexCheck :: Key -> (RawFilePath -> Annex Bool) -> Annex Bool +inAnnexCheck key check = inAnnex' id False check key +
+{- inAnnex that performs an arbitrary check of the key's content. -} +inAnnex' :: (a -> Bool) -> a -> (RawFilePath -> Annex a) -> Key -> Annex a +inAnnex' isgood bad check key = withObjectLoc key $ \loc -> do + r <- check loc + if isgood r + then ifM (annexThin <$> Annex.getGitConfig) + -- When annex.thin is set, the object file + -- could be modified; make sure it's not. + -- (Suppress any messages about + -- checksumming, to avoid them cluttering + -- the display.) + ( ifM (doQuietAction $ isUnmodified key loc) + ( return r + , return bad + ) + , return r + ) + else return bad +
+{- Like inAnnex, checks if the object file for a key exists, + - but there are no guarantees it has the right content. -} +objectFileExists :: Key -> Annex Bool +objectFileExists key = + calcRepo (gitAnnexLocation key) + >>= liftIO . R.doesPathExist +
+{- A safer check; the key's content must not only be present, but + - must also not be in the process of being removed. -} +inAnnexSafe :: Key -> Annex (Maybe Bool) +inAnnexSafe key = inAnnex' (fromMaybe True) (Just False) go key + where + is_locked = Nothing + is_unlocked = Just True + is_missing = Just False + + go contentfile = withContentLockFile key $ flip checklock contentfile +
+#ifndef mingw32_HOST_OS + checklock Nothing contentfile = checkOr is_missing contentfile + {- The content file must exist, but the lock file generally + - won't exist unless a removal is in process. -} + checklock (Just lockfile) contentfile = + ifM (liftIO $ doesFileExist (fromRawFilePath contentfile)) + ( checkOr is_unlocked lockfile + , return is_missing + ) + checkOr d lockfile = checkLocked lockfile >>= return . \case + Nothing -> d + Just True -> is_locked + Just False -> is_unlocked +#else + checklock Nothing contentfile = liftIO $ ifM (doesFileExist (fromRawFilePath contentfile)) + ( lockShared contentfile >>= \case + Nothing -> return is_locked + Just lockhandle -> do + dropLock lockhandle + return is_unlocked + , return is_missing + ) + {- On Windows, see if we can take a shared lock. If so, + - remove the lock file to clean up after ourselves. -}
+ checklock (Just lockfile) contentfile = + ifM (liftIO $ doesFileExist (fromRawFilePath contentfile)) + ( modifyContentDir lockfile $ liftIO $ + lockShared lockfile >>= \case + Nothing -> return is_locked + Just lockhandle -> do + dropLock lockhandle + void $ tryIO $ removeWhenExistsWith R.removeLink lockfile + return is_unlocked + , return is_missing + ) +#endif +
+{- Runs an action with the lock file to use to lock a key's content. + - When the content file itself should be locked, runs the action with + - Nothing. + - + - In v9 and below, while the action is running, a shared lock is held of the + - gitAnnexContentLockLock. That prevents the v10 upgrade, which changes how + - content locking works, from running at the same time as content is locked + - using the old method. + -} +withContentLockFile :: Key -> (Maybe RawFilePath -> Annex a) -> Annex a +withContentLockFile k a = do + v <- getVersion + if versionNeedsWritableContentFiles v + then fromRepo gitAnnexContentLockLock >>= \lck -> withSharedLock lck $ do + {- While the lock is held, check to see if the git + - config has changed, and reload it if so. This + - updates the annex.version after the v10 upgrade, + - so that a process that started in a v9 repository + - will switch over to v10 content lock files at the + - right time. -} + gitdir <- fromRepo Git.localGitDir + let gitconfig = gitdir P.</> "config" + ic <- withTSDelta (liftIO . genInodeCache gitconfig) + oldic <- Annex.getState Annex.gitconfiginodecache + v' <- if fromMaybe False (compareStrong <$> ic <*> oldic) + then pure v + else do + Annex.changeState $ \s -> + s { Annex.gitconfiginodecache = ic } + reloadConfig + getVersion + go (v') + else go v + where + go v = contentLockFile k v >>= a +
+contentLockFile :: Key -> Maybe RepoVersion -> Annex (Maybe RawFilePath) +#ifndef mingw32_HOST_OS +{- Older versions of git-annex locked content files themselves, but newer + - versions use a separate lock file, to better support repos shared + - among users in eg a group. -} +contentLockFile key v + | versionNeedsWritableContentFiles v = pure Nothing + | otherwise = Just <$> calcRepo (gitAnnexContentLock key) +#else +{- Windows always has to use a separate lock file from the content, since + - locking the actual content file would interfere with the user's + - use of it. -} +contentLockFile key _ = Just <$> calcRepo (gitAnnexContentLock key) +#endif +
+{- Performs an action, passing it the location to use for a key's content. -} +withObjectLoc :: Key -> (RawFilePath -> Annex a) -> Annex a +withObjectLoc key a = a =<< calcRepo (gitAnnexLocation key) +
+{- Check if a file contains the unmodified content of the key. + - + - The expensive way to tell is to do a verification of its content. + - The cheaper way is to see if the InodeCache for the key matches the + - file. -} +isUnmodified :: Key -> RawFilePath -> Annex Bool +isUnmodified key f = + withTSDelta (liftIO . genInodeCache f) >>= \case + Just fc -> do + ic <- Database.Keys.getInodeCaches key + isUnmodified' key f fc ic + Nothing -> return False +
+isUnmodified' :: Key -> RawFilePath -> InodeCache -> [InodeCache] -> Annex Bool +isUnmodified' = isUnmodifiedLowLevel Database.Keys.addInodeCaches +
+{- Cheap check if a file contains the unmodified content of the key, + - only checking the InodeCache of the key. + - + - When the InodeCache is stale, this may incorrectly report that a file is + - modified.
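+ - + - A hedged sketch of a caller (illustrative): try the cheap check first, + - falling back to the expensive content verification: + - + - unchanged <- ifM (isUnmodifiedCheap key f) + - ( return True + - , isUnmodified key f + - )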
+ - + - Note that, on systems not supporting high-resolution mtimes, + - this may report a false positive when repeated edits are made to a file + - within a small time window (eg 1 second). + -} +isUnmodifiedCheap :: Key -> RawFilePath -> Annex Bool +isUnmodifiedCheap key f = maybe (pure False) (isUnmodifiedCheap' key) + =<< withTSDelta (liftIO . genInodeCache f) +
+isUnmodifiedCheap' :: Key -> InodeCache -> Annex Bool +isUnmodifiedCheap' key fc = isUnmodifiedCheapLowLevel fc + =<< Database.Keys.getInodeCaches key diff --git a/Annex/Content/Presence/LowLevel.hs b/Annex/Content/Presence/LowLevel.hs new file mode 100644 index 0000000000..6f50c187b2 --- /dev/null +++ b/Annex/Content/Presence/LowLevel.hs @@ -0,0 +1,36 @@ +{- git-annex object content presence, low-level functions + - + - Copyright 2010-2021 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} +
+module Annex.Content.Presence.LowLevel where +
+import Annex.Common +import Annex.Verify +import Annex.InodeSentinal +import Utility.InodeCache +
+isUnmodifiedLowLevel :: (Key -> [InodeCache] -> Annex ()) -> Key -> RawFilePath -> InodeCache -> [InodeCache] -> Annex Bool +isUnmodifiedLowLevel addinodecaches key f fc ic = + isUnmodifiedCheapLowLevel fc ic <||> expensivecheck + where + expensivecheck = ifM (verifyKeyContent key f) + ( do + -- The file could have been modified while it was + -- being verified. Detect that. + ifM (geti >>= maybe (return False) (compareInodeCaches fc)) + ( do + -- Update the InodeCache to avoid + -- performing this expensive check again. + addinodecaches key [fc] + return True + , return False + ) + , return False + ) + geti = withTSDelta (liftIO . genInodeCache f) +
+isUnmodifiedCheapLowLevel :: InodeCache -> [InodeCache] -> Annex Bool +isUnmodifiedCheapLowLevel fc ic = anyM (compareInodeCaches fc) ic diff --git a/Annex/CopyFile.hs b/Annex/CopyFile.hs new file mode 100644 index 0000000000..55c7d908e2 --- /dev/null +++ b/Annex/CopyFile.hs @@ -0,0 +1,179 @@ +{- Copying files. + - + - Copyright 2011-2022 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} +
+{-# LANGUAGE CPP #-} +
+module Annex.CopyFile where +
+import Annex.Common +import Utility.Metered +import Utility.CopyFile +import Utility.FileMode +import Utility.Touch +import Utility.Hash (IncrementalVerifier(..)) +import qualified Utility.RawFilePath as R +
+import Control.Concurrent +import qualified Data.ByteString as S +import Data.Time.Clock.POSIX +import System.PosixCompat.Files (fileMode) +
+-- To avoid the overhead of trying copy-on-write every time, it's tried +-- once and if it fails, is not tried again. +newtype CopyCoWTried = CopyCoWTried (MVar Bool) +
+newCopyCoWTried :: IO CopyCoWTried +newCopyCoWTried = CopyCoWTried <$> newEmptyMVar +
+{- Copies a file if copy-on-write is supported. Otherwise, returns False. + - + - The destination file must not exist yet (or may exist but be empty), + - or it will fail to make a CoW copy, and will return False. + -} +tryCopyCoW :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> IO Bool +tryCopyCoW (CopyCoWTried copycowtried) src dest meterupdate = + -- If multiple threads reach this at the same time, they + -- will both try CoW, which is acceptable. + ifM (isEmptyMVar copycowtried) + ( ifM destfilealreadypopulated + ( return False + , do + ok <- docopycow + void $ tryPutMVar copycowtried ok + return ok + ) + , ifM (readMVar copycowtried) + ( do + -- CoW is known to work, so delete + -- dest if it exists in order to do a fast + -- CoW copy.
+ void $ tryIO $ removeFile dest + docopycow + , return False + ) + ) + where + docopycow = watchFileSize dest' meterupdate $ const $ + copyCoW CopyTimeStamps src dest + + dest' = toRawFilePath dest + + -- Check if the dest file already exists, which would prevent + -- probing CoW. If the file exists but is empty, there's no benefit + -- to resuming from it when CoW does not work, so remove it. + destfilealreadypopulated = + tryIO (R.getFileStatus dest') >>= \case + Left _ -> return False + Right st -> do + sz <- getFileSize' dest' st + if sz == 0 + then tryIO (removeFile dest) >>= \case + Right () -> return False + Left _ -> return True + else return True +
+data CopyMethod = CopiedCoW | Copied +
+{- Copies from src to dest, updating a meter. Preserves mode and mtime. + - Uses copy-on-write if it is supported. If the destination already + - exists, an interrupted copy will resume where it left off. + - + - The IncrementalVerifier is updated with the content of the file as it's + - being copied. But it is not finalized at the end. + - + - When copy-on-write is used, the IncrementalVerifier is not fed + - the content of the file, and verification using it will fail. + - + - Note that, when the destination file already exists, it's read both + - to start calculating the hash, and also to verify that its content is + - the same as the start of the source file. It's possible that the + - destination file was created from some other source file + - (eg when isStableKey is false), and doing this avoids getting a + - corrupted file in such cases. + -} +fileCopier :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> IO CopyMethod +#ifdef mingw32_HOST_OS +fileCopier _ src dest meterupdate iv = docopy +#else +fileCopier copycowtried src dest meterupdate iv = + ifM (tryCopyCoW copycowtried src dest meterupdate) + ( do + maybe noop unableIncrementalVerifier iv + return CopiedCoW + , docopy + ) +#endif + where + docopy = do + -- The file might have had the write bit removed, + -- so make sure we can write to it. + void $ tryIO $ allowWrite dest' + + withBinaryFile src ReadMode $ \hsrc -> + fileContentCopier hsrc dest meterupdate iv + + -- Copy src mode and mtime. + mode <- fileMode <$> R.getFileStatus (toRawFilePath src) + mtime <- utcTimeToPOSIXSeconds <$> getModificationTime src + R.setFileMode dest' mode + touch dest' mtime False + + return Copied + + dest' = toRawFilePath dest +
+{- Copies content from a handle to a destination file. Does not + - use copy-on-write, and does not copy file mode and mtime. + -} +fileContentCopier :: Handle -> FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> IO () +fileContentCopier hsrc dest meterupdate iv = + withBinaryFile dest ReadWriteMode $ \hdest -> do + sofar <- compareexisting hdest zeroBytesProcessed + docopy hdest sofar + where + docopy hdest sofar = do + s <- S.hGet hsrc defaultChunkSize + if s == S.empty + then return () + else do + let sofar' = addBytesProcessed sofar (S.length s) + S.hPut hdest s + maybe noop (flip updateIncrementalVerifier s) iv + meterupdate sofar' + docopy hdest sofar' + + -- Leaves hdest and hsrc seeked to wherever the two diverge, + -- so typically hdest will be seeked to end, and hsrc to the same + -- position.
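+ -- (For example, when resuming with the first 4 MB of dest already + -- matching src, compareexisting reads, hashes, and meters those + -- 4 MB, and docopy then appends the remainder from hsrc.)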
+ compareexisting hdest sofar = do + s <- S.hGet hdest defaultChunkSize + if s == S.empty + then return sofar + else do + s' <- getnoshort (S.length s) hsrc + if s == s' + then do + maybe noop (flip updateIncrementalVerifier s) iv + let sofar' = addBytesProcessed sofar (S.length s) + meterupdate sofar' + compareexisting hdest sofar' + else do + seekbefore hdest s + seekbefore hsrc s' + return sofar + + seekbefore h s = hSeek h RelativeSeek (fromIntegral (-1*S.length s)) + + -- Like hGet, but never returns less than the requested number of + -- bytes, unless it reaches EOF. + getnoshort n h = do + s <- S.hGet h n + if S.length s == n || S.empty == s + then return s + else do + s' <- getnoshort (n - S.length s) h + return (s <> s') diff --git a/Annex/CurrentBranch.hs b/Annex/CurrentBranch.hs new file mode 100644 index 0000000000..f6ae28442f --- /dev/null +++ b/Annex/CurrentBranch.hs @@ -0,0 +1,41 @@ +{- currently checked out branch + - + - Copyright 2018 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.CurrentBranch where + +import Annex.Common +import Types.AdjustedBranch +import Annex.AdjustedBranch.Name +import qualified Annex +import qualified Git +import qualified Git.Branch + +type CurrBranch = (Maybe Git.Branch, Maybe Adjustment) + +{- Gets the currently checked out branch. + - When on an adjusted branch, gets the original branch, and the adjustment. + - + - Cached for speed. + - + - Until a commit is made in a new repository, no branch is checked out. + - Since git-annex may make the first commit, this does not cache + - the absence of a branch. + -} +getCurrentBranch :: Annex CurrBranch +getCurrentBranch = maybe cache return + =<< Annex.getState Annex.cachedcurrentbranch + where + cache = inRepo Git.Branch.current >>= \case + Just b -> do + let v = case adjustedToOriginal b of + Nothing -> (Just b, Nothing) + Just (adj, origbranch) -> + (Just origbranch, Just adj) + Annex.changeState $ \s -> + s { Annex.cachedcurrentbranch = Just v } + return v + Nothing -> return (Nothing, Nothing) diff --git a/Annex/Debug.hs b/Annex/Debug.hs new file mode 100644 index 0000000000..fb7d7eef77 --- /dev/null +++ b/Annex/Debug.hs @@ -0,0 +1,35 @@ +{- git-annex debugging + - + - Copyright 2021-2023 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.Debug ( + DebugSelector(..), + DebugSource(..), + debug, + fastDebug, + fastDebug', + configureDebug, + debugSelectorFromGitConfig, + parseDebugSelector, +) where + +import Common +import qualified Annex +import Utility.Debug hiding (fastDebug) +import qualified Utility.Debug +import Annex.Debug.Utility + +-- | This is faster than using debug, because the DebugSelector +-- is read from the Annex monad, which avoids any IORef access overhead +-- when debugging is not enabled. +fastDebug :: DebugSource -> String -> Annex.Annex () +fastDebug src msg = do + rd <- Annex.getRead id + fastDebug' rd src msg + +fastDebug' :: Annex.AnnexRead -> DebugSource -> String -> Annex.Annex () +fastDebug' rd src msg = when (Annex.debugenabled rd) $ + liftIO $ Utility.Debug.fastDebug (Annex.debugselector rd) src msg diff --git a/Annex/Debug/Utility.hs b/Annex/Debug/Utility.hs new file mode 100644 index 0000000000..79186d840d --- /dev/null +++ b/Annex/Debug/Utility.hs @@ -0,0 +1,32 @@ +{- git-annex debugging, utility functions + - + - Copyright 2021 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +module Annex.Debug.Utility ( + debugSelectorFromGitConfig, + parseDebugSelector, + DebugSelector, +) where + +import Types.GitConfig +import Utility.Debug +import Utility.Split +import Utility.FileSystemEncoding + +import qualified Data.ByteString as S + +debugSelectorFromGitConfig :: GitConfig -> DebugSelector +debugSelectorFromGitConfig = + maybe NoDebugSelector parseDebugSelector . annexDebugFilter + +parseDebugSelector :: String -> DebugSelector +parseDebugSelector = DebugSelector . matchDebugSource . splitSelectorNames + +splitSelectorNames :: String -> [S.ByteString] +splitSelectorNames = map encodeBS . splitc ',' + +matchDebugSource :: [S.ByteString] -> DebugSource -> Bool +matchDebugSource names (DebugSource s) = any (`S.isInfixOf` s) names diff --git a/Annex/Difference.hs b/Annex/Difference.hs new file mode 100644 index 0000000000..fa874476fd --- /dev/null +++ b/Annex/Difference.hs @@ -0,0 +1,60 @@ +{- git-annex repository differences + - + - Copyright 2015 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Annex.Difference ( + module Types.Difference, + setDifferences, +) where + +import Annex.Common +import Types.Difference +import Logs.Difference +import Config +import Annex.UUID +import Logs.UUID +import Annex.Version +import qualified Annex + +import qualified Data.Map as M + +-- Differences are only allowed to be tweaked when initializing a +-- repository for the first time, and then only if there is not another +-- known uuid. If the repository was cloned from elsewhere, it inherits +-- the existing settings. +-- +-- Must be called before setVersion, so it can check if this is the first +-- time the repository is being initialized. +setDifferences :: Annex () +setDifferences = do + u <- getUUID + otherds <- allDifferences <$> recordedDifferences + ds <- mappend otherds . annexDifferences <$> Annex.getGitConfig + when (ds /= mempty) $ do + ds' <- ifM (isJust <$> getVersion) + ( do + oldds <- recordedDifferencesFor u + when (ds /= oldds) $ + warning "Cannot change tunable parameters in already initialized repository." + return oldds + , if otherds == mempty + then ifM (any (/= u) . M.keys <$> uuidDescMap) + ( do + warning "Cannot change tunable parameters in a clone of an existing repository." + return mempty + , return ds + ) + else if otherds /= ds + then do + warning "The specified tunable parameters differ from values being used in other clones of this repository." + return otherds + else return ds + ) + forM_ (listDifferences ds') $ \d -> + setConfig (differenceConfigKey d) (differenceConfigVal d) + recordDifferences ds' u diff --git a/Annex/DirHashes.hs b/Annex/DirHashes.hs new file mode 100644 index 0000000000..7311acf3e6 --- /dev/null +++ b/Annex/DirHashes.hs @@ -0,0 +1,90 @@ +{- git-annex file locations + - + - Copyright 2010-2019 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.DirHashes ( + Hasher, + HashLevels(..), + objectHashLevels, + branchHashLevels, + branchHashDir, + dirHashes, + hashDirMixed, + hashDirLower, + display_32bits_as_dir +) where + +import Data.Default +import Data.Bits +import qualified Data.ByteArray as BA +import qualified Data.ByteArray.Encoding as BA +import qualified Data.ByteString as S +import qualified System.FilePath.ByteString as P + +import Common +import Key +import Types.GitConfig +import Types.Difference +import Utility.Hash +import Utility.MD5 + +type Hasher = Key -> RawFilePath + +-- Number of hash levels to use. 
2 is the default. +newtype HashLevels = HashLevels Int +
+instance Default HashLevels where + def = HashLevels 2 +
+objectHashLevels :: GitConfig -> HashLevels +objectHashLevels = configHashLevels OneLevelObjectHash +
+branchHashLevels :: GitConfig -> HashLevels +branchHashLevels = configHashLevels OneLevelBranchHash +
+configHashLevels :: Difference -> GitConfig -> HashLevels +configHashLevels d config + | hasDifference d (annexDifferences config) = HashLevels 1 + | otherwise = def +
+branchHashDir :: GitConfig -> Key -> S.ByteString +branchHashDir = hashDirLower . branchHashLevels +
+{- Two different directory hashes may be used. The mixed case hash + - came first, and is fine, except for the problem of case-strict + - filesystems such as Linux VFAT (mounted with shortname=mixed), + - which do not allow using a directory "XX" when "xx" already exists. + - To support that, some git-annex repositories use the lower case-hash. + - All special remotes use the lower-case hash for new data, but old data + - may still use the mixed case hash. -} +dirHashes :: [HashLevels -> Hasher] +dirHashes = [hashDirLower, hashDirMixed] +
+hashDirs :: HashLevels -> Int -> S.ByteString -> RawFilePath +hashDirs (HashLevels 1) sz s = P.addTrailingPathSeparator $ S.take sz s +hashDirs _ sz s = P.addTrailingPathSeparator $ h P.</> t + where + (h, t) = S.splitAt sz s +
+hashDirLower :: HashLevels -> Hasher +hashDirLower n k = hashDirs n 3 $ S.pack $ take 6 $ conv $ + md5s $ serializeKey' $ nonChunkKey k + where + conv v = BA.unpack $ + (BA.convertToBase BA.Base16 v :: BA.Bytes) +
+{- This was originally using Data.Hash.MD5 from MissingH. This new version +- is faster, but ugly as it has to replicate the 4 Word32's that it produced. -} +hashDirMixed :: HashLevels -> Hasher +hashDirMixed n k = hashDirs n 2 $ S.pack $ take 4 $ + concatMap display_32bits_as_dir $ + encodeWord32 $ map fromIntegral $ BA.unpack $ + Utility.Hash.md5s $ serializeKey' $ nonChunkKey k + where + encodeWord32 (b1:b2:b3:b4:rest) = + (shiftL b4 24 .|. shiftL b3 16 .|. shiftL b2 8 .|. b1) + : encodeWord32 rest + encodeWord32 _ = [] diff --git a/Annex/Drop.hs b/Annex/Drop.hs new file mode 100644 index 0000000000..ccbc18e6e1 --- /dev/null +++ b/Annex/Drop.hs @@ -0,0 +1,131 @@ +{- dropping of unwanted content + - + - Copyright 2012-2021 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} +
+{-# LANGUAGE OverloadedStrings #-} +
+module Annex.Drop where +
+import Annex.Common +import Logs.Trust +import Annex.NumCopies +import Types.Remote (uuid, appendonly, config, remotetype, thirdPartyPopulated) +import qualified Remote +import qualified Command.Drop +import Command +import Annex.Wanted +import Annex.Content +import Annex.SpecialRemote.Config +import qualified Database.Keys +
+import qualified Data.Set as S +
+type Reason = String +
+{- Drop a key from local and/or remote when allowed by the preferred content, + - required content, and numcopies settings. + - + - Skips trying to drop from remotes that are appendonly, since those drops + - would presumably fail. Also skips dropping from exporttree/importtree remotes, + - which don't allow dropping individual keys, and from thirdPartyPopulated + - remotes. + - + - The UUIDs are ones where the content is believed to be present. + - The Remote list can include other remotes that do not have the content; + - only ones that match the UUIDs will be dropped from. + - + - If allowed to drop fromhere, that drop will be done last.
This is done + - because local drops do not need any LockedCopy evidence, and so dropping + - from local last allows the content to be removed from more remotes. + - + - A VerifiedCopy can be provided as an optimisation when eg, a key + - has just been uploaded to a remote. + - + - The runner is used to run CommandStart sequentially, it's typically + - callCommandAction. + -} +handleDropsFrom :: [UUID] -> [Remote] -> Reason -> Bool -> Key -> AssociatedFile -> SeekInput -> [VerifiedCopy] -> (CommandStart -> CommandCleanup) -> Annex () +handleDropsFrom locs rs reason fromhere key afile si preverified runner = do + fs <- Database.Keys.getAssociatedFilesIncluding afile key + n <- getcopies fs + void $ if fromhere && checkcopies n Nothing + then go fs rs n >>= dropl fs + else go fs rs n + where + getcopies fs = do + (untrusted, have) <- trustPartition UnTrusted locs + (numcopies, mincopies) <- getSafestNumMinCopies' afile key fs + return (numCopiesCount have, numcopies, mincopies, S.fromList untrusted) + + {- Check that we have enough copies still to drop the content. + - When the remote being dropped from is untrusted, it was not + - counted as a copy, so having only numcopies suffices. Otherwise, + - we need more than numcopies to safely drop. + - + - This is not the final check that it's safe to drop, but it + - avoids doing extra work to do that check later in cases where it + - will surely fail. + -} + checkcopies (have, numcopies, mincopies, _untrusted) Nothing = + have > fromNumCopies numcopies && have > fromMinCopies mincopies + checkcopies (have, numcopies, mincopies, untrusted) (Just u) + | S.member u untrusted = have >= fromNumCopies numcopies && have >= fromMinCopies mincopies + | otherwise = have > fromNumCopies numcopies && have > fromMinCopies mincopies + + decrcopies (have, numcopies, mincopies, untrusted) Nothing = + (have - 1, numcopies, mincopies, untrusted) + decrcopies v@(_have, _numcopies, _mincopies, untrusted) (Just u) + | S.member u untrusted = v + | otherwise = decrcopies v Nothing + + go _ [] n = pure n + go fs (r:rest) n + | uuid r `S.notMember` slocs = go fs rest n + | appendonly r = go fs rest n + | exportTree (config r) = go fs rest n + | importTree (config r) = go fs rest n + | thirdPartyPopulated (remotetype r) = go fs rest n + | checkcopies n (Just $ Remote.uuid r) = + dropr fs r n >>= go fs rest + | otherwise = pure n + + checkdrop fs n u a = + let afs = map (AssociatedFile . 
Just) fs + pcc = Command.Drop.PreferredContentChecked True + in ifM (wantDrop True u (Just key) afile (Just afs)) + ( dodrop n u (a pcc) + , return n + ) + + dodrop n@(have, numcopies, mincopies, _untrusted) u a = + ifM (safely $ runner $ a numcopies mincopies) + ( do + fastDebug "Annex.Drop" $ unwords + [ "dropped" + , case afile of + AssociatedFile Nothing -> serializeKey key + AssociatedFile (Just af) -> fromRawFilePath af + , "(from " ++ maybe "here" show u ++ ")" + , "(copies now " ++ show (have - 1) ++ ")" + , ": " ++ reason + ] + return $ decrcopies n u + , return n + ) + + dropl fs n = checkdrop fs n Nothing $ \pcc numcopies mincopies -> + stopUnless (inAnnex key) $ + Command.Drop.startLocal pcc afile ai si numcopies mincopies key preverified (Command.Drop.DroppingUnused False) + + dropr fs r n = checkdrop fs n (Just $ Remote.uuid r) $ \pcc numcopies mincopies -> + Command.Drop.startRemote pcc afile ai si numcopies mincopies key (Command.Drop.DroppingUnused False) r + + ai = mkActionItem (key, afile) + + slocs = S.fromList locs + + safely a = either (const False) id <$> tryNonAsync a + diff --git a/Annex/Environment.hs b/Annex/Environment.hs new file mode 100644 index 0000000000..2b917f4fa2 --- /dev/null +++ b/Annex/Environment.hs @@ -0,0 +1,73 @@ +{- git-annex environment + - + - Copyright 2012-2023 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Annex.Environment ( + checkEnvironment, + checkEnvironmentIO, + ensureCommit, +) where + +import Annex.Common +import Utility.UserInfo +import qualified Git.Config +import Config +import Utility.Env.Set + +import Control.Exception + +{- Checks that the system's environment allows git to function. + - Git requires a GECOS username, or suitable git configuration, or + - environment variables. When none of those are set, this will set the + - environment variables. + - + - Git also requires the system have a hostname containing a dot. + - Otherwise, it tries various methods to find a FQDN, and will fail if it + - does not. To avoid replicating that code here, which would break if its + - methods change, this function does not check the hostname is valid. + - Instead, git-annex init calls ensureCommit, which makes sure that git + - gets set up to allow committing. + -} +checkEnvironment :: Annex () +checkEnvironment = do + gitusername <- fromRepo $ Git.Config.getMaybe "user.name" + when (isNothing gitusername || gitusername == Just "") $ + unlessM userConfigOnly $ + liftIO checkEnvironmentIO + +checkEnvironmentIO :: IO () +checkEnvironmentIO = whenM (isNothing <$> myUserGecos) $ do + username <- either (const "unknown") id <$> myUserName + ensureEnv "GIT_AUTHOR_NAME" username + ensureEnv "GIT_COMMITTER_NAME" username + where + -- existing environment is not overwritten + ensureEnv var val = setEnv var val False + +{- Runs an action that commits to the repository, and if it fails, + - sets user.email and user.name to a dummy value and tries the action again. + - + - Note that user.email and user.name are left set afterwards, so this only + - needs to be used once to make sure that future commits will succeed. 
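+ - + - A hedged sketch (commitStagedChanges is a hypothetical Git.Repo action): + - + - ensureCommit $ inRepo commitStagedChanges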
+ -} +ensureCommit :: Annex a -> Annex a +ensureCommit a = either retry return =<< tryNonAsync a + where + retry e = ifM userConfigOnly + ( liftIO (throwIO e) + , do + name <- liftIO $ either (const "unknown") id <$> myUserName + setConfig "user.name" name + setConfig "user.email" name + a + ) + +userConfigOnly :: Annex Bool +userConfigOnly = do + v <- fromRepo $ Git.Config.getMaybe "user.useconfigonly" + return (fromMaybe False (Git.Config.isTrueFalse' =<< v)) diff --git a/Annex/Export.hs b/Annex/Export.hs new file mode 100644 index 0000000000..60039ef3b9 --- /dev/null +++ b/Annex/Export.hs @@ -0,0 +1,72 @@ +{- git-annex exports + - + - Copyright 2017-2021 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Annex.Export where + +import Annex +import Annex.CatFile +import Types +import Types.Key +import qualified Git +import qualified Types.Remote as Remote +import Git.Quote +import Messages + +import Data.Maybe +import qualified Data.ByteString.Short as S (fromShort, toShort) + +-- From a sha pointing to the content of a file to the key +-- to use to export it. When the file is annexed, it's the annexed key. +-- When the file is stored in git, it's a special type of key to indicate +-- that. +exportKey :: Git.Sha -> Annex Key +exportKey sha = mk <$> catKey sha + where + mk (Just k) = k + mk Nothing = gitShaKey sha + +-- Encodes a git sha as a key. This is used to represent a non-annexed +-- file that is stored on a special remote, which necessarily needs a +-- key. +-- +-- This is not the same as a SHA1 key, because the mapping needs to be +-- bijective, also because git may not always use SHA1, and because git +-- takes a SHA1 of the file size + content, while git-annex SHA1 keys +-- only checksum the content. +gitShaKey :: Git.Sha -> Key +gitShaKey (Git.Ref s) = mkKey $ \kd -> kd + { keyName = S.toShort s + , keyVariety = OtherKey "GIT" + } + +-- Reverse of gitShaKey +keyGitSha :: Key -> Maybe Git.Sha +keyGitSha k + | fromKey keyVariety k == OtherKey "GIT" = + Just (Git.Ref (S.fromShort (fromKey keyName k))) + | otherwise = Nothing + +-- Is a key storing a git sha, and not used for an annexed file? +isGitShaKey :: Key -> Bool +isGitShaKey = isJust . keyGitSha + +warnExportImportConflict :: Remote -> Annex () +warnExportImportConflict r = do + isimport <- Remote.isImportSupported r + isexport <- Remote.isExportSupported r + let (ops, resolvcmd) = case (isexport, isimport) of + (False, True) -> ("imported from", "git-annex import") + (True, False) -> ("exported to", "git-annex export") + _ -> ("exported to and/or imported from", "git-annex export") + toplevelWarning True $ UnquotedString $ unwords + [ "Conflict detected. Different trees have been" + , ops, Remote.name r ++ ". Use" + , resolvcmd + , "to resolve this conflict." + ] diff --git a/Annex/ExternalAddonProcess.hs b/Annex/ExternalAddonProcess.hs new file mode 100644 index 0000000000..e573d2261d --- /dev/null +++ b/Annex/ExternalAddonProcess.hs @@ -0,0 +1,100 @@ +{- External addon processes for special remotes and backends. + - + - Copyright 2013-2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
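{- The gitShaKey/keyGitSha pair above is intended to be a bijection,
 - which can be stated as a round-trip property (a hypothetical
 - QuickCheck-style check, not part of the patch):
 -
 -     prop_gitShaKey_roundtrip :: Git.Sha -> Bool
 -     prop_gitShaKey_roundtrip sha = keyGitSha (gitShaKey sha) == Just sha
 -
 - Keys of annexed files have some other keyVariety, so keyGitSha is
 - Nothing for them, which is how export code tells git-stored files
 - apart from annexed ones.
 -}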
+ -} + +{-# LANGUAGE OverloadedStrings #-} + +module Annex.ExternalAddonProcess where + +import qualified Annex +import Annex.Common +import Git.Env +import Utility.Shell +import Messages.Progress + +import Control.Concurrent.Async + +data ExternalAddonProcess = ExternalAddonProcess + { externalSend :: Handle + , externalReceive :: Handle + -- Shut down the process. With True, it's forced to stop + -- immediately. + , externalShutdown :: Bool -> IO () + , externalPid :: ExternalAddonPID + , externalProgram :: String + } + +type ExternalAddonPID = Int + +data ExternalAddonStartError + = ProgramNotInstalled String + | ProgramFailure String + +startExternalAddonProcess :: String -> [CommandParam] -> ExternalAddonPID -> Annex (Either ExternalAddonStartError ExternalAddonProcess) +startExternalAddonProcess basecmd ps pid = do + errrelayer <- mkStderrRelayer + g <- Annex.gitRepo + cmdpath <- liftIO $ searchPath basecmd + liftIO $ start errrelayer g cmdpath + where + start errrelayer g cmdpath = do + (cmd, cmdps) <- maybe (pure (basecmd, [])) findShellCommand cmdpath + let basep = (proc cmd (toCommand (cmdps ++ ps))) + { std_in = CreatePipe + , std_out = CreatePipe + , std_err = CreatePipe + } + p <- propgit g basep + tryNonAsync (createProcess p) >>= \case + Right v -> (Right <$> started cmd errrelayer v) + `catchNonAsync` const (runerr cmdpath) + Left _ -> runerr cmdpath + + started cmd errrelayer pall@(Just hin, Just hout, Just herr, ph) = do + stderrelay <- async $ errrelayer ph herr + let shutdown forcestop = do + -- Close the process's stdin, to let it know there + -- are no more requests, so it will exit. + hClose hin + -- Close the process's stdout as we're not going to + -- process any more output from it. + hClose hout + if forcestop + then cleanupProcess pall + else void (waitForProcess ph) + `onException` cleanupProcess pall + -- This thread will exit after consuming any + -- remaining stderr from the process. + () <- wait stderrelay + hClose herr + return $ ExternalAddonProcess + { externalSend = hin + , externalReceive = hout + , externalPid = pid + , externalShutdown = shutdown + , externalProgram = cmd + } + started _ _ _ = giveup "internal" + + propgit g p = do + environ <- propGitEnv g + return $ p { env = Just environ } + + runerr (Just cmd) = + return $ Left $ ProgramFailure $ + "Cannot run " ++ cmd ++ " -- Make sure it's executable and that its dependencies are installed." + runerr Nothing = do + path <- intercalate ":" <$> getSearchPath + return $ Left $ ProgramNotInstalled $ + "Cannot run " ++ basecmd ++ " -- It is not installed in PATH (" ++ path ++ ")" + +protocolDebug :: ExternalAddonProcess -> Bool -> String -> IO () +protocolDebug external sendto line = debug "Annex.ExternalAddonProcess" $ unwords + [ externalProgram external ++ + "[" ++ show (externalPid external) ++ "]" + , if sendto then "<--" else "-->" + , line + ] diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs new file mode 100644 index 0000000000..e48931f360 --- /dev/null +++ b/Annex/FileMatcher.hs @@ -0,0 +1,278 @@ +{- git-annex file matching + - + - Copyright 2012-2023 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher.
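{- A minimal sketch of the lifecycle of an ExternalAddonProcess as
 - defined above. Both "git-annex-remote-demo" and the protocol line are
 - made up for illustration:
 -
 -     demo :: Annex ()
 -     demo = startExternalAddonProcess "git-annex-remote-demo" [] 1 >>= \case
 -         Left (ProgramNotInstalled msg) -> giveup msg
 -         Left (ProgramFailure msg) -> giveup msg
 -         Right p -> liftIO $ do
 -             hPutStrLn (externalSend p) "INITREMOTE"
 -             hFlush (externalSend p)
 -             reply <- hGetLine (externalReceive p)
 -             protocolDebug p False reply
 -             externalShutdown p False  -- False: wait for a clean exit
 -}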
+ -} + +{-# LANGUAGE CPP #-} + +module Annex.FileMatcher ( + GetFileMatcher, + checkFileMatcher, + checkFileMatcher', + checkMatcher, + checkMatcher', + matchAll, + PreferredContentData(..), + preferredContentTokens, + preferredContentParser, + ParseToken, + parsedToMatcher, + mkMatchExpressionParser, + largeFilesMatcher, + AddUnlockedMatcher, + addUnlockedMatcher, + checkAddUnlockedMatcher, + LimitBy(..), + module Types.FileMatcher +) where + +import qualified Data.Map as M + +import Annex.Common +import Limit +import Utility.Matcher +import Types.Group +import Types.FileMatcher +import Types.GitConfig +import Config.GitConfig +import Annex.SpecialRemote.Config (preferreddirField) +import Git.FilePath +import Types.Remote (RemoteConfig) +import Types.ProposedAccepted +import Annex.CheckAttr +import qualified Git.Config +#ifdef WITH_MAGICMIME +import Annex.Magic +#endif + +import Data.Either +import qualified Data.Set as S +import Control.Monad.Writer + +type GetFileMatcher = RawFilePath -> Annex (FileMatcher Annex) + +checkFileMatcher :: GetFileMatcher -> RawFilePath -> Annex Bool +checkFileMatcher getmatcher file = + checkFileMatcher' getmatcher file (return True) + +-- | Allows running an action when no matcher is configured for the file. +checkFileMatcher' :: GetFileMatcher -> RawFilePath -> Annex Bool -> Annex Bool +checkFileMatcher' getmatcher file notconfigured = do + matcher <- getmatcher file + checkMatcher matcher Nothing afile S.empty notconfigured d + where + afile = AssociatedFile (Just file) + -- checkMatcher will never use this, because afile is provided. + d = return True + +checkMatcher :: FileMatcher Annex -> Maybe Key -> AssociatedFile -> AssumeNotPresent -> Annex Bool -> Annex Bool -> Annex Bool +checkMatcher matcher mkey afile notpresent notconfigured d + | isEmpty (fst matcher) = notconfigured + | otherwise = case (mkey, afile) of + (_, AssociatedFile (Just file)) -> + go =<< fileMatchInfo file mkey + (Just key, AssociatedFile Nothing) -> + let i = ProvidedInfo + { providedFilePath = Nothing + , providedKey = Just key + , providedFileSize = Nothing + , providedMimeType = Nothing + , providedMimeEncoding = Nothing + , providedLinkType = Nothing + } + in go (MatchingInfo i) + (Nothing, _) -> d + where + go mi = checkMatcher' matcher mi notpresent + +checkMatcher' :: FileMatcher Annex -> MatchInfo -> AssumeNotPresent -> Annex Bool +checkMatcher' (matcher, (MatcherDesc matcherdesc)) mi notpresent = do + (matches, desc) <- runWriterT $ matchMrun' matcher $ \op -> + matchAction op notpresent mi + explain (mkActionItem mi) $ UnquotedString <$> + describeMatchResult matchDesc desc + ((if matches then "matches " else "does not match ") ++ matcherdesc ++ ": ") + return matches + +fileMatchInfo :: RawFilePath -> Maybe Key -> Annex MatchInfo +fileMatchInfo file mkey = do + matchfile <- getTopFilePath <$> inRepo (toTopFilePath file) + return $ MatchingFile FileInfo + { matchFile = matchfile + , contentFile = file + , matchKey = mkey + } + +matchAll :: Matcher (MatchFiles Annex) +matchAll = generate [] + +parsedToMatcher :: MatcherDesc -> [ParseResult (MatchFiles Annex)] -> Either String (FileMatcher Annex) +parsedToMatcher matcherdesc parsed = case partitionEithers parsed of + ([], vs) -> Right (generate vs, matcherdesc) + (es, _) -> Left $ unwords $ map ("Parse failure: " ++) es + +data ParseToken t + = SimpleToken String (ParseResult t) + | ValueToken String (String -> ParseResult t) + +type ParseResult t = Either String (Token t) + +parseToken :: [ParseToken t] -> String -> 
ParseResult t +parseToken l t = case syntaxToken t of + Right st -> Right st + Left _ -> go l + where + go [] = Left $ "near " ++ show t + go (SimpleToken s r : _) | s == t = r + go (ValueToken s mkr : _) | s == k = mkr v + go (_ : ps) = go ps + (k, v) = separate (== '=') t + +{- This is really dumb tokenization; there's no support for quoted values. + - Open and close parens are always treated as standalone tokens; + - otherwise tokens must be separated by whitespace. -} +tokenizeMatcher :: String -> [String] +tokenizeMatcher = filter (not . null) . concatMap splitparens . words + where + splitparens = segmentDelim (`elem` "()") + +commonTokens :: LimitBy -> [ParseToken (MatchFiles Annex)] +commonTokens lb = + [ SimpleToken "anything" (simply limitAnything) + , SimpleToken "nothing" (simply limitNothing) + , ValueToken "include" (usev limitInclude) + , ValueToken "exclude" (usev limitExclude) + , ValueToken "largerthan" (usev $ limitSize lb "largerthan" (>)) + , ValueToken "smallerthan" (usev $ limitSize lb "smallerthan" (<)) + , SimpleToken "unused" (simply limitUnused) + ] + +data PreferredContentData = PCD + { matchStandard :: Either String (Matcher (MatchFiles Annex)) + , matchGroupWanted :: Either String (Matcher (MatchFiles Annex)) + , getGroupMap :: Annex GroupMap + , configMap :: M.Map UUID RemoteConfig + , repoUUID :: Maybe UUID + } + +preferredContentTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)] +preferredContentTokens pcd = + [ SimpleToken "standard" (call "standard" $ matchStandard pcd) + , SimpleToken "groupwanted" (call "groupwanted" $ matchGroupWanted pcd) + , SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir "inpreferreddir") + , SimpleToken "present" (simply $ limitPresent $ repoUUID pcd) + , SimpleToken "securehash" (simply limitSecureHash) + , ValueToken "copies" (usev limitCopies) + , ValueToken "lackingcopies" (usev $ limitLackingCopies "lackingcopies" False) + , ValueToken "approxlackingcopies" (usev $ limitLackingCopies "approxlackingcopies" True) + , ValueToken "inbackend" (usev limitInBackend) + , ValueToken "metadata" (usev limitMetaData) + , ValueToken "inallgroup" (usev $ limitInAllGroup $ getGroupMap pcd) + , ValueToken "onlyingroup" (usev $ limitOnlyInGroup $ getGroupMap pcd) + ] ++ commonTokens LimitAnnexFiles + where + preferreddir = maybe "public" fromProposedAccepted $ + M.lookup preferreddirField =<< (`M.lookup` configMap pcd) =<< repoUUID pcd + +preferredContentParser :: [ParseToken (MatchFiles Annex)] -> String -> [ParseResult (MatchFiles Annex)] +preferredContentParser tokens = map (parseToken tokens) . tokenizeMatcher + +mkMatchExpressionParser :: Annex (String -> [ParseResult (MatchFiles Annex)]) +mkMatchExpressionParser = do +#ifdef WITH_MAGICMIME + magicmime <- liftIO initMagicMime + let mimer n f = ValueToken n (usev $ f magicmime) +#else + let mimer n = ValueToken n $ + const $ Left $ "\""++n++"\" not supported; not built with MagicMime support" +#endif + let parse = parseToken $ + commonTokens LimitDiskFiles ++ +#ifdef WITH_MAGICMIME + [ mimer "mimetype" $ + matchMagic "mimetype" getMagicMimeType providedMimeType userProvidedMimeType + , mimer "mimeencoding" $ + matchMagic "mimeencoding" getMagicMimeEncoding providedMimeEncoding userProvidedMimeEncoding + ] +#else + [ mimer "mimetype" + , mimer "mimeencoding" + ] +#endif + return $ map parse . tokenizeMatcher + +{- Generates a matcher for files large enough (or meeting other criteria) + - to be added to the annex, rather than directly to git. 
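{- A worked example of the tokenizer above: an expression such as
 -
 -     "include=*.mp3 or (largerthan=50mb and not inbackend=URL)"
 -
 - is split by tokenizeMatcher into
 -
 -     ["include=*.mp3", "or", "(", "largerthan=50mb", "and", "not",
 -      "inbackend=URL", ")"]
 -
 - since parens are standalone tokens even without surrounding
 - whitespace. parseToken then recognizes syntax tokens ("and", "or",
 - "not", parens) first, and otherwise splits a token on its first '='
 - to look up a ValueToken such as largerthan.
 -}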
+ - + - annex.largefiles is configured in git config, or git attributes, + - or global git-annex config, in that order. + -} +largeFilesMatcher :: Annex GetFileMatcher +largeFilesMatcher = go =<< getGitConfigVal' annexLargeFiles + where + matcherdesc = MatcherDesc "annex.largefiles" + go (HasGitConfig (Just expr)) = do + matcher <- mkmatcher expr "git config" + return $ const $ return matcher + go v = return $ \file -> do + expr <- checkAttr "annex.largefiles" file + if null expr + then case v of + HasGlobalConfig (Just expr') -> + mkmatcher expr' "git-annex config" + _ -> return (matchAll, matcherdesc) + else mkmatcher expr "gitattributes" + + mkmatcher expr cfgfrom = do + parser <- mkMatchExpressionParser + either (badexpr cfgfrom) return $ parsedToMatcher matcherdesc $ parser expr + + badexpr cfgfrom e = giveup $ "bad annex.largefiles configuration in " ++ cfgfrom ++ ": " ++ e + +newtype AddUnlockedMatcher = AddUnlockedMatcher (FileMatcher Annex) + +addUnlockedMatcher :: Annex AddUnlockedMatcher +addUnlockedMatcher = AddUnlockedMatcher <$> + (go =<< getGitConfigVal' annexAddUnlocked) + where + go (HasGitConfig (Just expr)) = mkmatcher expr "git config" + go (HasGlobalConfig (Just expr)) = mkmatcher expr "git annex config" + go _ = matchalways False + + matcherdesc = MatcherDesc "annex.addunlocked" + + mkmatcher :: String -> String -> Annex (FileMatcher Annex) + mkmatcher expr cfgfrom = case Git.Config.isTrueFalse expr of + Just b -> matchalways b + Nothing -> do + parser <- mkMatchExpressionParser + either (badexpr cfgfrom) return $ parsedToMatcher matcherdesc $ parser expr + + badexpr cfgfrom e = giveup $ "bad annex.addunlocked configuration in " ++ cfgfrom ++ ": " ++ e + + matchalways True = return (MOp limitAnything, matcherdesc) + matchalways False = return (MOp limitNothing, matcherdesc) + +checkAddUnlockedMatcher :: AddUnlockedMatcher -> MatchInfo -> Annex Bool +checkAddUnlockedMatcher (AddUnlockedMatcher matcher) mi = + checkMatcher' matcher mi S.empty + +simply :: MatchFiles Annex -> ParseResult (MatchFiles Annex) +simply = Right . Operation + +usev :: MkLimit Annex -> String -> ParseResult (MatchFiles Annex) +usev a v = Operation <$> a v + +call :: String -> Either String (Matcher (MatchFiles Annex)) -> ParseResult (MatchFiles Annex) +call desc (Right sub) = Right $ Operation $ MatchFiles + { matchAction = \notpresent mi -> + matchMrun sub $ \o -> matchAction o notpresent mi + , matchNeedsFileName = any matchNeedsFileName sub + , matchNeedsFileContent = any matchNeedsFileContent sub + , matchNeedsKey = any matchNeedsKey sub + , matchNeedsLocationLog = any matchNeedsLocationLog sub + , matchDesc = matchDescSimple desc + } +call _ (Left err) = Left err diff --git a/Annex/Fixup.hs b/Annex/Fixup.hs new file mode 100644 index 0000000000..a60e4baa0b --- /dev/null +++ b/Annex/Fixup.hs @@ -0,0 +1,155 @@ +{- git-annex repository fixups + - + - Copyright 2013-2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
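{- A sketch of how a caller consults the matcher above (shouldAnnex is a
 - hypothetical function, not part of the patch):
 -
 -     shouldAnnex :: RawFilePath -> Annex Bool
 -     shouldAnnex file = do
 -         getmatcher <- largeFilesMatcher
 -         -- True when the file matches annex.largefiles, and also
 -         -- when no annex.largefiles expression is configured at all.
 -         checkFileMatcher getmatcher file
 -}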
+ -} + +{-# LANGUAGE OverloadedStrings #-} + +module Annex.Fixup where + +import Git.Types +import Git.Config +import Types.GitConfig +import Utility.Path +import Utility.Path.AbsRel +import Utility.SafeCommand +import Utility.Directory +import Utility.Exception +import Utility.Monad +import Utility.FileSystemEncoding +import qualified Utility.RawFilePath as R +import Utility.PartialPrelude + +import System.IO +import Data.List +import Data.Maybe +import Control.Monad +import Control.Monad.IfElse +import qualified Data.Map as M +import qualified Data.ByteString as S +import System.FilePath.ByteString +import Control.Applicative +import Prelude + +fixupRepo :: Repo -> GitConfig -> IO Repo +fixupRepo r c = do + let r' = disableWildcardExpansion r + r'' <- fixupUnusualRepos r' c + if annexDirect c + then return (fixupDirect r'') + else return r'' + +{- Disable git's built-in wildcard expansion, which is not wanted + - when using it as plumbing by git-annex. -} +disableWildcardExpansion :: Repo -> Repo +disableWildcardExpansion r = r + { gitGlobalOpts = gitGlobalOpts r ++ [Param "--literal-pathspecs"] } + +{- Direct mode repos have core.bare=true, but are not really bare. + - Fix up the Repo to be a non-bare repo, and arrange for git commands + - run by git-annex to be passed parameters that override this setting. -} +fixupDirect :: Repo -> Repo +fixupDirect r@(Repo { location = l@(Local { gitdir = d, worktree = Nothing }) }) = + r + { location = l { worktree = Just (parentDir d) } + , gitGlobalOpts = gitGlobalOpts r ++ + [ Param "-c" + , Param $ fromConfigKey coreBare ++ "=" ++ boolConfig False + ] + } +fixupDirect r = r + +{- Submodules have their gitdir containing ".git/modules/", and + - have core.worktree set, and also have a .git file in the top + - of the repo. We need to unset core.worktree, and change the .git + - file into a symlink to the git directory. This way, annex symlinks will be + - of the usual .git/annex/object form, and will consistently work + - whether a repo is used as a submodule or not, and wherever the + - submodule is mounted. + - + - git-worktree directories have a .git file. + - That needs to be converted to a symlink, and .git/annex made a symlink + - to the main repository's git-annex directory. + - The worktree shares git config with the main repository, so the same + - annex uuid and other configuration will be used in the worktree as in + - the main repository. + - + - git clone or init with --separate-git-dir similarly makes a .git file, + - which in that case points to a different git directory. It's + - also converted to a symlink so links to .git/annex will work. + - + - When the filesystem doesn't support symlinks, we cannot make .git + - into a symlink. But we don't need to, since the repo will use adjusted + - unlocked branches. + - + - Don't do any of this if the repo has not been initialized for git-annex + - use yet.
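{- A worked example of the submodule fixup described above, with assumed
 - paths. For a submodule checked out at /repo/sub:
 -
 -     worktree w = /repo/sub
 -     gitdir   d = /repo/.git/modules/sub
 -
 - replacedotgit (below) deletes the .git file in /repo/sub and creates
 -
 -     /repo/sub/.git -> ../.git/modules/sub
 -
 - with the relative target computed by relPathDirToFile w d. After
 - that, core.worktree is unset, since the symlinked .git makes the
 - worktree discoverable in the normal way.
 -}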
+ -} +fixupUnusualRepos :: Repo -> GitConfig -> IO Repo +fixupUnusualRepos r@(Repo { location = l@(Local { worktree = Just w, gitdir = d }) }) c + | isNothing (annexVersion c) = return r + | needsSubmoduleFixup r = do + when (coreSymlinks c) $ + (replacedotgit >> unsetcoreworktree) + `catchNonAsync` \e -> hPutStrLn stderr $ + "warning: unable to convert submodule to form that will work with git-annex: " ++ show e + return $ r' + { config = M.delete "core.worktree" (config r) + } + | otherwise = ifM (needsGitLinkFixup r) + ( do + when (coreSymlinks c) $ + (replacedotgit >> worktreefixup) + `catchNonAsync` \e -> hPutStrLn stderr $ + "warning: unable to convert .git file to symlink that will work with git-annex: " ++ show e + return r' + , return r + ) + where + dotgit = w </> ".git" + + replacedotgit = whenM (doesFileExist (fromRawFilePath dotgit)) $ do + linktarget <- relPathDirToFile w d + removeWhenExistsWith R.removeLink dotgit + R.createSymbolicLink linktarget dotgit + + -- Unsetting a config fails if it's not set, so ignore failure. + unsetcoreworktree = void $ Git.Config.unset "core.worktree" r + + worktreefixup = + -- git-worktree sets up a "commondir" file that contains + -- the path to the main git directory. + -- Using --separate-git-dir does not. + catchDefaultIO Nothing (headMaybe . lines <$> readFile (fromRawFilePath (d </> "commondir"))) >>= \case + Just gd -> do + -- Make the worktree's git directory + -- contain an annex symlink to the main + -- repository's annex directory. + let linktarget = toRawFilePath gd </> "annex" + R.createSymbolicLink linktarget + (dotgit </> "annex") + Nothing -> return () + + -- Repo adjusted, so that symlinks to objects that get checked + -- in will have the usual path, rather than pointing off to the + -- real .git directory. + r' + | coreSymlinks c = r { location = l { gitdir = dotgit } } + | otherwise = r +fixupUnusualRepos r _ = return r + +needsSubmoduleFixup :: Repo -> Bool +needsSubmoduleFixup (Repo { location = (Local { worktree = Just _, gitdir = d }) }) = + (".git" </> "modules") `S.isInfixOf` d +needsSubmoduleFixup _ = False + +needsGitLinkFixup :: Repo -> IO Bool +needsGitLinkFixup (Repo { location = (Local { worktree = Just wt, gitdir = d }) }) + -- Optimization: Avoid statting .git in the common case; only + -- when the gitdir is not in the usual place inside the worktree + -- might .git be a file. + | wt </> ".git" == d = return False + | otherwise = doesFileExist (fromRawFilePath (wt </> ".git")) +needsGitLinkFixup _ = return False diff --git a/Annex/GitOverlay.hs b/Annex/GitOverlay.hs new file mode 100644 index 0000000000..5388c1bfc6 --- /dev/null +++ b/Annex/GitOverlay.hs @@ -0,0 +1,124 @@ +{- Temporarily changing how git-annex runs git commands. + - + - Copyright 2014-2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.GitOverlay ( + module Annex.GitOverlay, + AltIndexFile(..), +) where + +import qualified Control.Exception as E + +import Annex.Common +import Types.IndexFiles +import Git +import Git.Types +import Git.Index +import Git.Env +import qualified Annex +import qualified Annex.Queue +import Config.Smudge + +{- Runs an action using a different git index file. -} +withIndexFile :: AltIndexFile -> (FilePath -> Annex a) -> Annex a +withIndexFile i = withAltRepo usecachedgitenv restoregitenv + where + -- This is an optimisation.
Since withIndexFile is run repeatedly, + -- typically with the same file, and addGitEnv uses the slow + -- getEnvironment when gitEnv is Nothing, and has to do a + -- nontrivial amount of work, we cache the modified environment + -- the first time, and reuse it in subsequent calls for the same + -- index file. + -- + -- (This could be done at another level; eg when creating the + -- Git object in the first place, but it's more efficient to let + -- the environment be inherited in all calls to git where it + -- does not need to be modified.) + -- + -- Also, the use of AltIndexFile avoids needing to construct + -- the FilePath each time, which saves enough time to be worth the + -- added complication. + usecachedgitenv g = case gitEnv g of + Nothing -> Annex.withState $ \s -> case Annex.cachedgitenv s of + Just (cachedi, cachedf, cachede) | i == cachedi -> + return (s, (g { gitEnv = Just cachede }, cachedf)) + _ -> do + r@(g', f) <- addindex g + let cache = (,,) + <$> Just i + <*> Just f + <*> gitEnv g' + return (s { Annex.cachedgitenv = cache }, r) + Just _ -> liftIO $ addindex g + + addindex g = do + f <- indexEnvVal $ case i of + AnnexIndexFile -> gitAnnexIndex g + ViewIndexFile -> gitAnnexViewIndex g + g' <- addGitEnv g indexEnv f + return (g', f) + + restoregitenv g g' = g' { gitEnv = gitEnv g } + +{- Runs an action using a different git work tree. + - + - Smudge and clean filters are disabled in this work tree. -} +withWorkTree :: FilePath -> Annex a -> Annex a +withWorkTree d a = withAltRepo + (\g -> return $ (g { location = modlocation (location g), gitGlobalOpts = gitGlobalOpts g ++ bypassSmudgeConfig }, ())) + (\g g' -> g' { location = location g, gitGlobalOpts = gitGlobalOpts g }) + (const a) + where + modlocation l@(Local {}) = l { worktree = Just (toRawFilePath d) } + modlocation _ = giveup "withWorkTree of non-local git repo" + +{- Runs an action with the git index file and HEAD, and a few other + - files that are related to the work tree coming from an overlay + - directory other than the usual. This is done by pointing + - GIT_COMMON_DIR at the regular git directory, and GIT_DIR at the + - overlay directory. + - + - Needs git 2.2.0 or newer. + -} +withWorkTreeRelated :: FilePath -> Annex a -> Annex a +withWorkTreeRelated d a = withAltRepo modrepo unmodrepo (const a) + where + modrepo g = liftIO $ do + g' <- addGitEnv g "GIT_COMMON_DIR" . fromRawFilePath + =<< absPath (localGitDir g) + g'' <- addGitEnv g' "GIT_DIR" d + return (g'' { gitEnvOverridesGitDir = True }, ()) + unmodrepo g g' = g' + { gitEnv = gitEnv g + , gitEnvOverridesGitDir = gitEnvOverridesGitDir g + } + +withAltRepo + :: (Repo -> Annex (Repo, t)) + -- ^ modify Repo + -> (Repo -> Repo -> Repo) + -- ^ undo modifications; first Repo is the original and second + -- is the one after running the action. + -> (t -> Annex a) + -> Annex a +withAltRepo modrepo unmodrepo a = do + g <- gitRepo + (g', t) <- modrepo g + q <- Annex.Queue.get + v <- tryNonAsync $ do + Annex.changeState $ \s -> s + { Annex.repo = g' + -- Start a separate queue for any changes made + -- with the modified repo. 
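{- A minimal usage sketch of withIndexFile as defined above
 - (stageToAnnexIndex is hypothetical; the print is just for
 - illustration):
 -
 -     stageToAnnexIndex :: Annex ()
 -     stageToAnnexIndex = withIndexFile AnnexIndexFile $ \indexfile ->
 -         -- git commands run in here see GIT_INDEX_FILE pointing at
 -         -- indexfile, so staging does not disturb the user's
 -         -- working index.
 -         liftIO $ putStrLn ("staging via " ++ indexfile)
 -}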
+ , Annex.repoqueue = Nothing + } + a t + void $ tryNonAsync Annex.Queue.flush + Annex.changeState $ \s -> s + { Annex.repo = unmodrepo g (Annex.repo s) + , Annex.repoqueue = Just q + } + either E.throw return v diff --git a/Annex/HashObject.hs b/Annex/HashObject.hs new file mode 100644 index 0000000000..4a0ea187ed --- /dev/null +++ b/Annex/HashObject.hs @@ -0,0 +1,66 @@ +{- git hash-object interface + - + - Copyright 2016-2022 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.HashObject ( + hashFile, + hashBlob, + hashObjectStop, + mkConcurrentHashObjectHandle, + withHashObjectHandle, +) where + +import Annex.Common +import qualified Git.HashObject +import qualified Annex +import Git.Types +import Utility.ResourcePool +import Types.Concurrency +import Annex.Concurrent.Utility + +hashObjectStop :: Annex () +hashObjectStop = maybe noop stop =<< Annex.getState Annex.hashobjecthandle + where + stop p = do + liftIO $ freeResourcePool p Git.HashObject.hashObjectStop + Annex.changeState $ \s -> s { Annex.hashobjecthandle = Nothing } + +hashFile :: RawFilePath -> Annex Sha +hashFile f = withHashObjectHandle $ \h -> + liftIO $ Git.HashObject.hashFile h f + +{- Note that the content will be written to a temp file. + - So it may be faster to use Git.HashObject.hashObject for large + - blob contents. -} +hashBlob :: Git.HashObject.HashableBlob b => b -> Annex Sha +hashBlob content = withHashObjectHandle $ \h -> + liftIO $ Git.HashObject.hashBlob h content + +withHashObjectHandle :: (Git.HashObject.HashObjectHandle -> Annex a) -> Annex a +withHashObjectHandle a = + maybe mkpool go =<< Annex.getState Annex.hashobjecthandle + where + go p = withResourcePool p start a + start = inRepo $ Git.HashObject.hashObjectStart True + mkpool = do + -- This only runs in non-concurrent code paths; + -- a concurrent pool is set up earlier when needed. + p <- mkResourcePoolNonConcurrent start + Annex.changeState $ \s -> s { Annex.hashobjecthandle = Just p } + go p + +mkConcurrentHashObjectHandle :: Concurrency -> Annex (ResourcePool Git.HashObject.HashObjectHandle) +mkConcurrentHashObjectHandle c = + Annex.getState Annex.hashobjecthandle >>= \case + Just p@(ResourcePool {}) -> return p + _ -> mkResourcePool =<< liftIO (maxHashObjects c) + +{- git hash-object is typically CPU bound, and is not likely to be the main + - bottleneck for any command. So limit to the number of CPU cores, maximum, + - while respecting the -Jn value. + -} +maxHashObjects :: Concurrency -> IO Int +maxHashObjects = concurrencyUpToCpus diff --git a/Annex/Hook.hs b/Annex/Hook.hs new file mode 100644 index 0000000000..8c6d648fb0 --- /dev/null +++ b/Annex/Hook.hs @@ -0,0 +1,88 @@ +{- git-annex git hooks + - + - Note that it's important that the content of scripts installed by + - git-annex not change, otherwise removing old hooks using an old + - version of the script would fail. + - + - Copyright 2013-2019 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.Hook where + +import Annex.Common +import qualified Git.Hook as Git +import qualified Annex +import Utility.Shell + +import qualified Data.Map as M + +preCommitHook :: Git.Hook +preCommitHook = Git.Hook "pre-commit" (mkHookScript "git annex pre-commit .") [] + +postReceiveHook :: Git.Hook +postReceiveHook = Git.Hook "post-receive" + -- Only run git-annex post-receive when git-annex supports it, + -- to avoid failing if the repository with this hook is used + -- with an older version of git-annex. 
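{- A sketch of hashing content through the pooled handles defined in
 - Annex.HashObject above. This assumes a strict ByteString instance of
 - HashableBlob, which the hashBlob constraint suggests:
 -
 -     import qualified Data.ByteString.Char8 as B8
 -
 -     shaOfGreeting :: Annex Sha
 -     shaOfGreeting = hashBlob (B8.pack "hello world")
 -
 - For content already on disk, hashFile avoids the temp file that
 - hashBlob writes, so it is the better choice for large data.
 -}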
+ (mkHookScript "if git annex post-receive --help >/dev/null 2>&1; then git annex post-receive; fi") + -- This is an old version of the hook script. + [ mkHookScript "git annex post-receive" + ] + +postCheckoutHook :: Git.Hook +postCheckoutHook = Git.Hook "post-checkout" smudgeHook [] + +postMergeHook :: Git.Hook +postMergeHook = Git.Hook "post-merge" smudgeHook [] + +-- Older versions of git-annex didn't support this command, but neither did +-- they support v7 repositories. +smudgeHook :: String +smudgeHook = mkHookScript "git annex smudge --update" + +preCommitAnnexHook :: Git.Hook +preCommitAnnexHook = Git.Hook "pre-commit-annex" "" [] + +postUpdateAnnexHook :: Git.Hook +postUpdateAnnexHook = Git.Hook "post-update-annex" "" [] + +mkHookScript :: String -> String +mkHookScript s = unlines + [ shebang + , "# automatically configured by git-annex" + , s + ] + +hookWrite :: Git.Hook -> Annex () +hookWrite h = unlessM (inRepo $ Git.hookWrite h) $ + hookWarning h "already exists, not configuring" + +hookUnWrite :: Git.Hook -> Annex () +hookUnWrite h = unlessM (inRepo $ Git.hookUnWrite h) $ + hookWarning h "contents modified; not deleting. Edit it to remove call to git annex." + +hookWarning :: Git.Hook -> String -> Annex () +hookWarning h msg = do + r <- gitRepo + warning $ UnquotedString $ + Git.hookName h ++ " hook (" ++ Git.hookFile h r ++ ") " ++ msg + +{- Runs a hook. To avoid checking if the hook exists every time, + - the existing hooks are cached. -} +runAnnexHook :: Git.Hook -> Annex () +runAnnexHook hook = do + m <- Annex.getState Annex.existinghooks + case M.lookup hook m of + Just True -> run + Just False -> noop + Nothing -> do + exists <- inRepo $ Git.hookExists hook + Annex.changeState $ \s -> s + { Annex.existinghooks = M.insert hook exists m } + when exists run + where + run = unlessM (inRepo $ Git.runHook hook) $ do + h <- fromRepo $ Git.hookFile hook + warning $ UnquotedString $ h ++ " failed" diff --git a/Annex/Import.hs b/Annex/Import.hs new file mode 100644 index 0000000000..3de2139d4b --- /dev/null +++ b/Annex/Import.hs @@ -0,0 +1,1106 @@ +{- git-annex import from remotes + - + - Copyright 2019-2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
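{- A worked example of mkHookScript above: with the shebang provided by
 - Utility.Shell (typically "#!/bin/sh"),
 - mkHookScript "git annex smudge --update" produces
 -
 -     #!/bin/sh
 -     # automatically configured by git-annex
 -     git annex smudge --update
 -
 - which is the exact content hookWrite installs as the post-checkout
 - and post-merge hooks, and what hookUnWrite later matches against
 - before deleting.
 -}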
+ -} + +{-# LANGUAGE OverloadedStrings #-} + +module Annex.Import ( + ImportTreeConfig(..), + ImportCommitConfig(..), + buildImportCommit, + buildImportTrees, + recordImportTree, + canImportKeys, + ImportResult(..), + Imported, + importChanges, + importKeys, + makeImportMatcher, + getImportableContents, +) where + +import Annex.Common +import Types.Import +import qualified Types.Remote as Remote +import Git.Types +import Git.Tree +import Git.Sha +import Git.FilePath +import Git.History +import qualified Git.DiffTree +import qualified Git.Ref +import qualified Git.Branch +import qualified Annex +import Annex.Link +import Annex.LockFile +import Annex.Content +import Annex.Export +import Annex.RemoteTrackingBranch +import Annex.HashObject +import Annex.Transfer +import Annex.CheckIgnore +import Annex.CatFile +import Annex.VectorClock +import Annex.SpecialRemote.Config +import Command +import Backend +import Types.Key +import Types.KeySource +import Messages.Progress +import Utility.DataUnits +import Utility.Metered +import Utility.Hash (sha1s) +import Logs.Import +import Logs.Export +import Logs.Location +import Logs.PreferredContent +import Types.FileMatcher +import Annex.FileMatcher +import qualified Utility.Matcher +import qualified Database.Export as Export +import qualified Database.ContentIdentifier as CIDDb +import qualified Logs.ContentIdentifier as CIDLog +import Backend.Utilities + +import Control.Concurrent.STM +import qualified Data.Map.Strict as M +import qualified Data.Set as S +import qualified System.FilePath.Posix.ByteString as Posix +import qualified System.FilePath.ByteString as P +import qualified Data.ByteArray.Encoding as BA + +{- Configures how to build an import tree. -} +data ImportTreeConfig + = ImportTree + -- ^ Import the tree as-is from the remote. + | ImportSubTree TopFilePath Sha + -- ^ Import a tree from the remote and graft it into a subdirectory + -- of the existing tree whose Sha is provided, replacing anything + -- that was there before. + deriving (Show) + +{- Configures how to build an import commit. -} +data ImportCommitConfig = ImportCommitConfig + { importCommitTracking :: Maybe Sha + -- ^ Current commit on the remote tracking branch. + , importCommitMode :: Git.Branch.CommitMode + , importCommitMessages :: [String] + } + +{- Builds a commit for an import from a special remote. + - + - When there are no changes to make (importCommitTracking + - already matches what was imported), returns Nothing. + - + - After importing from a remote, exporting the same thing back to the + - remote should be a no-op. So, the export log and database are + - updated to reflect the imported tree. + - + - This does not download any content from a remote. But since it needs the + - Key of imported files to be known, its caller will have to first download + - new files in order to generate keys for them.
+ -} +buildImportCommit + :: Remote + -> ImportTreeConfig + -> ImportCommitConfig + -> Imported + -> Annex (Maybe Ref) +buildImportCommit remote importtreeconfig importcommitconfig imported = + case importCommitTracking importcommitconfig of + Nothing -> go Nothing + Just trackingcommit -> inRepo (Git.Ref.tree trackingcommit) >>= \case + Nothing -> go Nothing + Just _ -> go (Just trackingcommit) + where + go trackingcommit = do + (importedtree, updatestate) <- recordImportTree remote importtreeconfig imported + buildImportCommit' remote importcommitconfig trackingcommit importedtree >>= \case + Just finalcommit -> do + updatestate + return (Just finalcommit) + Nothing -> return Nothing + +{- Builds a tree for an import from a special remote. + - + - Also returns an action that can be used to update + - all the other state to record the import. + -} +recordImportTree + :: Remote + -> ImportTreeConfig + -> Imported + -> Annex (History Sha, Annex ()) +recordImportTree remote importtreeconfig imported = do + importedtree@(History finaltree _) <- buildImportTrees basetree subdir imported + return (importedtree, updatestate finaltree) + where + basetree = case importtreeconfig of + ImportTree -> emptyTree + ImportSubTree _ sha -> sha + subdir = case importtreeconfig of + ImportTree -> Nothing + ImportSubTree dir _ -> Just dir + + updatestate finaltree = do + importedtree <- case subdir of + Nothing -> pure finaltree + Just dir -> + let subtreeref = Ref $ + fromRef' finaltree + <> ":" + <> getTopFilePath dir + in fromMaybe emptyTree + <$> inRepo (Git.Ref.tree subtreeref) + updateexportdb importedtree + oldexport <- updateexportlog importedtree + updatelocationlog oldexport importedtree + + updateexportdb importedtree = do + db <- Export.openDb (Remote.uuid remote) + Export.writeLockDbWhile db $ do + prevtree <- liftIO $ fromMaybe emptyTree + <$> Export.getExportTreeCurrent db + when (importedtree /= prevtree) $ do + Export.updateExportDb db prevtree importedtree + liftIO $ Export.recordExportTreeCurrent db importedtree + Export.closeDb db + + updateexportlog importedtree = do + oldexport <- getExport (Remote.uuid remote) + recordExport (Remote.uuid remote) importedtree $ ExportChange + { oldTreeish = exportedTreeishes oldexport + , newTreeish = importedtree + } + return oldexport + + -- downloadImport takes care of updating the location log + -- for the local repo when keys are downloaded, and also updates + -- the location log for the remote for keys that are present in it. + -- That leaves updating the location log for the remote for keys + -- that have had the last copy of their content removed from it. + -- + -- This must run after the export database has been updated + -- and flushed to disk, so it can query it. + updatelocationlog oldexport finaltree = do + let stillpresent db k = liftIO $ not . null + <$> Export.getExportedLocation db k + let updater db moldkey _newkey _ = case moldkey of + Just oldkey | not (isGitShaKey oldkey) -> + unlessM (stillpresent db oldkey) $ + logChange oldkey (Remote.uuid remote) InfoMissing + _ -> noop + -- When the remote is versioned, it still contains keys + -- that are not present in the new tree. 
+ unless (isVersioning (Remote.config remote)) $ do + db <- Export.openDb (Remote.uuid remote) + forM_ (exportedTreeishes oldexport) $ \oldtree -> + Export.runExportDiffUpdater updater db oldtree finaltree + Export.closeDb db + +buildImportCommit' :: Remote -> ImportCommitConfig -> Maybe Sha -> History Sha -> Annex (Maybe Sha) +buildImportCommit' remote importcommitconfig mtrackingcommit imported@(History ti _) = + case mtrackingcommit of + Nothing -> Just <$> mkcommitsunconnected imported + Just trackingcommit -> do + -- Get history of tracking branch to at most + -- one more level deep than what was imported, + -- so we'll have enough history to compare, + -- but not spend too much time getting it. + let maxdepth = succ importeddepth + inRepo (getHistoryToDepth maxdepth trackingcommit) + >>= go trackingcommit + where + go _ Nothing = Just <$> mkcommitsunconnected imported + go trackingcommit (Just h) + -- If the tracking branch head is a merge commit + -- and one side of the merge matches the history, + -- nothing new needs to be committed. + | t == ti && any sametodepth (S.toList s) = return Nothing + -- If the tracking branch matches the history, + -- nothing new needs to be committed. + -- (This is unlikely to happen.) + | sametodepth h' = return Nothing + -- If the imported tree is unchanged, + -- nothing new needs to be committed. + | otherwise = getLastImportedTree remote >>= \case + Just (LastImportedTree lasttree) + | lasttree == ti -> return Nothing + _ -> gencommit trackingcommit h + where + h'@(History t s) = mapHistory historyCommitTree h + + gencommit trackingcommit h = do + importedcommit <- case getRemoteTrackingBranchImportHistory h of + Nothing -> mkcommitsunconnected imported + Just oldimported@(History oldhc _) + | importeddepth == 1 -> + mkcommitconnected imported oldimported + | otherwise -> do + let oldimportedtrees = mapHistory historyCommitTree oldimported + mknewcommits oldhc oldimportedtrees imported + ti' <- addBackExportExcluded remote ti + Just <$> makeRemoteTrackingBranchMergeCommit' + trackingcommit importedcommit ti' + + importeddepth = historyDepth imported + + sametodepth b = imported == truncateHistoryToDepth importeddepth b + + mkcommit parents tree = inRepo $ Git.Branch.commitTree + (importCommitMode importcommitconfig) + (importCommitMessages importcommitconfig) + parents + tree + + -- Start a new history of import commits, not connected to any + -- prior import commits. + mkcommitsunconnected (History importedtree hs) = do + parents <- mapM mkcommitsunconnected (S.toList hs) + mkcommit parents importedtree + + -- Commit the new history connected with the old history. + -- Used when the import is not versioned, so the history depth is 1. + mkcommitconnected (History importedtree _) (History oldhc _) = do + let parents = [historyCommit oldhc] + mkcommit parents importedtree + + -- Reuse the commits from the old imported History when possible. + mknewcommits oldhc old new@(History importedtree hs) + | old == new = return $ historyCommit oldhc + | otherwise = do + parents <- mapM (mknewcommits oldhc old) (S.toList hs) + mkcommit parents importedtree + +{- Builds a history of git trees for an import. + - + - When a subdir is provided, the imported tree is grafted into + - the basetree at that location, replacing any object that was there. 
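{- A worked example of the grafting described above, with an assumed
 - layout. Importing files a.jpg and b.jpg into subdirectory "photos"
 - (ImportSubTree) of a basetree of
 -
 -     README  photos/old.jpg  src/Main.hs
 -
 - yields
 -
 -     README  photos/a.jpg  photos/b.jpg  src/Main.hs
 -
 - Everything previously under photos/ is replaced by the imported
 - tree, the rest of basetree is untouched, and with plain ImportTree
 - the imported tree simply becomes the whole tree.
 -}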
+ -} +buildImportTrees + :: Ref + -> Maybe TopFilePath + -> Imported + -> Annex (History Sha) +buildImportTrees basetree msubdir (ImportedFull imported) = + buildImportTreesGeneric convertImportTree basetree msubdir imported +buildImportTrees basetree msubdir (ImportedDiff (LastImportedTree oldtree) imported) = do + importtree <- if null (importableContents imported) + then pure oldtree + else applydiff + repo <- Annex.gitRepo + t <- withMkTreeHandle repo $ + graftImportTree basetree msubdir importtree + -- Diffing is not currently implemented when the history is not empty. + return (History t mempty) + where + applydiff = do + let (removed, new) = partition isremoved + (importableContents imported) + newtreeitems <- catMaybes <$> mapM mktreeitem new + let removedfiles = map (mkloc . fst) removed + inRepo $ adjustTree + (pure . Just) + -- ^ keep files that are not added/removed the same + newtreeitems + (\_oldti newti -> newti) + -- ^ prefer newly added version of file + removedfiles + oldtree + + mktreeitem (loc, DiffChanged v) = + Just <$> mkImportTreeItem msubdir loc v + mktreeitem (_, DiffRemoved) = + pure Nothing + + mkloc = asTopFilePath . fromImportLocation + + isremoved (_, v) = v == DiffRemoved + +convertImportTree :: Maybe TopFilePath -> [(ImportLocation, Either Sha Key)] -> Annex Tree +convertImportTree msubdir ls = + treeItemsToTree <$> mapM (uncurry $ mkImportTreeItem msubdir) ls + +mkImportTreeItem :: Maybe TopFilePath -> ImportLocation -> Either Sha Key -> Annex TreeItem +mkImportTreeItem msubdir loc v = case v of + Right k -> do + relf <- fromRepo $ fromTopFilePath topf + symlink <- calcRepo $ gitAnnexLink relf k + linksha <- hashSymlink symlink + return $ TreeItem treepath (fromTreeItemType TreeSymlink) linksha + Left sha -> + return $ TreeItem treepath (fromTreeItemType TreeFile) sha + where + lf = fromImportLocation loc + treepath = asTopFilePath lf + topf = asTopFilePath $ + maybe lf (\sd -> getTopFilePath sd P.</> lf) msubdir + +{- Builds a history of git trees using ContentIdentifiers. + - + - These are not the final trees that are generated by the import, which + - use Keys. The purpose of these trees is to allow quickly determining + - which files in the import have changed, and which are unchanged, to + - avoid needing to look up the Keys for unchanged ContentIdentifiers. + - When the import has a large number of files, that can be slow. + -} +buildContentIdentifierTree + :: ImportableContentsChunkable Annex (ContentIdentifier, ByteSize) + -> Annex (History Sha, M.Map Sha (ContentIdentifier, ByteSize)) +buildContentIdentifierTree importable = do + mv <- liftIO $ newTVarIO M.empty + r <- buildImportTreesGeneric (convertContentIdentifierTree mv) emptyTree Nothing importable + m <- liftIO $ atomically $ readTVar mv + return (r, m) + +{- For speed, and to avoid bloating the repository, the ContentIdentifiers + - are not actually checked into git; instead a sha1 hash is calculated
+ - internally. + -} +convertContentIdentifierTree + :: TVar (M.Map Sha (ContentIdentifier, ByteSize)) + -> Maybe TopFilePath + -> [(ImportLocation, (ContentIdentifier, ByteSize))] + -> Annex Tree +convertContentIdentifierTree mv _ ls = do + let (tis, ml) = unzip (map mktreeitem ls) + liftIO $ atomically $ modifyTVar' mv $ + M.union (M.fromList ml) + return (treeItemsToTree tis) + where + mktreeitem (loc, v@((ContentIdentifier cid), _sz)) = + (TreeItem p mode sha1, (sha1, v)) + where + p = asTopFilePath (fromImportLocation loc) + mode = fromTreeItemType TreeFile + -- Note that this hardcodes sha1, even if git has started + -- defaulting to some other checksum method. That should be + -- ok, hopefully. This checksum never needs to be verified + -- by git, which is why this does not bother to prefix the + -- cid with its length, like git would. + sha1 = Ref $ BA.convertToBase BA.Base16 $ sha1s cid + +buildImportTreesGeneric + :: (Maybe TopFilePath -> [(ImportLocation, v)] -> Annex Tree) + -> Ref + -> Maybe TopFilePath + -> ImportableContentsChunkable Annex v + -> Annex (History Sha) +buildImportTreesGeneric converttree basetree msubdir (ImportableContentsComplete importable) = do + repo <- Annex.gitRepo + withMkTreeHandle repo $ buildImportTreesGeneric' converttree basetree msubdir importable +buildImportTreesGeneric converttree basetree msubdir importable@(ImportableContentsChunked {}) = do + repo <- Annex.gitRepo + withMkTreeHandle repo $ \hdl -> + History + <$> go hdl + <*> buildImportTreesHistory converttree basetree msubdir + (importableHistoryComplete importable) hdl + where + go hdl = do + tree <- gochunks [] (importableContentsChunk importable) hdl + importtree <- liftIO $ recordTree' hdl tree + graftImportTree basetree msubdir importtree hdl + + gochunks l c hdl = do + let subdir = importChunkSubDir $ importableContentsSubDir c + -- Full directory prefix where the sub tree is located. + let fullprefix = asTopFilePath $ case msubdir of + Nothing -> subdir + Just d -> getTopFilePath d Posix.</> subdir + Tree ts <- converttree (Just fullprefix) $ + map (\(p, i) -> (mkImportLocation p, i)) + (importableContentsSubTree c) + -- Record this subtree before getting the next chunk; this + -- avoids buffering all the chunks into memory. + tc <- liftIO $ recordSubTree hdl $ + NewSubTree (asTopFilePath subdir) ts + importableContentsNextChunk c >>= \case + Nothing -> return (Tree (tc:l)) + Just c' -> gochunks (tc:l) c' hdl + +buildImportTreesGeneric' + :: (Maybe TopFilePath -> [(ImportLocation, v)] -> Annex Tree) + -> Ref + -> Maybe TopFilePath + -> ImportableContents v + -> MkTreeHandle + -> Annex (History Sha) +buildImportTreesGeneric' converttree basetree msubdir importable hdl = History + <$> buildImportTree converttree basetree msubdir (importableContents importable) hdl + <*> buildImportTreesHistory converttree basetree msubdir (importableHistory importable) hdl + +buildImportTree + :: (Maybe TopFilePath -> [(ImportLocation, v)] -> Annex Tree) + -> Ref + -> Maybe TopFilePath + -> [(ImportLocation, v)] + -> MkTreeHandle + -> Annex Sha +buildImportTree converttree basetree msubdir ls hdl = do + importtree <- liftIO .
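{- A standalone sketch of the internal sha computed above: the hex SHA-1
 - of the raw ContentIdentifier bytes (assuming the cryptonite and
 - memory packages that Utility.Hash wraps; cidSha1 is a hypothetical
 - name):
 -
 -     import Crypto.Hash (SHA1 (..), hashWith)
 -     import qualified Data.ByteArray.Encoding as BA
 -     import qualified Data.ByteString.Char8 as B8
 -
 -     cidSha1 :: B8.ByteString -> B8.ByteString
 -     cidSha1 cid = BA.convertToBase BA.Base16 (hashWith SHA1 cid)
 -
 - The same identifier bytes always map to the same tree blob, so an
 - unchanged file diffs as unchanged between imports.
 -}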
recordTree' hdl =<< converttree msubdir ls + graftImportTree basetree msubdir importtree hdl + +graftImportTree + :: Ref + -> Maybe TopFilePath + -> Sha + -> MkTreeHandle + -> Annex Sha +graftImportTree basetree msubdir tree hdl = case msubdir of + Nothing -> return tree + Just subdir -> inRepo $ \repo -> + graftTree' tree subdir basetree repo hdl + +buildImportTreesHistory + :: (Maybe TopFilePath -> [(ImportLocation, v)] -> Annex Tree) + -> Ref + -> Maybe TopFilePath + -> [ImportableContents v] + -> MkTreeHandle + -> Annex (S.Set (History Sha)) +buildImportTreesHistory converttree basetree msubdir history hdl = S.fromList + <$> mapM (\ic -> buildImportTreesGeneric' converttree basetree msubdir ic hdl) history + +canImportKeys :: Remote -> Bool -> Bool +canImportKeys remote importcontent = + importcontent || isJust (Remote.importKey ia) + where + ia = Remote.importActions remote + +-- Result of an import. ImportUnfinished indicates that some file failed to +-- be imported. Running again should resume where it left off. +data ImportResult t + = ImportFinished t + | ImportUnfinished + +data Diffed t + = DiffChanged t + | DiffRemoved + deriving (Eq) + +data Imported + = ImportedFull (ImportableContentsChunkable Annex (Either Sha Key)) + | ImportedDiff LastImportedTree (ImportableContents (Diffed (Either Sha Key))) + +newtype LastImportedTree = LastImportedTree Sha + +{- Diffs between the previous and current ContentIdentifier trees, and + - runs importKeys on only the changed files. + - + - This will download the same content as if importKeys were run on all + - files, but this speeds it up significantly when there are a lot of files + - and only a few have changed. importKeys has to look up each + - ContentIdentifier to see if a Key is known for it. This avoids doing + - that lookup on files that have not changed. + - + - Diffing is not currently implemented when there is a History. + -} +importChanges + :: Remote + -> ImportTreeConfig + -> Bool + -> Bool + -> ImportableContentsChunkable Annex (ContentIdentifier, ByteSize) + -> Annex (ImportResult Imported) +importChanges remote importtreeconfig importcontent thirdpartypopulated importablecontents = do + ((History currcidtree currhistory), cidtreemap) <- buildContentIdentifierTree importablecontents + -- diffimport below does not handle history, so when there is + -- history, do a full import. + if not (S.null currhistory) + then fullimport currcidtree + else do + getContentIdentifierTree (Remote.uuid remote) >>= \case + Nothing -> fullimport currcidtree + Just prevcidtree -> candiffimport prevcidtree >>= \case + Nothing -> fullimport currcidtree + Just lastimportedtree -> diffimport cidtreemap prevcidtree currcidtree lastimportedtree + where + remember = recordContentIdentifierTree (Remote.uuid remote) + + -- In order to use a diff, the previous ContentIdentifier tree must + -- not have been garbage collected. Which can happen since there + -- are no git refs to it. + -- + -- Also, a tree must have been imported before, and that tree must + -- also have not been garbage collected (which is less likely to + -- happen due to the remote tracking branch). 
+ candiffimport prevcidtree = + catObjectMetaData prevcidtree >>= \case + Nothing -> return Nothing + Just _ -> getLastImportedTree remote >>= \case + Nothing -> return Nothing + Just lastimported@(LastImportedTree t) -> + ifM (isJust <$> catObjectMetaData t) + ( return (Just lastimported) + , return Nothing + ) + + fullimport currcidtree = + importKeys remote importtreeconfig importcontent thirdpartypopulated importablecontents >>= \case + ImportUnfinished -> return ImportUnfinished + ImportFinished r -> do + remember currcidtree + return $ ImportFinished $ ImportedFull r + + diffimport cidtreemap prevcidtree currcidtree lastimportedtree = do + (diff, cleanup) <- inRepo $ Git.DiffTree.diffTreeRecursive + prevcidtree + currcidtree + let (removed, changed) = partition isremoval diff + let mkicchanged ti = do + v <- M.lookup (Git.DiffTree.dstsha ti) cidtreemap + return (mkloc ti, v) + let ic = ImportableContentsComplete $ ImportableContents + { importableContents = mapMaybe mkicchanged changed + , importableHistory = [] + } + importKeys remote importtreeconfig importcontent thirdpartypopulated ic >>= \case + ImportUnfinished -> do + void $ liftIO cleanup + return ImportUnfinished + ImportFinished (ImportableContentsComplete ic') -> + liftIO cleanup >>= \case + False -> return ImportUnfinished + True -> do + remember currcidtree + return $ ImportFinished $ + ImportedDiff lastimportedtree + (mkdiff ic' removed) + -- importKeys is not passed ImportableContentsChunked + -- above, so it cannot return it + ImportFinished (ImportableContentsChunked {}) -> error "internal" + + isremoval ti = Git.DiffTree.dstsha ti `elem` nullShas + + mkloc = mkImportLocation . getTopFilePath . Git.DiffTree.file + + mkdiff ic removed = ImportableContents + { importableContents = diffremoved ++ diffchanged + , importableHistory = [] + } + where + diffchanged = map + (\(loc, v) -> (loc, DiffChanged v)) + (importableContents ic) + diffremoved = map + (\ti -> (mkloc ti, DiffRemoved)) + removed + +{- Gets the tree that was last imported from the remote + - (or exported to it if an export happened after the last import). + -} +getLastImportedTree :: Remote -> Annex (Maybe LastImportedTree) +getLastImportedTree remote = do + db <- Export.openDb (Remote.uuid remote) + mtree <- liftIO $ Export.getExportTreeCurrent db + Export.closeDb db + return (LastImportedTree <$> mtree) + +{- Downloads all new ContentIdentifiers, or when importcontent is False, + - generates Keys without downloading. + - + - Generates either a Key or a git Sha, depending on annex.largefiles. + - But when importcontent is False, it cannot match on annex.largefiles + - (or generate a git Sha), so always generates Keys. + - + - Supports concurrency when enabled. + - + - Note that, when a ContentIdentifier has been imported before, + - generates the same thing that was imported before, so annex.largefiles + - is not reapplied. + -} +importKeys + :: Remote + -> ImportTreeConfig + -> Bool + -> Bool + -> ImportableContentsChunkable Annex (ContentIdentifier, ByteSize) + -> Annex (ImportResult (ImportableContentsChunkable Annex (Either Sha Key))) +importKeys remote importtreeconfig importcontent thirdpartypopulated importablecontents = do + unless (canImportKeys remote importcontent) $ + giveup "This remote does not support importing without downloading content." + -- This map is used to remember content identifiers that + -- were just imported, before they have necessarily been + -- stored in the database. 
This way, if the same content + -- identifier appears multiple times in the + -- importablecontents (eg when it has a history), + -- they will only be imported once. + cidmap <- liftIO $ newTVarIO M.empty + -- When concurrency is enabled, this set is needed to + -- avoid two threads both importing the same content identifier. + importing <- liftIO $ newTVarIO S.empty + withciddb $ \db -> do + db' <- CIDDb.needsUpdateFromLog db + >>= maybe (pure db) (CIDDb.updateFromLog db) + (prepclock (run cidmap importing db')) + where + -- When not importing content, reuse the same vector + -- clock for all state that's recorded. This can save + -- a little bit of disk space. Individual file downloads + -- while downloading take too long for this optimisation + -- to be safe to do. + prepclock a + | importcontent = a + | otherwise = reuseVectorClockWhile a + + withciddb a = do + cidlck <- calcRepo' gitAnnexContentIdentifierLock + withExclusiveLock cidlck $ + bracket CIDDb.openDb CIDDb.closeDb a + + run cidmap importing db = do + largematcher <- largeFilesMatcher + case importablecontents of + ImportableContentsComplete ic -> + go False largematcher cidmap importing db ic >>= return . \case + Nothing -> ImportUnfinished + Just v -> ImportFinished $ ImportableContentsComplete v + ImportableContentsChunked {} -> do + c <- gochunked db (importableContentsChunk importablecontents) + gohistory largematcher cidmap importing db (importableHistoryComplete importablecontents) >>= return . \case + Nothing -> ImportUnfinished + Just h -> ImportFinished $ ImportableContentsChunked + { importableContentsChunk = c + , importableHistoryComplete = h + } + + go oldversion largematcher cidmap importing db (ImportableContents l h) = do + jobs <- forM l $ \i -> + if thirdpartypopulated + then Left <$> thirdpartypopulatedimport db i + else startimport cidmap importing db i oldversion largematcher + l' <- liftIO $ forM jobs $ + either pure (atomically . takeTMVar) + if any isNothing l' + then return Nothing + else gohistory largematcher cidmap importing db h >>= return . \case + Nothing -> Nothing + Just h' -> Just $ ImportableContents (catMaybes l') h' + + gohistory largematcher cidmap importing db h = do + h' <- mapM (go True largematcher cidmap importing db) h + if any isNothing h' + then return Nothing + else return $ Just $ catMaybes h' + + gochunked db c + -- Downloading cannot be done when chunked, since only + -- the first chunk is processed before returning. + | importcontent = giveup "importKeys does not support downloading chunked import" + -- Chunked import is currently only used by thirdpartypopulated + -- remotes. + | not thirdpartypopulated = giveup "importKeys does not support chunked import when not thirdpartypopulated" + | otherwise = do + l <- forM (importableContentsSubTree c) $ \(loc, i) -> do + let loc' = importableContentsChunkFullLocation (importableContentsSubDir c) loc + thirdpartypopulatedimport db (loc', i) >>= return . 
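{- The "importing" TVar above acts as a per-content-identifier
 - claim/release lock in STM. A generic standalone sketch of the same
 - pattern, mirroring the waitstart/signaldone pair below (claim,
 - release, and withClaim are hypothetical names):
 -
 -     import Control.Concurrent.STM
 -     import Control.Exception (bracket_)
 -     import qualified Data.Set as S
 -
 -     claim :: Ord k => TVar (S.Set k) -> k -> IO ()
 -     claim tv k = atomically $ do
 -         s <- readTVar tv
 -         if S.member k s
 -             then retry  -- held by another thread; block until freed
 -             else writeTVar tv (S.insert k s)
 -
 -     release :: Ord k => TVar (S.Set k) -> k -> IO ()
 -     release tv k = atomically $ modifyTVar' tv (S.delete k)
 -
 -     withClaim :: Ord k => TVar (S.Set k) -> k -> IO a -> IO a
 -     withClaim tv k = bracket_ (claim tv k) (release tv k)
 -}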
\case + Just (_loc, k) -> Just (loc, k) + Nothing -> Nothing + return $ ImportableContentsChunk + { importableContentsSubDir = importableContentsSubDir c + , importableContentsSubTree = catMaybes l + , importableContentsNextChunk = + importableContentsNextChunk c >>= \case + Nothing -> return Nothing + Just c' -> withciddb $ \db' -> + prepclock $ + Just <$> gochunked db' c' + } + + waitstart importing cid = liftIO $ atomically $ do + s <- readTVar importing + if S.member cid s + then retry + else writeTVar importing $ S.insert cid s + + signaldone importing cid = liftIO $ atomically $ do + s <- readTVar importing + writeTVar importing $ S.delete cid s + + startimport cidmap importing db i@(loc, (cid, _sz)) oldversion largematcher = getcidkey cidmap db cid >>= \case + (k:ks) -> + -- If the same content was imported before + -- yielding multiple different keys, it's not clear + -- which is best to use this time, so pick the + -- first in the list. But, if any of them is a + -- git sha, use it, because the content must + -- be included in the git repo then. + let v = case mapMaybe keyGitSha (k:ks) of + (sha:_) -> Left sha + [] -> Right k + in return $ Left $ Just (loc, v) + [] -> do + job <- liftIO $ newEmptyTMVarIO + let ai = ActionItemOther (Just (QuotedPath (fromImportLocation loc))) + let si = SeekInput [] + let importaction = starting ("import " ++ Remote.name remote) ai si $ do + when oldversion $ + showNote "old version" + tryNonAsync (importordownload cidmap i largematcher) >>= \case + Left e -> next $ do + warning (UnquotedString (show e)) + liftIO $ atomically $ + putTMVar job Nothing + return False + Right r -> next $ do + liftIO $ atomically $ + putTMVar job r + return True + commandAction $ bracket_ + (waitstart importing cid) + (signaldone importing cid) + importaction + return (Right job) + + thirdpartypopulatedimport db (loc, (cid, sz)) = + case Remote.importKey ia of + Nothing -> return Nothing + Just importkey -> + tryNonAsync (importkey loc cid sz nullMeterUpdate) >>= \case + Right (Just k) -> do + recordcidkeyindb db cid k + logChange k (Remote.uuid remote) InfoPresent + return $ Just (loc, Right k) + Right Nothing -> return Nothing + Left e -> do + warning (UnquotedString (show e)) + return Nothing + + importordownload cidmap (loc, (cid, sz)) largematcher = do + f <- locworktreefile loc + matcher <- largematcher f + -- When importing a key is supported, always use it rather + -- than downloading and retrieving a key, to avoid + -- generating trees with different keys for the same content. + let act = if importcontent + then case Remote.importKey ia of + Nothing -> dodownload + Just _ -> if Utility.Matcher.introspect matchNeedsFileContent (fst matcher) + then dodownload + else doimport + else doimport + act cidmap (loc, (cid, sz)) f matcher + + doimport cidmap (loc, (cid, sz)) f matcher = + case Remote.importKey ia of + Nothing -> error "internal" -- checked earlier + Just importkey -> do + when (Utility.Matcher.introspect matchNeedsFileContent (fst matcher)) $ + giveup "annex.largefiles configuration examines file contents, so cannot import without content." 
+ let mi = MatchingInfo ProvidedInfo + { providedFilePath = Just f + , providedKey = Nothing + , providedFileSize = Just sz + , providedMimeType = Nothing + , providedMimeEncoding = Nothing + , providedLinkType = Nothing + } + islargefile <- checkMatcher' matcher mi mempty + metered Nothing sz bwlimit $ const $ if islargefile + then doimportlarge importkey cidmap loc cid sz f + else doimportsmall cidmap loc cid sz + + doimportlarge importkey cidmap loc cid sz f p = + tryNonAsync importer >>= \case + Right (Just (k, True)) -> return $ Just (loc, Right k) + Right _ -> return Nothing + Left e -> do + warning (UnquotedString (show e)) + return Nothing + where + importer = do + -- Don't display progress when generating + -- key, if the content will later be + -- downloaded, which is a more expensive + -- operation generally. + let p' = if importcontent then nullMeterUpdate else p + importkey loc cid sz p' >>= \case + Nothing -> return Nothing + Just k -> checkSecureHashes k >>= \case + Nothing -> do + recordcidkey cidmap cid k + logChange k (Remote.uuid remote) InfoPresent + if importcontent + then getcontent k + else return (Just (k, True)) + Just msg -> giveup (msg ++ " to import") + + getcontent :: Key -> Annex (Maybe (Key, Bool)) + getcontent k = do + let af = AssociatedFile (Just f) + let downloader p' tmpfile = do + _ <- Remote.retrieveExportWithContentIdentifier + ia loc [cid] (fromRawFilePath tmpfile) + (Left k) + (combineMeterUpdate p' p) + ok <- moveAnnex k af tmpfile + when ok $ + logStatus k InfoPresent + return (Just (k, ok)) + checkDiskSpaceToGet k Nothing Nothing $ + notifyTransfer Download af $ + download' (Remote.uuid remote) k af Nothing stdRetry $ \p' -> + withTmp k $ downloader p' + + -- The file is small, so is added to git, so while importing + -- without content does not retrieve annexed files, it does + -- need to retrieve this file. 
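
The ProvidedInfo record built above is what allows annex.largefiles to be evaluated before any content exists locally: only the path and size are supplied, so only matcher tokens that need no file content can be answered. A hedged illustration, with hypothetical configuration values that are not part of this patch:

-- decidable from ProvidedInfo alone (path and size):
--   annex.largefiles = "largerthan=100kb"
--   annex.largefiles = "include=*.iso"
-- needs the file content, so the guard above gives up instead:
--   annex.largefiles = "mimetype=text/*"
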
+ doimportsmall cidmap loc cid sz p = do + let downloader tmpfile = do + (k, _) <- Remote.retrieveExportWithContentIdentifier + ia loc [cid] (fromRawFilePath tmpfile) + (Right (mkkey tmpfile)) + p + case keyGitSha k of + Just sha -> do + recordcidkey cidmap cid k + return sha + Nothing -> error "internal" + checkDiskSpaceToGet tmpkey Nothing Nothing $ + withTmp tmpkey $ \tmpfile -> + tryNonAsync (downloader tmpfile) >>= \case + Right sha -> return $ Just (loc, Left sha) + Left e -> do + warning (UnquotedString (show e)) + return Nothing + where + tmpkey = importKey cid sz + mkkey tmpfile = gitShaKey <$> hashFile tmpfile + + dodownload cidmap (loc, (cid, sz)) f matcher = do + let af = AssociatedFile (Just f) + let downloader tmpfile p = do + (k, _) <- Remote.retrieveExportWithContentIdentifier + ia loc [cid] (fromRawFilePath tmpfile) + (Right (mkkey tmpfile)) + p + case keyGitSha k of + Nothing -> do + ok <- moveAnnex k af tmpfile + when ok $ do + recordcidkey cidmap cid k + logStatus k InfoPresent + logChange k (Remote.uuid remote) InfoPresent + return (Right k, ok) + Just sha -> do + recordcidkey cidmap cid k + return (Left sha, True) + let rundownload tmpfile p = tryNonAsync (downloader tmpfile p) >>= \case + Right (v, True) -> return $ Just (loc, v) + Right (_, False) -> return Nothing + Left e -> do + warning (UnquotedString (show e)) + return Nothing + checkDiskSpaceToGet tmpkey Nothing Nothing $ + notifyTransfer Download af $ + download' (Remote.uuid remote) tmpkey af Nothing stdRetry $ \p -> + withTmp tmpkey $ \tmpfile -> + metered (Just p) tmpkey bwlimit $ + const (rundownload tmpfile) + where + tmpkey = importKey cid sz + + mkkey tmpfile = do + let mi = MatchingFile FileInfo + { matchFile = f + , contentFile = tmpfile + , matchKey = Nothing + } + islargefile <- checkMatcher' matcher mi mempty + if islargefile + then do + backend <- chooseBackend f + let ks = KeySource + { keyFilename = f + , contentLocation = tmpfile + , inodeCache = Nothing + } + fst <$> genKey ks nullMeterUpdate backend + else gitShaKey <$> hashFile tmpfile + + ia = Remote.importActions remote + + bwlimit = remoteAnnexBwLimitDownload (Remote.gitconfig remote) + <|> remoteAnnexBwLimit (Remote.gitconfig remote) + + locworktreefile loc = fromRepo $ fromTopFilePath $ asTopFilePath $ + case importtreeconfig of + ImportTree -> fromImportLocation loc + ImportSubTree subdir _ -> + getTopFilePath subdir P. fromImportLocation loc + + getcidkey cidmap db cid = liftIO $ + -- Avoiding querying the database when it's empty speeds up + -- the initial import. + if CIDDb.databaseIsEmpty db + then getcidkeymap cidmap cid + else CIDDb.getContentIdentifierKeys db rs cid >>= \case + [] -> getcidkeymap cidmap cid + l -> return l + + getcidkeymap cidmap cid = + atomically $ maybeToList . M.lookup cid <$> readTVar cidmap + + recordcidkey cidmap cid k = do + liftIO $ atomically $ modifyTVar' cidmap $ + M.insert cid k + -- Only record in log now; the database will be updated + -- later from the log, and the cidmap will be used for now. + recordcidkeyinlog cid k + + recordcidkeyindb db cid k = do + liftIO $ CIDDb.recordContentIdentifier db rs cid k + recordcidkeyinlog cid k + + recordcidkeyinlog cid k = + CIDLog.recordContentIdentifier rs cid k + + rs = Remote.remoteStateHandle remote + +{- Temporary key used for import of a ContentIdentifier while downloading + - content, before generating its real key. 
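
For a concrete sense of the temporary key described here, a usage sketch of importKey, which is defined just below (hypothetical content identifier and size; assumes git-annex's Key and ContentIdentifier types are in scope, plus OverloadedStrings):

-- a placeholder key of variety "CID", used only while downloading;
-- keyVariety tmpkey == OtherKey "CID", keySize tmpkey == Just 42
tmpkey :: Key
tmpkey = importKey (ContentIdentifier "b1946ac92492d2347c6235b4d2611184") 42
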
-} +importKey :: ContentIdentifier -> Integer -> Key +importKey (ContentIdentifier cid) size = mkKey $ \k -> k + { keyName = genKeyName (decodeBS cid) + , keyVariety = OtherKey "CID" + , keySize = Just size + } + +{-- Export omits non-preferred content from the tree stored on the + -- remote. So the import will normally have that content + -- omitted (unless something else added files with the same names to the + -- special remote). + -- + -- That presents a problem: Merging the imported tree would result + -- in deletion of the files that were excluded from export. + -- To avoid that happening, this adds them back to the imported tree. + --} +addBackExportExcluded :: Remote -> Sha -> Annex Sha +addBackExportExcluded remote importtree = + getExportExcluded (Remote.uuid remote) >>= \case + [] -> return importtree + excludedlist -> inRepo $ + adjustTree + -- don't remove any + (pure . Just) + excludedlist + -- if something was imported with the same + -- name as a file that was previously + -- excluded from import, use what was imported + (\imported _excluded -> imported) + [] + importtree + +{- Match the preferred content of the remote at import time. + - + - Only keyless tokens are supported, because the keys are not known + - until an imported file is downloaded, which is too late to bother + - excluding it from an import. So prunes any tokens in the preferred + - content expression that need keys. + -} +makeImportMatcher :: Remote -> Annex (Either String (FileMatcher Annex)) +makeImportMatcher r = load preferredContentTokens >>= \case + Nothing -> return $ Right (matchAll, matcherdesc) + Just (Right v) -> return $ Right (v, matcherdesc) + Just (Left err) -> return $ Left err + where + load t = M.lookup (Remote.uuid r) . fst + <$> preferredRequiredMapsLoad' pruneImportMatcher t + matcherdesc = MatcherDesc "preferred content" + +pruneImportMatcher :: Utility.Matcher.Matcher (MatchFiles a) -> Utility.Matcher.Matcher (MatchFiles a) +pruneImportMatcher = Utility.Matcher.pruneMatcher matchNeedsKey + +{- Gets the ImportableContents from the remote. + - + - Filters out any paths that include a ".git" component, because git does + - not allow storing ".git" in a git repository. While it is possible to + - write a git tree that contains that, git will complain and refuse to + - check it out. + - + - Filters out new things not matching the FileMatcher or that are + - gitignored. However, files that are already in git get imported + - regardless. (Similar to how git add behaves on gitignored files.) + - This avoids creating a remote tracking branch that, when merged, + - would delete the files. + - + - Throws exception if unable to contact the remote. + - Returns Nothing when there is no change since last time. + -} +getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize))) +getImportableContents r importtreeconfig ci matcher = do + Remote.listImportableContents (Remote.importActions r) >>= \case + Just (ImportableContentsComplete ic) -> do + dbhandle <- opendbhandle + Just . 
ImportableContentsComplete + <$> filterunwanted dbhandle ic + Just (c@(ImportableContentsChunked {})) -> do + dbhandle <- opendbhandle + Just <$> filterunwantedchunked dbhandle c + Nothing -> return Nothing + where + filterunwanted dbhandle ic = ImportableContents + <$> filterM (wanted dbhandle) (importableContents ic) + <*> mapM (filterunwanted dbhandle) (importableHistory ic) + + filterunwantedchunked dbhandle c = ImportableContentsChunked + <$> filterunwantedchunk dbhandle (importableContentsChunk c) + <*> mapM (filterunwanted dbhandle) (importableHistoryComplete c) + + filterunwantedchunk dbhandle c = ImportableContentsChunk + <$> pure (importableContentsSubDir c) + <*> filterM (wantedunder dbhandle (importableContentsSubDir c)) + (importableContentsSubTree c) + <*> pure ( + importableContentsNextChunk c >>= \case + Nothing -> return Nothing + Just c' -> Just <$> filterunwantedchunk dbhandle c' + ) + + opendbhandle = do + h <- Export.openDb (Remote.uuid r) + void $ Export.updateExportTreeFromLog h + return h + + wanted dbhandle (loc, (_cid, sz)) + | ingitdir = pure False + | otherwise = + isknown <||> (matches <&&> notignored) + where + -- Checks, from least to most expensive. + ingitdir = ".git" `elem` Posix.splitDirectories (fromImportLocation loc) + matches = matchesImportLocation matcher loc sz + isknown = isKnownImportLocation dbhandle loc + notignored = notIgnoredImportLocation importtreeconfig ci loc + + wantedunder dbhandle root (loc, v) = + wanted dbhandle (importableContentsChunkFullLocation root loc, v) + +isKnownImportLocation :: Export.ExportHandle -> ImportLocation -> Annex Bool +isKnownImportLocation dbhandle loc = liftIO $ + not . null <$> Export.getExportTreeKey dbhandle loc + +matchesImportLocation :: FileMatcher Annex -> ImportLocation -> Integer -> Annex Bool +matchesImportLocation matcher loc sz = checkMatcher' matcher mi mempty + where + mi = MatchingInfo $ ProvidedInfo + { providedFilePath = Just (fromImportLocation loc) + , providedKey = Nothing + , providedFileSize = Just sz + , providedMimeType = Nothing + , providedMimeEncoding = Nothing + , providedLinkType = Nothing + } + +notIgnoredImportLocation :: ImportTreeConfig -> CheckGitIgnore -> ImportLocation -> Annex Bool +notIgnoredImportLocation importtreeconfig ci loc = not <$> checkIgnored ci f + where + f = case importtreeconfig of + ImportSubTree dir _ -> + getTopFilePath dir P. fromImportLocation loc + ImportTree -> + fromImportLocation loc diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs new file mode 100644 index 0000000000..1ef4d346b9 --- /dev/null +++ b/Annex/Ingest.hs @@ -0,0 +1,425 @@ +{- git-annex content ingestion + - + - Copyright 2010-2022 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE OverloadedStrings #-} + +module Annex.Ingest ( + LockedDown(..), + LockDownConfig(..), + lockDown, + checkLockedDownWritePerms, + ingestAdd, + ingestAdd', + ingest, + ingest', + finishIngestUnlocked, + cleanOldKeys, + addSymlink, + genSymlink, + makeLink, + addUnlocked, + CheckGitIgnore(..), + gitAddParams, + addAnnexedFile, + addingExistingLink, +) where + +import Annex.Common +import Types.KeySource +import Types.FileMatcher +import Backend +import Annex.Content +import Annex.Perms +import Annex.Link +import Annex.MetaData +import Annex.CurrentBranch +import Annex.CheckIgnore +import Logs.Location +import qualified Git +import qualified Annex +import qualified Database.Keys +import Config +import Utility.InodeCache +import Annex.ReplaceFile +import Utility.Tmp +import Utility.CopyFile +import Utility.Touch +import Utility.Metered +import Git.FilePath +import Annex.InodeSentinal +import Annex.AdjustedBranch +import Annex.FileMatcher +import qualified Utility.RawFilePath as R + +import System.PosixCompat.Files (fileMode) + +data LockedDown = LockedDown + { lockDownConfig :: LockDownConfig + , keySource :: KeySource + } + deriving (Show) + +data LockDownConfig = LockDownConfig + { lockingFile :: Bool + -- ^ write bit removed during lock down + , hardlinkFileTmpDir :: Maybe RawFilePath + -- ^ hard link to temp directory + , checkWritePerms :: Bool + -- ^ check that write perms are successfully removed + } + deriving (Show) + +{- The file that's being ingested is locked down before a key is generated, + - to prevent it from being modified in between. This lock down is not + - perfect at best (and pretty weak at worst). For example, it does not + - guard against files that are already opened for write by another process. + - So, the InodeCache can be used to detect any changes that might be made + - to the file after it was locked down. + - + - When possible, the file is hard linked to a temp directory. This guards + - against some changes, like deletion or overwrite of the file, and + - allows lsof checks to be done more efficiently when adding a lot of files. + - + - Lockdown can fail if a file gets deleted, or if it's unable to remove + - write permissions, and Nothing will be returned. + -} +lockDown :: LockDownConfig-> FilePath -> Annex (Maybe LockedDown) +lockDown cfg file = either + (\e -> warning (UnquotedString (show e)) >> return Nothing) + (return . Just) + =<< lockDown' cfg file + +lockDown' :: LockDownConfig -> FilePath -> Annex (Either SomeException LockedDown) +lockDown' cfg file = tryNonAsync $ ifM crippledFileSystem + ( nohardlink + , case hardlinkFileTmpDir cfg of + Nothing -> nohardlink + Just tmpdir -> withhardlink tmpdir + ) + where + file' = toRawFilePath file + + nohardlink = do + setperms + withTSDelta $ liftIO . 
nohardlink' + + nohardlink' delta = do + cache <- genInodeCache file' delta + return $ LockedDown cfg $ KeySource + { keyFilename = file' + , contentLocation = file' + , inodeCache = cache + } + + withhardlink tmpdir = do + setperms + withTSDelta $ \delta -> liftIO $ do + (tmpfile, h) <- openTmpFileIn (fromRawFilePath tmpdir) $ + relatedTemplate $ "ingest-" ++ takeFileName file + hClose h + removeWhenExistsWith R.removeLink (toRawFilePath tmpfile) + withhardlink' delta tmpfile + `catchIO` const (nohardlink' delta) + + withhardlink' delta tmpfile = do + let tmpfile' = toRawFilePath tmpfile + R.createLink file' tmpfile' + cache <- genInodeCache tmpfile' delta + return $ LockedDown cfg $ KeySource + { keyFilename = file' + , contentLocation = tmpfile' + , inodeCache = cache + } + + setperms = when (lockingFile cfg) $ do + freezeContent file' + when (checkWritePerms cfg) $ do + qp <- coreQuotePath <$> Annex.getGitConfig + maybe noop (giveup . decodeBS . quote qp) + =<< checkLockedDownWritePerms file' file' + +checkLockedDownWritePerms :: RawFilePath -> RawFilePath -> Annex (Maybe StringContainingQuotedPath) +checkLockedDownWritePerms file displayfile = checkContentWritePerm file >>= return . \case + Just False -> Just $ "Unable to remove all write permissions from " + <> QuotedPath displayfile + <> " -- perhaps it has an xattr or ACL set." + _ -> Nothing + +{- Ingests a locked down file into the annex. Updates the work tree and + - index. -} +ingestAdd :: MeterUpdate -> Maybe LockedDown -> Annex (Maybe Key) +ingestAdd meterupdate ld = ingestAdd' meterupdate ld Nothing + +ingestAdd' :: MeterUpdate -> Maybe LockedDown -> Maybe Key -> Annex (Maybe Key) +ingestAdd' _ Nothing _ = return Nothing +ingestAdd' meterupdate ld@(Just (LockedDown cfg source)) mk = do + (mk', mic) <- ingest meterupdate ld mk + case mk' of + Nothing -> return Nothing + Just k -> do + let f = keyFilename source + if lockingFile cfg + then addSymlink f k mic + else do + mode <- liftIO $ catchMaybeIO $ + fileMode <$> R.getFileStatus (contentLocation source) + stagePointerFile f mode =<< hashPointerFile k + return (Just k) + +{- Ingests a locked down file into the annex. Does not update the working + - tree or the index. -} +ingest :: MeterUpdate -> Maybe LockedDown -> Maybe Key -> Annex (Maybe Key, Maybe InodeCache) +ingest meterupdate ld mk = ingest' Nothing meterupdate ld mk (Restage True) + +ingest' :: Maybe Backend -> MeterUpdate -> Maybe LockedDown -> Maybe Key -> Restage -> Annex (Maybe Key, Maybe InodeCache) +ingest' _ _ Nothing _ _ = return (Nothing, Nothing) +ingest' preferredbackend meterupdate (Just (LockedDown cfg source)) mk restage = withTSDelta $ \delta -> do + k <- case mk of + Nothing -> do + backend <- maybe + (chooseBackend $ keyFilename source) + return + preferredbackend + fst <$> genKey source meterupdate backend + Just k -> return k + let src = contentLocation source + ms <- liftIO $ catchMaybeIO $ R.getFileStatus src + mcache <- maybe (pure Nothing) (liftIO . 
toInodeCache delta src) ms
+ case (mcache, inodeCache source) of
+ (_, Nothing) -> go k mcache
+ (Just newc, Just c) | compareStrong c newc -> go k mcache
+ _ -> failure "changed while it was being added"
+ where
+ go key mcache
+ | lockingFile cfg = golocked key mcache
+ | otherwise = gounlocked key mcache
+
+ golocked key mcache =
+ tryNonAsync (moveAnnex key naf (contentLocation source)) >>= \case
+ Right True -> success key mcache
+ Right False -> giveup "failed to add content to annex"
+ Left e -> restoreFile (keyFilename source) key e
+
+ -- moveAnnex uses the AssociatedFile provided to it to unlock
+ -- locked files when getting a file in an adjusted branch.
+ -- That case does not apply here, where we're adding an unlocked
+ -- file, so provide it nothing.
+ naf = AssociatedFile Nothing
+
+ gounlocked key (Just cache) = do
+ -- Remove temp directory hard link first because
+ -- linkToAnnex falls back to copying if a file
+ -- already has a hard link.
+ cleanCruft source
+ cleanOldKeys (keyFilename source) key
+ linkToAnnex key (keyFilename source) (Just cache) >>= \case
+ LinkAnnexFailed -> failure "failed to link to annex"
+ lar -> do
+ finishIngestUnlocked' key source restage (Just lar)
+ success key (Just cache)
+ gounlocked _ _ = failure "failed statting file"
+
+ success k mcache = do
+ genMetaData k (keyFilename source) (fmap inodeCacheToMtime mcache)
+ return (Just k, mcache)
+
+ failure msg = do
+ warning $ QuotedPath (keyFilename source) <> " " <> UnquotedString msg
+ cleanCruft source
+ return (Nothing, Nothing)
+
+finishIngestUnlocked :: Key -> KeySource -> Annex ()
+finishIngestUnlocked key source = do
+ cleanCruft source
+ finishIngestUnlocked' key source (Restage True) Nothing
+
+finishIngestUnlocked' :: Key -> KeySource -> Restage -> Maybe LinkAnnexResult -> Annex ()
+finishIngestUnlocked' key source restage lar = do
+ Database.Keys.addAssociatedFile key
+ =<< inRepo (toTopFilePath (keyFilename source))
+ populateUnlockedFiles key source restage lar
+
+{- Copy to any other unlocked files using the same key.
+ -
+ - When linkToAnnex did not have to do anything, the object file
+ - was already present, and so other unlocked files are already populated,
+ - and nothing needs to be done here.
+ -}
+populateUnlockedFiles :: Key -> KeySource -> Restage -> Maybe LinkAnnexResult -> Annex ()
+populateUnlockedFiles _ _ _ (Just LinkAnnexNoop) = return ()
+populateUnlockedFiles key source restage _ = do
+ obj <- calcRepo (gitAnnexLocation key)
+ g <- Annex.gitRepo
+ ingestedf <- flip fromTopFilePath g
+ <$> inRepo (toTopFilePath (keyFilename source))
+ afs <- map (`fromTopFilePath` g) <$> Database.Keys.getAssociatedFiles key
+ forM_ (filter (/= ingestedf) afs) $
+ populatePointerFile restage key obj
+
+cleanCruft :: KeySource -> Annex ()
+cleanCruft source = when (contentLocation source /= keyFilename source) $
+ liftIO $ removeWhenExistsWith R.removeLink $ contentLocation source
+
+-- If a worktree file was hard linked to an annex object before,
+-- modifying the file would have caused the object to have the wrong
+-- content. Clean up from that.
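
cleanOldKeys below recovers from exactly that situation, deciding via inode caches whether a worktree file still matches a recorded state. A rough stand-in for that stat-based comparison, assuming only the unix package (simplified; git-annex's real InodeCache also handles weak comparisons and timestamp deltas):

import System.Posix.Files (getFileStatus, fileID, fileSize, modificationTime)
import System.Posix.Types (FileID, FileOffset, EpochTime)

-- a minimal "inode cache": inode number, size, and mtime
type Cache = (FileID, FileOffset, EpochTime)

cacheOf :: FilePath -> IO Cache
cacheOf f = do
    st <- getFileStatus f
    return (fileID st, fileSize st, modificationTime st)

-- treat the file as modified if any of the three fields differ
changedSince :: Cache -> FilePath -> IO Bool
changedSince old f = (/= old) <$> cacheOf f
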
+cleanOldKeys :: RawFilePath -> Key -> Annex ()
+cleanOldKeys file newkey = do
+ g <- Annex.gitRepo
+ topf <- inRepo (toTopFilePath file)
+ ingestedf <- fromRepo $ fromTopFilePath topf
+ oldkeys <- filter (/= newkey)
+ <$> Database.Keys.getAssociatedKey topf
+ forM_ oldkeys $ \key ->
+ unlessM (isUnmodified key =<< calcRepo (gitAnnexLocation key)) $ do
+ caches <- Database.Keys.getInodeCaches key
+ unlinkAnnex key
+ fs <- filter (/= ingestedf)
+ . map (`fromTopFilePath` g)
+ <$> Database.Keys.getAssociatedFiles key
+ filterM (`sameInodeCache` caches) fs >>= \case
+ -- If linkToAnnex fails, the associated
+ -- file with the content is still present,
+ -- so no need for any recovery.
+ (f:_) -> do
+ ic <- withTSDelta (liftIO . genInodeCache f)
+ void $ linkToAnnex key f ic
+ _ -> logStatus key InfoMissing
+
+{- On error, put the file back so it doesn't seem to have vanished.
+ - This can be called before or after the symlink is in place. -}
+restoreFile :: RawFilePath -> Key -> SomeException -> Annex a
+restoreFile file key e = do
+ whenM (inAnnex key) $ do
+ liftIO $ removeWhenExistsWith R.removeLink file
+ -- The key could be used by other files too, so leave the
+ -- content in the annex, and make a copy back to the file.
+ obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
+ unlessM (liftIO $ copyFileExternal CopyTimeStamps obj (fromRawFilePath file)) $
+ warning $ "Unable to restore content of " <> QuotedPath file <> "; it should be located in " <> QuotedPath (toRawFilePath obj)
+ thawContent file
+ throwM e
+
+{- Creates the symlink to the annexed content, returns the link target. -}
+makeLink :: RawFilePath -> Key -> Maybe InodeCache -> Annex LinkTarget
+makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do
+ l <- calcRepo $ gitAnnexLink file key
+ replaceWorkTreeFile file' $ makeAnnexLink l
+
+ -- touch symlink to have same time as the original file,
+ -- as provided in the InodeCache
+ case mcache of
+ Just c -> liftIO $ touch file (inodeCacheToMtime c) False
+ Nothing -> noop
+
+ return l
+ where
+ file' = fromRawFilePath file
+
+{- Creates the symlink to the annexed content, and stages it in git. -}
+addSymlink :: RawFilePath -> Key -> Maybe InodeCache -> Annex ()
+addSymlink file key mcache = stageSymlink file =<< genSymlink file key mcache
+
+genSymlink :: RawFilePath -> Key -> Maybe InodeCache -> Annex Git.Sha
+genSymlink file key mcache = do
+ linktarget <- makeLink file key mcache
+ hashSymlink linktarget
+
+{- Parameters to pass to git add, forcing addition of ignored files.
+ -
+ - Note that, when git add is being run on an ignored file that is already
+ - checked in, CheckGitIgnore True has no effect.
+ -}
+gitAddParams :: CheckGitIgnore -> Annex [CommandParam]
+gitAddParams (CheckGitIgnore True) = ifM (Annex.getRead Annex.force)
+ ( return [Param "-f"]
+ , return []
+ )
+gitAddParams (CheckGitIgnore False) = return [Param "-f"]
+
+{- Whether a file should be added unlocked or not. Default is to not,
+ - unless symlinks are not supported. annex.addunlocked can override that.
+ - Also, when in an adjusted branch that unlocks files, always add files
+ - unlocked.
+ -}
+addUnlocked :: AddUnlockedMatcher -> MatchInfo -> Bool -> Annex Bool
+addUnlocked matcher mi contentpresent =
+ ((not . coreSymlinks <$> Annex.getGitConfig) <||>
+ (checkAddUnlockedMatcher matcher mi) <||>
+ (maybe False go .
snd <$> getCurrentBranch) + ) + where + go (LinkAdjustment UnlockAdjustment) = True + go (LinkAdjustment LockAdjustment) = False + go (LinkAdjustment FixAdjustment) = False + go (LinkAdjustment UnFixAdjustment) = False + go (PresenceAdjustment _ (Just la)) = go (LinkAdjustment la) + go (PresenceAdjustment _ Nothing) = False + go (LinkPresentAdjustment UnlockPresentAdjustment) = contentpresent + go (LinkPresentAdjustment LockPresentAdjustment) = False + +{- Adds a file to the work tree for the key, and stages it in the index. + - The content of the key may be provided in a temp file, which will be + - moved into place. If no content is provided, adds an annex link but does + - not ingest the content. + - + - When the content of the key is not accepted into the annex, returns False. + -} +addAnnexedFile :: AddUnlockedMatcher -> RawFilePath -> Key -> Maybe RawFilePath -> Annex Bool +addAnnexedFile matcher file key mtmp = ifM (addUnlocked matcher mi (isJust mtmp)) + ( do + mode <- maybe + (pure Nothing) + (\tmp -> liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus tmp) + mtmp + stagePointerFile file mode =<< hashPointerFile key + Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file) + case mtmp of + Just tmp -> ifM (moveAnnex key af tmp) + ( linkunlocked mode >> return True + , writepointer mode >> return False + ) + Nothing -> ifM (inAnnex key) + ( linkunlocked mode >> return True + , writepointer mode >> return True + ) + , do + addSymlink file key Nothing + case mtmp of + Just tmp -> moveAnnex key af tmp + Nothing -> return True + ) + where + af = AssociatedFile (Just file) + mi = case mtmp of + Just tmp -> MatchingFile $ FileInfo + { contentFile = tmp + , matchFile = file + , matchKey = Just key + } + Nothing -> keyMatchInfoWithoutContent key file + + linkunlocked mode = linkFromAnnex key file mode >>= \case + LinkAnnexFailed -> writepointer mode + _ -> return () + + writepointer mode = liftIO $ writePointerFile file key mode + +{- Use with actions that add an already existing annex symlink or pointer + - file. The warning avoids a confusing situation where the file got copied + - from another git-annex repo, probably by accident. -} +addingExistingLink :: RawFilePath -> Key -> Annex a -> Annex a +addingExistingLink f k a = do + unlessM (isKnownKey k <||> inAnnex k) $ do + islink <- isJust <$> isAnnexLink f + warning $ + QuotedPath f + <> " is a git-annex " + <> if islink then "symlink." else "pointer file." + <> " Its content is not available in this repository." + <> " (Maybe " <> QuotedPath f <> " was copied from another repository?)" + a diff --git a/Annex/Init.hs b/Annex/Init.hs new file mode 100644 index 0000000000..0cb2e09019 --- /dev/null +++ b/Annex/Init.hs @@ -0,0 +1,475 @@ +{- git-annex repository initialization + - + - Copyright 2011-2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} + +module Annex.Init ( + checkInitializeAllowed, + ensureInitialized, + autoInitialize, + autoInitialize', + isInitialized, + initialize, + initialize', + uninitialize, + probeCrippledFileSystem, + probeCrippledFileSystem', +) where + +import Annex.Common +import qualified Annex +import qualified Git +import qualified Git.Config +import qualified Git.Objects +import Git.Types (fromConfigValue) +import Git.ConfigTypes (SharedRepository(..)) +import qualified Annex.Branch +import qualified Database.Fsck +import Logs.UUID +import Logs.Trust.Basic +import Logs.Config +import Types.TrustLevel +import Types.RepoVersion +import Annex.Version +import Annex.Difference +import Annex.UUID +import Annex.Fixup +import Annex.Path +import Config +import Config.Files +import Config.Smudge +import qualified Upgrade.V5.Direct as Direct +import qualified Annex.AdjustedBranch as AdjustedBranch +import Remote.List.Util (remotesChanged) +import Annex.Environment +import Annex.Hook +import Annex.InodeSentinal +import Upgrade +import Annex.Tmp +import Utility.UserInfo +import Annex.Perms +#ifndef mingw32_HOST_OS +import Utility.ThreadScheduler +import qualified Utility.RawFilePath as R +import Utility.FileMode +import System.Posix.User +import qualified Utility.LockFile.Posix as Posix +#endif + +import qualified Data.Map as M +import Control.Monad.IO.Class (MonadIO) +#ifndef mingw32_HOST_OS +import System.PosixCompat.Files (ownerReadMode, isNamedPipe) +import Data.Either +import qualified System.FilePath.ByteString as P +import Control.Concurrent.Async +#endif + +data InitializeAllowed = InitializeAllowed + +checkInitializeAllowed :: (InitializeAllowed -> Annex a) -> Annex a +checkInitializeAllowed a = guardSafeToUseRepo $ noAnnexFileContent' >>= \case + Nothing -> do + checkSqliteWorks + a InitializeAllowed + Just noannexmsg -> do + warning "Initialization prevented by .noannex file (remove the file to override)" + unless (null noannexmsg) $ + warning (UnquotedString noannexmsg) + giveup "Not initialized." + +initializeAllowed :: Annex Bool +initializeAllowed = isNothing <$> noAnnexFileContent' + +noAnnexFileContent' :: Annex (Maybe String) +noAnnexFileContent' = inRepo $ + noAnnexFileContent . fmap fromRawFilePath . Git.repoWorkTree + +genDescription :: Maybe String -> Annex UUIDDesc +genDescription (Just d) = return $ UUIDDesc $ encodeBS d +genDescription Nothing = do + reldir <- liftIO . relHome . fromRawFilePath + =<< liftIO . absPath + =<< fromRepo Git.repoPath + hostname <- fromMaybe "" <$> liftIO getHostname + let at = if null hostname then "" else "@" + v <- liftIO myUserName + return $ UUIDDesc $ encodeBS $ concat $ case v of + Right username -> [username, at, hostname, ":", reldir] + Left _ -> [hostname, ":", reldir] + +initialize :: Annex () -> Maybe String -> Maybe RepoVersion -> Annex () +initialize startupannex mdescription mversion = checkInitializeAllowed $ \initallowed -> do + {- Has to come before any commits are made as the shared + - clone heuristic expects no local objects. -} + sharedclone <- checkSharedClone + + {- This will make the first commit to git, so ensure git is set up + - properly to allow commits when running it. -} + ensureCommit $ Annex.Branch.create + + prepUUID + initialize' startupannex mversion initallowed + + initSharedClone sharedclone + + u <- getUUID + when (u == NoUUID) $ + giveup "Failed to read annex.uuid from git config after setting it. This should never happen. Please file a bug report." 
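
For orientation, a caller-side sketch of this entry point (hypothetical arguments; the real callers pass the Annex startup action and any version override from the command line):

-- initialize with a description and the default repository version;
-- the first argument is the startup action to run once init completes
initHere :: Annex ()
initHere = initialize (return ()) (Just "my laptop repo") Nothing
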
+ + {- Avoid overwriting existing description with a default + - description. -} + whenM (pure (isJust mdescription) <||> not . M.member u <$> uuidDescMapRaw) $ + describeUUID u =<< genDescription mdescription + +-- Everything except for uuid setup, shared clone setup, and initial +-- description. +initialize' :: Annex () -> Maybe RepoVersion -> InitializeAllowed -> Annex () +initialize' startupannex mversion _initallowed = do + checkLockSupport + checkFifoSupport + checkCrippledFileSystem + unlessM isBareRepo $ do + hookWrite preCommitHook + hookWrite postReceiveHook + setDifferences + unlessM (isJust <$> getVersion) $ + setVersion (fromMaybe defaultVersion mversion) + supportunlocked <- annexSupportUnlocked <$> Annex.getGitConfig + if supportunlocked + then configureSmudgeFilter + else deconfigureSmudgeFilter + unlessM isBareRepo $ do + hookWrite postCheckoutHook + hookWrite postMergeHook + + AdjustedBranch.checkAdjustedClone >>= \case + AdjustedBranch.InAdjustedClone -> return () + AdjustedBranch.NotInAdjustedClone -> + ifM (crippledFileSystem <&&> (not <$> isBareRepo)) + ( AdjustedBranch.adjustToCrippledFileSystem + -- Handle case where this repo was cloned from a + -- direct mode repo + , unlessM isBareRepo + Direct.switchHEADBack + ) + propigateSecureHashesOnly + createInodeSentinalFile False + fixupUnusualReposAfterInit + + -- This is usually run at Annex startup, but when git-annex was + -- not already initialized, it will not yet have run. + startupannex + +uninitialize :: Annex () +uninitialize = do + -- Remove hooks that are written when initializing. + hookUnWrite preCommitHook + hookUnWrite postReceiveHook + hookUnWrite postCheckoutHook + hookUnWrite postMergeHook + deconfigureSmudgeFilter + removeRepoUUID + removeVersion + +{- Gets the version that the repo is initialized with. + - + - To make sure the repo is fully initialized, also checks that it has a + - uuid configured. In the unusual case where one is set and the other is + - not, errors out to avoid running in an inconsistent state. + -} +getInitializedVersion :: Annex (Maybe RepoVersion) +getInitializedVersion = do + um <- (\u -> if u == NoUUID then Nothing else Just u) <$> getUUID + vm <- getVersion + case (um, vm) of + (Just _, Just v) -> return (Just v) + (Nothing, Nothing) -> return Nothing + (Just _, Nothing) -> onemissing "annex.version" "annex.uuid" + (Nothing, Just _) -> onemissing "annex.uuid" "annex.version" + where + onemissing missing have = giveup $ unwords + [ "This repository has " ++ have ++ " set," + , "but " ++ missing ++ " is not set. Perhaps that" + , "git config was lost. Cannot use the repository" + , "in this state; set back " ++ missing ++ " to fix this." + ] + +{- Will automatically initialize if there is already a git-annex + - branch from somewhere. Otherwise, require a manual init + - to avoid git-annex accidentally being run in git + - repos that did not intend to use it. + - + - Checks repository version and handles upgrades too. + -} +ensureInitialized :: Annex () -> Annex [Remote] -> Annex () +ensureInitialized startupannex remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade + where + needsinit = ifM autoInitializeAllowed + ( do + tryNonAsync (initialize startupannex Nothing Nothing) >>= \case + Right () -> noop + Left e -> giveup $ show e ++ "\n" ++ + "git-annex: automatic initialization failed due to above problems" + autoEnableSpecialRemotes remotelist + , giveup "First run: git-annex init" + ) + +{- Check if auto-initialize is allowed. 
-} +autoInitializeAllowed :: Annex Bool +autoInitializeAllowed = Annex.Branch.hasSibling <&&> objectDirNotPresent + +objectDirNotPresent :: Annex Bool +objectDirNotPresent = do + d <- fromRawFilePath <$> fromRepo gitAnnexObjectDir + exists <- liftIO $ doesDirectoryExist d + when exists $ guardSafeToUseRepo $ + giveup $ unwords $ + [ "This repository is not initialized for use" + , "by git-annex, but " ++ d ++ " exists," + , "which indicates this repository was used by" + , "git-annex before, and may have lost its" + , "annex.uuid and annex.version configs. Either" + , "set back missing configs, or run git-annex init" + , "to initialize with a new uuid." + ] + return (not exists) + +guardSafeToUseRepo :: Annex a -> Annex a +guardSafeToUseRepo a = ifM (inRepo Git.Config.checkRepoConfigInaccessible) + ( do + repopath <- fromRepo Git.repoPath + p <- liftIO $ absPath repopath + giveup $ unlines $ + [ "Git refuses to operate in this repository," + , "probably because it is owned by someone else." + , "" + -- This mirrors git's wording. + , "To add an exception for this directory, call:" + , "\tgit config --global --add safe.directory " ++ fromRawFilePath p + ] + , a + ) + +{- Initialize if it can do so automatically. Avoids failing if it cannot. + - + - Checks repository version and handles upgrades too. + -} +autoInitialize :: Annex () -> Annex [Remote] -> Annex () +autoInitialize = autoInitialize' autoInitializeAllowed + +autoInitialize' :: Annex Bool -> Annex () -> Annex [Remote] -> Annex () +autoInitialize' check startupannex remotelist = + getInitializedVersion >>= maybe needsinit checkUpgrade + where + needsinit = + whenM (initializeAllowed <&&> check) $ do + initialize startupannex Nothing Nothing + autoEnableSpecialRemotes remotelist + +{- Checks if a repository is initialized. Does not check version for upgrade. -} +isInitialized :: Annex Bool +isInitialized = maybe Annex.Branch.hasSibling (const $ return True) =<< getVersion + +{- A crippled filesystem is one that does not allow making symlinks, + - or removing write access from files. -} +probeCrippledFileSystem :: Annex Bool +probeCrippledFileSystem = withEventuallyCleanedOtherTmp $ \tmp -> do + (r, warnings) <- probeCrippledFileSystem' tmp + (Just (freezeContent' UnShared)) + (Just (thawContent' UnShared)) + =<< hasFreezeHook + mapM_ (warning . UnquotedString) warnings + return r + +probeCrippledFileSystem' + :: (MonadIO m, MonadCatch m) + => RawFilePath + -> Maybe (RawFilePath -> m ()) + -> Maybe (RawFilePath -> m ()) + -> Bool + -> m (Bool, [String]) +#ifdef mingw32_HOST_OS +probeCrippledFileSystem' _ _ _ _ = return (True, []) +#else +probeCrippledFileSystem' tmp freezecontent thawcontent hasfreezehook = do + let f = tmp P. "gaprobe" + let f' = fromRawFilePath f + liftIO $ writeFile f' "" + r <- probe f' + void $ tryNonAsync $ (fromMaybe (liftIO . allowWrite) thawcontent) f + liftIO $ removeFile f' + return r + where + probe f = catchDefaultIO (True, []) $ do + let f2 = f ++ "2" + liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath f2) + liftIO $ R.createSymbolicLink (toRawFilePath f) (toRawFilePath f2) + liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath f2) + (fromMaybe (liftIO . preventWrite) freezecontent) (toRawFilePath f) + -- Should be unable to write to the file (unless + -- running as root). But some crippled + -- filesystems ignore write bit removals or ignore + -- permissions entirely. 
+ ifM ((== Just False) <$> liftIO (checkContentWritePerm' UnShared (toRawFilePath f) Nothing hasfreezehook))
+ ( return (True, ["Filesystem does not allow removing write bit from files."])
+ , liftIO $ ifM ((== 0) <$> getRealUserID)
+ ( return (False, [])
+ , do
+ r <- catchBoolIO $ do
+ writeFile f "2"
+ return True
+ if r
+ then return (True, ["Filesystem allows writing to files whose write bit is not set."])
+ else return (False, [])
+ )
+ )
+#endif
+
+checkCrippledFileSystem :: Annex ()
+checkCrippledFileSystem = whenM probeCrippledFileSystem $ do
+ warning "Detected a crippled filesystem."
+ setCrippledFileSystem True
+
+ {- Normally git disables core.symlinks itself when the
+ - filesystem does not support them. But, even if symlinks are
+ - supported, we don't use them by default in a crippled
+ - filesystem. -}
+ whenM (coreSymlinks <$> Annex.getGitConfig) $ do
+ warning "Disabling core.symlinks."
+ setConfig "core.symlinks"
+ (Git.Config.boolConfig False)
+
+probeLockSupport :: Annex Bool
+#ifdef mingw32_HOST_OS
+probeLockSupport = return True
+#else
+probeLockSupport = withEventuallyCleanedOtherTmp $ \tmp -> do
+ let f = tmp P.</> "lockprobe"
+ mode <- annexFileMode
+ annexrunner <- Annex.makeRunner
+ liftIO $ withAsync (warnstall annexrunner) (const (go f mode))
+ where
+ go f mode = do
+ removeWhenExistsWith R.removeLink f
+ let locktest = bracket
+ (Posix.lockExclusive (Just mode) f)
+ Posix.dropLock
+ (const noop)
+ ok <- isRight <$> tryNonAsync locktest
+ removeWhenExistsWith R.removeLink f
+ return ok
+
+ warnstall annexrunner = do
+ threadDelaySeconds (Seconds 10)
+ annexrunner $ do
+ warning "Probing the filesystem for POSIX fcntl lock support is taking a long time."
+ warning "(Setting annex.pidlock will avoid this probe.)"
+#endif
+
+probeFifoSupport :: Annex Bool
+probeFifoSupport = do
+#ifdef mingw32_HOST_OS
+ return False
+#else
+ withEventuallyCleanedOtherTmp $ \tmp -> do
+ let f = tmp P.</> "gaprobe"
+ let f2 = tmp P.</> "gaprobe2"
+ liftIO $ do
+ removeWhenExistsWith R.removeLink f
+ removeWhenExistsWith R.removeLink f2
+ ms <- tryIO $ do
+ R.createNamedPipe f ownerReadMode
+ R.createLink f f2
+ R.getFileStatus f
+ removeWhenExistsWith R.removeLink f
+ removeWhenExistsWith R.removeLink f2
+ return $ either (const False) isNamedPipe ms
+#endif
+
+checkLockSupport :: Annex ()
+checkLockSupport =
+ unlessM (annexPidLock <$> Annex.getGitConfig) $
+ unlessM probeLockSupport $ do
+ warning "Detected a filesystem without POSIX fcntl lock support."
+ warning "Enabling annex.pidlock."
+ setConfig (annexConfig "pidlock") (Git.Config.boolConfig True)
+
+checkFifoSupport :: Annex ()
+checkFifoSupport = unlessM probeFifoSupport $ do
+ warning "Detected a filesystem without fifo support."
+ warning "Disabling ssh connection caching."
+ setConfig (annexConfig "sshcaching") (Git.Config.boolConfig False)
+
+{- Sqlite needs the filesystem to support range locking. Some, like CIFS,
+ - do not, which will cause sqlite to fail with ErrorBusy. -}
+checkSqliteWorks :: Annex ()
+checkSqliteWorks = do
+ u <- getUUID
+ tryNonAsync (Database.Fsck.openDb u >>= Database.Fsck.closeDb) >>= \case
+ Right () -> return ()
+ Left e -> do
+ showLongNote $ "Detected a filesystem where Sqlite does not work."
+ showLongNote $ UnquotedString $ "(" ++ show e ++ ")"
+ showLongNote $ "To work around this problem, you can set annex.dbdir " <>
+ "to a directory on another filesystem."
+ showLongNote $ "For example: git config annex.dbdir $HOME/cache/git-annex"
+ giveup "Not initialized."
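
probeCrippledFileSystem' above boils down to three filesystem questions: can a symlink be created, does removing the write bit stick, and is writing really denied afterwards. A self-contained sketch of the write-bit part of such a probe (hypothetical helper, POSIX only, assuming the unix package; the real probe also special-cases running as root):

import Control.Exception (IOException, try)
import System.Posix.Files

-- True when the filesystem is "crippled": the write bit either
-- cannot be removed, or is ignored when writing.
writeBitIgnored :: FilePath -> IO Bool
writeBitIgnored f = do
    writeFile f ""
    setFileMode f 0o444 -- remove all write bits
    st <- getFileStatus f
    if fileMode st `intersectFileModes` 0o222 /= 0
        then return True -- the removal did not stick
        else do
            r <- try (writeFile f "2") :: IO (Either IOException ())
            return (either (const False) (const True) r)
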
+
+checkSharedClone :: Annex Bool
+checkSharedClone = inRepo Git.Objects.isSharedClone
+
+initSharedClone :: Bool -> Annex ()
+initSharedClone False = return ()
+initSharedClone True = do
+ showLongNote "Repository was cloned with --shared; setting annex.hardlink=true and making repository untrusted."
+ u <- getUUID
+ trustSet u UnTrusted
+ setConfig (annexConfig "hardlink") (Git.Config.boolConfig True)
+
+{- Propagate annex.securehashesonly from the global config to local
+ - config. This makes a clone inherit a parent's setting, but once
+ - a repository has a local setting, changes to the global config won't
+ - affect it. -}
+propigateSecureHashesOnly :: Annex ()
+propigateSecureHashesOnly =
+ maybe noop (setConfig "annex.securehashesonly" . fromConfigValue)
+ =<< getGlobalConfig "annex.securehashesonly"
+
+fixupUnusualReposAfterInit :: Annex ()
+fixupUnusualReposAfterInit = do
+ gc <- Annex.getGitConfig
+ void $ inRepo $ \r -> fixupUnusualRepos r gc
+
+{- Try to enable any special remotes that are configured to do so.
+ -
+ - The enabling is done in a child process to avoid it using stdio.
+ -
+ - The remotelist should be Remote.List.remoteList, which cannot
+ - be imported here due to a dependency loop.
+ -}
+autoEnableSpecialRemotes :: Annex [Remote] -> Annex ()
+autoEnableSpecialRemotes remotelist = do
+ -- Get all existing git remotes to probe for their uuid here,
+ -- so it is not done inside the child process. Doing it in there
+ -- could result in password prompts for http credentials,
+ -- which would then not end up cached in this process's state.
+ _ <- remotelist
+ rp <- fromRawFilePath <$> fromRepo Git.repoPath
+ withNullHandle $ \nullh -> gitAnnexChildProcess "init"
+ [ Param "--autoenable" ]
+ (\p -> p
+ { std_out = UseHandle nullh
+ , std_err = UseHandle nullh
+ , std_in = UseHandle nullh
+ , cwd = Just rp
+ }
+ )
+ (\_ _ _ pid -> void $ waitForProcess pid)
+ remotesChanged
diff --git a/Annex/InodeSentinal.hs b/Annex/InodeSentinal.hs
new file mode 100644
index 0000000000..129dd08b71
--- /dev/null
+++ b/Annex/InodeSentinal.hs
@@ -0,0 +1,112 @@
+{- git-annex inode sentinal file
+ -
+ - Copyright 2012-2015 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module Annex.InodeSentinal where
+
+import Annex.Common
+import qualified Annex
+import Utility.InodeCache
+import Annex.Perms
+
+{- If the sentinal shows the inodes have changed, only the size and mtime
+ - are compared. -}
+compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool
+compareInodeCaches x y
+ | compareStrong x y = return True
+ | otherwise = ifM inodesChanged
+ ( return $ compareWeak x y
+ , return False
+ )
+
+compareInodeCachesWith :: Annex InodeComparisonType
+compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly )
+
+{- Checks if one of the provided old InodeCaches matches the current
+ - version of a file. -}
+sameInodeCache :: RawFilePath -> [InodeCache] -> Annex Bool
+sameInodeCache file [] = do
+ fastDebug "Annex.InodeSentinal" $
+ fromRawFilePath file ++ " inode cache empty"
+ return False
+sameInodeCache file old = go =<< withTSDelta (liftIO .
genInodeCache file) + where + go Nothing = do + fastDebug "Annex.InodeSentinal" $ + fromRawFilePath file ++ " not present, cannot compare with inode cache" + return False + go (Just curr) = ifM (elemInodeCaches curr old) + ( return True + , do + fastDebug "Annex.InodeSentinal" $ + fromRawFilePath file ++ " (" ++ show curr ++ ") does not match inode cache (" ++ show old ++ ")" + return False + ) + +elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool +elemInodeCaches _ [] = return False +elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l) + ( return True + , elemInodeCaches c ls + ) + +{- Some filesystems get new inodes each time they are mounted. + - In order to work on such a filesystem, a sentinal file is used to detect + - when the inodes have changed. + - + - If the sentinal file does not exist, we have to assume that the + - inodes have changed. + -} +inodesChanged :: Annex Bool +inodesChanged = sentinalInodesChanged <$> sentinalStatus + +withTSDelta :: (TSDelta -> Annex a) -> Annex a +withTSDelta a = a =<< getTSDelta + +getTSDelta :: Annex TSDelta +#ifdef mingw32_HOST_OS +getTSDelta = sentinalTSDelta <$> sentinalStatus +#else +getTSDelta = pure noTSDelta -- optimisation +#endif + +sentinalStatus :: Annex SentinalStatus +sentinalStatus = maybe check return =<< Annex.getState Annex.sentinalstatus + where + check = do + sc <- liftIO . checkSentinalFile =<< annexSentinalFile + Annex.changeState $ \s -> s { Annex.sentinalstatus = Just sc } + return sc + +{- The sentinal file is only created when first initializing a repository. + - If there are any annexed objects in the repository already, creating + - the file would invalidate their inode caches. -} +createInodeSentinalFile :: Bool -> Annex () +createInodeSentinalFile evenwithobjects = + unlessM (alreadyexists <||> hasobjects) $ do + s <- annexSentinalFile + createAnnexDirectory (parentDir (sentinalFile s)) + liftIO $ writeSentinalFile s + setAnnexFilePerm (sentinalFile s) + setAnnexFilePerm (sentinalCacheFile s) + where + alreadyexists = liftIO. sentinalFileExists =<< annexSentinalFile + hasobjects + | evenwithobjects = pure False + | otherwise = liftIO . doesDirectoryExist . fromRawFilePath + =<< fromRepo gitAnnexObjectDir + +annexSentinalFile :: Annex SentinalFile +annexSentinalFile = do + sentinalfile <- fromRepo gitAnnexInodeSentinal + sentinalcachefile <- fromRepo gitAnnexInodeSentinalCache + return SentinalFile + { sentinalFile = sentinalfile + , sentinalCacheFile = sentinalcachefile + } diff --git a/Annex/Journal.hs b/Annex/Journal.hs new file mode 100644 index 0000000000..8eb1dc880f --- /dev/null +++ b/Annex/Journal.hs @@ -0,0 +1,303 @@ +{- management of the git-annex journal + - + - The journal is used to queue up changes before they are committed to the + - git-annex branch. Among other things, it ensures that if git-annex is + - interrupted, its recorded data is not lost. + - + - All files in the journal must be a series of lines separated by + - newlines. + - + - Copyright 2011-2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE CPP #-} + +module Annex.Journal where + +import Annex.Common +import qualified Annex +import qualified Git +import Annex.Perms +import Annex.Tmp +import Annex.LockFile +import Annex.BranchState +import Types.BranchState +import Utility.Directory.Stream +import qualified Utility.RawFilePath as R + +import qualified Data.Set as S +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString as B +import qualified System.FilePath.ByteString as P +import Data.ByteString.Builder +import Data.Char + +class Journalable t where + writeJournalHandle :: Handle -> t -> IO () + journalableByteString :: t -> L.ByteString + +instance Journalable L.ByteString where + writeJournalHandle = L.hPut + journalableByteString = id + +-- This is more efficient than the ByteString instance. +instance Journalable Builder where + writeJournalHandle = hPutBuilder + journalableByteString = toLazyByteString + +{- When a file in the git-annex branch is changed, this indicates what + - repository UUID (or in some cases, UUIDs) a change is regarding. + - + - Using this lets changes regarding private UUIDs be stored separately + - from the git-annex branch, so its information does not get exposed + - outside the repo. + -} +data RegardingUUID = RegardingUUID [UUID] + +regardingPrivateUUID :: RegardingUUID -> Annex Bool +regardingPrivateUUID (RegardingUUID []) = pure False +regardingPrivateUUID (RegardingUUID us) = do + s <- annexPrivateRepos <$> Annex.getGitConfig + return (any (flip S.member s) us) + +{- Are any private UUIDs known to exist? If so, extra work has to be done, + - to check for information separately recorded for them, outside the usual + - locations. + -} +privateUUIDsKnown :: Annex Bool +privateUUIDsKnown = privateUUIDsKnown' <$> Annex.getState id + +privateUUIDsKnown' :: Annex.AnnexState -> Bool +privateUUIDsKnown' = not . S.null . annexPrivateRepos . Annex.gitconfig + +{- Records content for a file in the branch to the journal. + - + - Using the journal, rather than immediately staging content to the index + - avoids git needing to rewrite the index after every change. + - + - The file in the journal is updated atomically. This avoids an + - interrupted write truncating information that was earlier read from the + - file, and so losing data. + -} +setJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex () +setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do + st <- getState + jd <- fromRepo =<< ifM (regardingPrivateUUID ru) + ( return (gitAnnexPrivateJournalDir st) + , return (gitAnnexJournalDir st) + ) + -- journal file is written atomically + let jfile = journalFile file + let tmpfile = tmp P. jfile + liftIO $ withFile (fromRawFilePath tmpfile) WriteMode $ \h -> + writeJournalHandle h content + let dest = jd P. jfile + let mv = do + liftIO $ moveFile tmpfile dest + setAnnexFilePerm dest + -- avoid overhead of creating the journal directory when it already + -- exists + mv `catchIO` (const (createAnnexDirectory jd >> mv)) + +newtype AppendableJournalFile = AppendableJournalFile (RawFilePath, RawFilePath) + +{- If the journal file does not exist, it cannot be appended to, because + - that would overwrite whatever content the file has in the git-annex + - branch. 
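
setJournalFile above gets its atomicity the classic way: write the complete new content to a temp file on the same filesystem, then rename over the destination. The idiom in miniature (hypothetical helper, assuming the directory, filepath, and bytestring packages):

import System.Directory (renameFile)
import System.FilePath ((</>))
import qualified Data.ByteString.Lazy as L

-- readers see either the old content or the new, never a torn write;
-- the rename is only atomic when tmpdir and dest share a filesystem
atomicWriteFile :: FilePath -> FilePath -> L.ByteString -> IO ()
atomicWriteFile tmpdir dest content = do
    let tmp = tmpdir </> "journal.tmp"
    L.writeFile tmp content
    renameFile tmp dest
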
-}
+checkCanAppendJournalFile :: JournalLocked -> RegardingUUID -> RawFilePath -> Annex (Maybe AppendableJournalFile)
+checkCanAppendJournalFile _jl ru file = do
+ st <- getState
+ jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
+ ( return (gitAnnexPrivateJournalDir st)
+ , return (gitAnnexJournalDir st)
+ )
+ let jfile = jd P.</> journalFile file
+ ifM (liftIO $ R.doesPathExist jfile)
+ ( return (Just (AppendableJournalFile (jd, jfile)))
+ , return Nothing
+ )
+
+{- Appends content to an existing journal file.
+ -
+ - Appends are not necessarily atomic, though short appends often are.
+ - So, when this is interrupted, it can leave only part of the content
+ - written to the file. To deal with that situation, both this and
+ - getJournalFileStale check if the file ends with a newline, and if
+ - not discard the incomplete line.
+ -
+ - Due to the lack of atomicity, this should not be used when multiple
+ - lines need to be written to the file as an atomic unit.
+ -}
+appendJournalFile :: Journalable content => JournalLocked -> AppendableJournalFile -> content -> Annex ()
+appendJournalFile _jl (AppendableJournalFile (jd, jfile)) content = do
+ let write = liftIO $ withFile (fromRawFilePath jfile) ReadWriteMode $ \h -> do
+ sz <- hFileSize h
+ when (sz /= 0) $ do
+ hSeek h SeekFromEnd (-1)
+ lastchar <- B.hGet h 1
+ unless (lastchar == "\n") $ do
+ hSeek h AbsoluteSeek 0
+ goodpart <- L.length . discardIncompleteAppend
+ <$> L.hGet h (fromIntegral sz)
+ hSetFileSize h (fromIntegral goodpart)
+ hSeek h SeekFromEnd 0
+ writeJournalHandle h content
+ write `catchIO` (const (createAnnexDirectory jd >> write))
+
+data JournalledContent
+ = NoJournalledContent
+ | JournalledContent L.ByteString
+ | PossiblyStaleJournalledContent L.ByteString
+ -- ^ This is used when the journalled content may have been
+ -- superseded by content in the git-annex branch. The returned
+ -- content should be combined with content from the git-annex branch.
+ -- This is particularly the case when a file is in the private
+ -- journal, which does not get written to the git-annex branch,
+ -- and so the git-annex branch can contain changes to non-private
+ -- information that were made after that journal file was written.
+
+{- Gets any journalled content for a file in the branch. -}
+getJournalFile :: JournalLocked -> GetPrivate -> RawFilePath -> Annex JournalledContent
+getJournalFile _jl = getJournalFileStale
+
+data GetPrivate = GetPrivate Bool
+
+{- Without locking, this is not guaranteed to be the most recent
+ - content of the file in the journal, so should not be used as a basis for
+ - making changes to the file.
+ -
+ - The file is read strictly so that its content can safely be fed into
+ - an operation that modifies the file (when getJournalFile calls this).
+ - The minor loss of laziness doesn't matter much, as the files are not
+ - very large.
+ -
+ - To recover from an append of a line that is interrupted part way through
+ - (or is in progress when this is called), if the file content does not end
+ - with a newline, it is truncated back to the previous newline.
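
Concretely, the recovery rule truncates back to the last newline; discardIncompleteAppend, defined just below, implements it. Its expected behaviour on small inputs (illustrative only):

-- a partial trailing line is dropped, a complete one is kept:
--   discardIncompleteAppend "a\nb\npartial"  ==  "a\nb\n"
--   discardIncompleteAppend "a\nb\n"         ==  "a\nb\n"
--   discardIncompleteAppend ""               ==  ""
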
+ -}
+getJournalFileStale :: GetPrivate -> RawFilePath -> Annex JournalledContent
+getJournalFileStale (GetPrivate getprivate) file = do
+ st <- Annex.getState id
+ let repo = Annex.repo st
+ bs <- getState
+ liftIO $
+ if getprivate && privateUUIDsKnown' st
+ then do
+ x <- getfrom (gitAnnexJournalDir bs repo)
+ getfrom (gitAnnexPrivateJournalDir bs repo) >>= \case
+ Nothing -> return $ case x of
+ Nothing -> NoJournalledContent
+ Just b -> JournalledContent b
+ Just y -> return $ PossiblyStaleJournalledContent $ case x of
+ Nothing -> y
+ -- This concatenation is the same as
+ -- happens in a merge of two
+ -- git-annex branches.
+ Just x' -> x' <> y
+ else getfrom (gitAnnexJournalDir bs repo) >>= return . \case
+ Nothing -> NoJournalledContent
+ Just b -> JournalledContent b
+ where
+ jfile = journalFile file
+ getfrom d = catchMaybeIO $
+ discardIncompleteAppend . L.fromStrict
+ <$> B.readFile (fromRawFilePath (d P.</> jfile))
+
+-- Note that this forces a read of the whole lazy bytestring.
+discardIncompleteAppend :: L.ByteString -> L.ByteString
+discardIncompleteAppend v
+ | L.null v = v
+ | L.last v == nl = v
+ | otherwise = dropwhileend (/= nl) v
+ where
+ nl = fromIntegral (ord '\n')
+#if MIN_VERSION_bytestring(0,11,2)
+ dropwhileend = L.dropWhileEnd
+#else
+ dropwhileend p = L.reverse . L.dropWhile p . L.reverse
+#endif
+
+{- Lists the existing journal files in a journal directory. Without locking,
+ - this may miss new ones just being added, or may have false positives if the
+ - journal is staged as it is run. -}
+getJournalledFilesStale :: (BranchState -> Git.Repo -> RawFilePath) -> Annex [RawFilePath]
+getJournalledFilesStale getjournaldir = do
+ bs <- getState
+ repo <- Annex.gitRepo
+ let d = getjournaldir bs repo
+ fs <- liftIO $ catchDefaultIO [] $
+ getDirectoryContents (fromRawFilePath d)
+ return $ filter (`notElem` [".", ".."]) $
+ map (fileJournal . toRawFilePath) fs
+
+{- Directory handle open on a journal directory. -}
+withJournalHandle :: (BranchState -> Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a
+withJournalHandle getjournaldir a = do
+ bs <- getState
+ repo <- Annex.gitRepo
+ let d = getjournaldir bs repo
+ bracket (opendir d) (liftIO . closeDirectory) (liftIO . a)
+ where
+ -- avoid overhead of creating the journal directory when it already
+ -- exists
+ opendir d = liftIO (openDirectory (fromRawFilePath d))
+ `catchIO` (const (createAnnexDirectory d >> opendir d))
+
+{- Checks if there are changes in the journal. -}
+journalDirty :: (BranchState -> Git.Repo -> RawFilePath) -> Annex Bool
+journalDirty getjournaldir = do
+ st <- getState
+ d <- fromRawFilePath <$> fromRepo (getjournaldir st)
+ liftIO $
+ (not <$> isDirectoryEmpty d)
+ `catchIO` (const $ doesDirectoryExist d)
+
+{- Produces a filename to use in the journal for a file on the branch.
+ - The filename does not include the journal directory.
+ -
+ - The journal typically won't have a lot of files in it, so the hashing
+ - used in the branch is not necessary, and all the files are put directly
+ - in the journal directory.
+ -}
+journalFile :: RawFilePath -> RawFilePath
+journalFile file = B.concatMap mangle file
+ where
+ mangle c
+ | P.isPathSeparator c = B.singleton underscore
+ | c == underscore = B.pack [underscore, underscore]
+ | otherwise = B.singleton c
+ underscore = fromIntegral (ord '_')
+
+{- Converts a journal file (relative to the journal dir) back to the
+ - filename on the branch.
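
The escaping in journalFile above is reversible because a literal underscore is doubled while a path separator becomes a single underscore; fileJournal, just below, inverts it. Some illustrative pairs (POSIX path separator assumed):

--   journalFile "uuid.log"       ==  "uuid.log"
--   journalFile "aaa/bbb.log"    ==  "aaa_bbb.log"
--   journalFile "a_b/c.log"      ==  "a__b_c.log"
-- and, for any branch path p:
--   fileJournal (journalFile p)  ==  p
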
-} +fileJournal :: RawFilePath -> RawFilePath +fileJournal = go + where + go b = + let (h, t) = B.break (== underscore) b + in h <> case B.uncons t of + Nothing -> t + Just (_u, t') -> case B.uncons t' of + Nothing -> t' + Just (w, t'') + | w == underscore -> + B.cons underscore (go t'') + | otherwise -> + B.cons P.pathSeparator (go t') + + underscore = fromIntegral (ord '_') + +{- Sentinal value, only produced by lockJournal; required + - as a parameter by things that need to ensure the journal is + - locked. -} +data JournalLocked = ProduceJournalLocked + +{- Runs an action that modifies the journal, using locking to avoid + - contention with other git-annex processes. -} +lockJournal :: (JournalLocked -> Annex a) -> Annex a +lockJournal a = do + lck <- fromRepo gitAnnexJournalLock + withExclusiveLock lck $ a ProduceJournalLocked diff --git a/Annex/Link.hs b/Annex/Link.hs new file mode 100644 index 0000000000..4961499f62 --- /dev/null +++ b/Annex/Link.hs @@ -0,0 +1,476 @@ +{- git-annex links to content + - + - On file systems that support them, symlinks are used. + - + - On other filesystems, git instead stores the symlink target in a regular + - file. + - + - Pointer files are used instead of symlinks for unlocked files. + - + - Copyright 2013-2022 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP, BangPatterns, OverloadedStrings #-} + +module Annex.Link where + +import Annex.Common +import qualified Annex +import qualified Annex.Queue +import qualified Git.Queue +import qualified Git.UpdateIndex +import qualified Git.Index +import qualified Git.LockFile +import qualified Git.Env +import qualified Git +import Logs.Restage +import Git.Types +import Git.FilePath +import Git.Config +import Annex.HashObject +import Annex.InodeSentinal +import Annex.PidLock +import Utility.FileMode +import Utility.InodeCache +import Utility.Tmp.Dir +import Utility.CopyFile +import qualified Database.Keys.Handle +import qualified Utility.RawFilePath as R + +import qualified Data.ByteString as S +import qualified Data.ByteString.Char8 as S8 +import qualified Data.ByteString.Lazy as L +import qualified System.FilePath.ByteString as P +#ifndef mingw32_HOST_OS +#if MIN_VERSION_unix(2,8,0) +#else +import System.PosixCompat.Files (isSymbolicLink) +#endif +#endif + +type LinkTarget = S.ByteString + +{- Checks if a file is a link to a key. -} +isAnnexLink :: RawFilePath -> Annex (Maybe Key) +isAnnexLink file = maybe Nothing parseLinkTargetOrPointer <$> getAnnexLinkTarget file + +{- Gets the link target of a symlink. + - + - On a filesystem that does not support symlinks, fall back to getting the + - link target by looking inside the file. + - + - Returns Nothing if the file is not a symlink, or not a link to annex + - content. + -} +getAnnexLinkTarget :: RawFilePath -> Annex (Maybe LinkTarget) +getAnnexLinkTarget f = getAnnexLinkTarget' f + =<< (coreSymlinks <$> Annex.getGitConfig) + +{- Pass False to force looking inside file, for when git checks out + - symlinks as plain files. 
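
JournalLocked above is a proof token: only lockJournal can produce one, so a function that takes a JournalLocked argument can only ever run with the journal lock held, and the type checker enforces it. The pattern in miniature, with hypothetical names:

-- keep the constructor unexported, so callers cannot forge the proof
data Locked = ProduceLocked

withLock :: (Locked -> IO a) -> IO a
withLock a = a ProduceLocked -- the real version takes the lock first

-- can only be reached through withLock
mutateJournal :: Locked -> IO ()
mutateJournal _proof = putStrLn "the lock is held here"
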
-} +getAnnexLinkTarget' :: RawFilePath -> Bool -> Annex (Maybe S.ByteString) +getAnnexLinkTarget' file coresymlinks = if coresymlinks + then check probesymlink $ + return Nothing + else check probesymlink $ + check probefilecontent $ + return Nothing + where + check getlinktarget fallback = + liftIO (catchMaybeIO getlinktarget) >>= \case + Just l + | isLinkToAnnex l -> return (Just l) + | otherwise -> return Nothing + Nothing -> fallback + + probesymlink = R.readSymbolicLink file + + probefilecontent = withFile (fromRawFilePath file) ReadMode $ \h -> do + s <- S.hGet h maxSymlinkSz + -- If we got the full amount, the file is too large + -- to be a symlink target. + return $ if S.length s == maxSymlinkSz + then mempty + else + -- If there are any NUL or newline + -- characters, or whitespace, we + -- certainly don't have a symlink to a + -- git-annex key. + if any (`S8.elem` s) ("\0\n\r \t" :: [Char]) + then mempty + else s + +makeAnnexLink :: LinkTarget -> RawFilePath -> Annex () +makeAnnexLink = makeGitLink + +{- Creates a link on disk. + - + - On a filesystem that does not support symlinks, writes the link target + - to a file. Note that git will only treat the file as a symlink if + - it's staged as such, so use addAnnexLink when adding a new file or + - modified link to git. + -} +makeGitLink :: LinkTarget -> RawFilePath -> Annex () +makeGitLink linktarget file = ifM (coreSymlinks <$> Annex.getGitConfig) + ( liftIO $ do + void $ tryIO $ R.removeLink file + R.createSymbolicLink linktarget file + , liftIO $ S.writeFile (fromRawFilePath file) linktarget + ) + +{- Creates a link on disk, and additionally stages it in git. -} +addAnnexLink :: LinkTarget -> RawFilePath -> Annex () +addAnnexLink linktarget file = do + makeAnnexLink linktarget file + stageSymlink file =<< hashSymlink linktarget + +{- Injects a symlink target into git, returning its Sha. -} +hashSymlink :: LinkTarget -> Annex Sha +hashSymlink = hashBlob . toInternalGitPath + +{- Stages a symlink to an annexed object, using a Sha of its target. -} +stageSymlink :: RawFilePath -> Sha -> Annex () +stageSymlink file sha = + Annex.Queue.addUpdateIndex =<< + inRepo (Git.UpdateIndex.stageSymlink file sha) + +{- Injects a pointer file content into git, returning its Sha. -} +hashPointerFile :: Key -> Annex Sha +hashPointerFile key = hashBlob $ formatPointer key + +{- Stages a pointer file, using a Sha of its content -} +stagePointerFile :: RawFilePath -> Maybe FileMode -> Sha -> Annex () +stagePointerFile file mode sha = + Annex.Queue.addUpdateIndex =<< + inRepo (Git.UpdateIndex.stageFile sha treeitemtype file) + where + treeitemtype + | maybe False isExecutable mode = TreeExecutable + | otherwise = TreeFile + +writePointerFile :: RawFilePath -> Key -> Maybe FileMode -> IO () +writePointerFile file k mode = do + S.writeFile (fromRawFilePath file) (formatPointer k) + maybe noop (R.setFileMode file) mode + +newtype Restage = Restage Bool + +{- Restage pointer file. This is used after updating a worktree file + - when content is added/removed, to prevent git status from showing + - it as modified. + - + - The InodeCache is for the worktree file. It is used to detect when + - the worktree file is changed by something else before git update-index + - gets to look at it. + - + - Asks git to refresh its index information for the file. + - That in turn runs the clean filter on the file; when the clean + - filter produces the same pointer that was in the index before, git + - realizes that the file has not actually been modified. 
+ -
+ - Note that, if the pointer file is staged for deletion, or has different
+ - content than the current worktree content staged, this won't change
+ - that. So it's safe to call at any time and in any situation.
+ -
+ - If the index is known to be locked (eg, git add has run git-annex),
+ - updating the index would fail. Restage False will prevent the index
+ - being updated, and will store it in the restage log. Displays a message
+ - to help the user understand why the file will appear to be modified.
+ -
+ - This uses the git queue, so the update is not performed immediately,
+ - and this can be run multiple times cheaply. Using the git queue also
+ - prevents building up too large a number of updates when many files
+ - are being processed. It's also recorded in the restage log so that,
+ - if the process is interrupted before the git queue is flushed, the
+ - restage will be taken care of later.
+ -}
+restagePointerFile :: Restage -> RawFilePath -> InodeCache -> Annex ()
+restagePointerFile (Restage False) f orig = do
+ flip writeRestageLog orig =<< inRepo (toTopFilePath f)
+ toplevelWarning True $ unableToRestage $ Just f
+restagePointerFile (Restage True) f orig = do
+ flip writeRestageLog orig =<< inRepo (toTopFilePath f)
+ -- Avoid refreshing the index if run by the
+ -- smudge clean filter, because git uses that when
+ -- it's already refreshing the index, probably because
+ -- this very action is running. Running it again would likely
+ -- deadlock.
+ unlessM (Annex.getState Annex.insmudgecleanfilter) $
+ Annex.Queue.addFlushAction restagePointerFileRunner [f]
+
+restagePointerFileRunner :: Git.Queue.FlushActionRunner Annex
+restagePointerFileRunner =
+ Git.Queue.FlushActionRunner "restagePointerFiles" $ \r _fs ->
+ restagePointerFiles r
+
+-- Restage all files in the restage log that have not been modified.
+--
+-- Other changes to the files may have been staged before this
+-- gets a chance to run. To avoid a race with any staging of
+-- changes, first lock the index file. Then run git update-index
+-- on all still-unmodified files, using a copy of the index file,
+-- to bypass the lock. Then replace the old index file with the new
+-- updated index file.
+restagePointerFiles :: Git.Repo -> Annex ()
+restagePointerFiles r = unlessM (Annex.getState Annex.insmudgecleanfilter) $ do
+ -- Flush any queued changes to the keys database, so they
+ -- are visible to child processes.
+ -- The database is closed because that may improve behavior
+ -- when run in Windows's WSL1, which has issues with
+ -- multiple writers to SQL databases.
+ liftIO . Database.Keys.Handle.closeDbHandle
+ =<< Annex.getRead Annex.keysdbhandle
+ realindex <- liftIO $ Git.Index.currentIndexFile r
+ numsz@(numfiles, _) <- calcnumsz
+ let lock = fromRawFilePath (Git.Index.indexFileLock realindex)
+ lockindex = liftIO $ catchMaybeIO $ Git.LockFile.openLock' lock
+ unlockindex = liftIO .
maybe noop Git.LockFile.closeLock
+ showwarning = warning $ unableToRestage Nothing
+ go Nothing = showwarning
+ go (Just _) = withtmpdir $ \tmpdir -> do
+ tsd <- getTSDelta
+ let tmpindex = toRawFilePath (tmpdir </> "index")
+ let replaceindex = liftIO $ moveFile tmpindex realindex
+ let updatetmpindex = do
+ r' <- liftIO $ Git.Env.addGitEnv r Git.Index.indexEnv
+ =<< Git.Index.indexEnvVal tmpindex
+ configfilterprocess numsz $
+ runupdateindex tsd r' replaceindex
+ return True
+ ok <- liftIO (createLinkOrCopy realindex tmpindex)
+ <&&> catchBoolIO updatetmpindex
+ unless ok showwarning
+ when (numfiles > 0) $
+ bracket lockindex unlockindex go
+ where
+ withtmpdir = withTmpDirIn (fromRawFilePath $ Git.localGitDir r) "annexindex"
+
+ isunmodified tsd f orig =
+ genInodeCache f tsd >>= return . \case
+ Nothing -> False
+ Just new -> compareStrong orig new
+
+ {- Avoid git warning about CRLF munging -}
+ avoidcrlfwarning r' = r' { gitGlobalOpts = gitGlobalOpts r' ++
+ [ Param "-c"
+ , Param $ "core.safecrlf=" ++ boolConfig False
+ ] }
+
+ runupdateindex tsd r' replaceindex =
+ runsGitAnnexChildProcessViaGit' (avoidcrlfwarning r') $ \r'' ->
+ Git.UpdateIndex.refreshIndex r'' $ \feeder -> do
+ let atend = do
+ -- wait for index write
+ liftIO $ feeder Nothing
+ replaceindex
+ streamRestageLog atend $ \topf ic -> do
+ let f = fromTopFilePath topf r''
+ liftIO $ whenM (isunmodified tsd f ic) $
+ feedupdateindex f feeder
+
+ {- update-index is documented as picky about "./file" and it
+ - fails on "../../repo/path/file" when cwd is not in the repo
+ - being acted on. Avoid these problems with an absolute path.
+ -}
+ feedupdateindex f feeder = do
+ absf <- absPath f
+ feeder (Just absf)
+
+ calcnumsz = calcRestageLog (0, 0) $ \(_f, ic) (numfiles, sizefiles) ->
+ (numfiles+1, sizefiles + inodeCacheFileSize ic)
+
+ {- filter.annex.process configured to use git-annex filter-process
+ - is sometimes faster and sometimes slower than using
+ - git-annex smudge. The latter is run once per file, while
+ - the former has the content of files piped to it.
+ -}
+ filterprocessfaster :: (Integer, FileSize) -> Bool
+ filterprocessfaster (numfiles, sizefiles) =
+ let estimate_enabled = sizefiles `div` 191739611
+ estimate_disabled = numfiles `div` 7
+ in estimate_enabled <= estimate_disabled
+
+ {- This disables filter.annex.process if it's set when it would
+ - probably not be faster to use it. Unfortunately, simply
+ - passing -c filter.annex.process= also prevents git from
+ - running the smudge filter, so .git/config has to be modified
+ - to disable it. The modification is reversed at the end. In
+ - case this process is terminated early, the next time this
+ - runs it will take care of reversing the modification.
+ -}
+ configfilterprocess numsz = bracket setup cleanup .
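+{- To illustrate the estimate above (a sketch): with 1000 files
+ - totalling 10 GB, estimate_enabled = 10000000000 `div` 191739611 = 52
+ - and estimate_disabled = 1000 `div` 7 = 142, so 52 <= 142 and
+ - filter.annex.process is left enabled; with 10 files totalling the
+ - same 10 GB, estimate_enabled is still 52 but estimate_disabled = 1,
+ - so the configuration is temporarily disabled and git-annex smudge is
+ - used instead. -}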
const + where + setup + | filterprocessfaster numsz = return Nothing + | otherwise = fromRepo (Git.Config.getMaybe ck) >>= \case + Nothing -> return Nothing + Just v -> do + void $ inRepo (Git.Config.change ckd (fromConfigValue v)) + void $ inRepo (Git.Config.unset ck) + return (Just v) + cleanup (Just v) = do + void $ inRepo $ Git.Config.change ck (fromConfigValue v) + void $ inRepo (Git.Config.unset ckd) + cleanup Nothing = fromRepo (Git.Config.getMaybe ckd) >>= \case + Nothing -> return () + Just v -> do + whenM (isNothing <$> fromRepo (Git.Config.getMaybe ck)) $ + void $ inRepo (Git.Config.change ck (fromConfigValue v)) + void $ inRepo (Git.Config.unset ckd) + ck = ConfigKey "filter.annex.process" + ckd = ConfigKey "filter.annex.process-temp-disabled" + +unableToRestage :: Maybe RawFilePath -> StringContainingQuotedPath +unableToRestage mf = + "git status will show " <> maybe "some files" QuotedPath mf + <> " to be modified, since content availability has changed" + <> " and git-annex was unable to update the index." + <> " This is only a cosmetic problem affecting git status; git add," + <> " git commit, etc won't be affected." + <> " To fix the git status display, you can run:" + <> " git-annex restage" + +{- Parses a symlink target or a pointer file to a Key. + - + - Makes sure that the pointer file is valid, including not being longer + - than the maximum allowed size of a valid pointer file, and that any + - subsequent lines after the first contain the validPointerLineTag. + - If a valid pointer file gets some other data appended to it, it should + - never be considered valid, unless that data happened to itself be a + - valid pointer file. + -} +parseLinkTargetOrPointer :: S.ByteString -> Maybe Key +parseLinkTargetOrPointer = either (const Nothing) id + . parseLinkTargetOrPointer' + +data InvalidAppendedPointerFile = InvalidAppendedPointerFile + +parseLinkTargetOrPointer' :: S.ByteString -> Either InvalidAppendedPointerFile (Maybe Key) +parseLinkTargetOrPointer' b = + let (firstline, rest) = S8.span (/= '\n') b + in case parsekey $ droptrailing '\r' firstline of + Just k + | S.length b > maxValidPointerSz -> Left InvalidAppendedPointerFile + | restvalid (dropleading '\n' rest) -> Right (Just k) + | otherwise -> Left InvalidAppendedPointerFile + Nothing -> Right Nothing + where + parsekey l + | isLinkToAnnex l = fileKey $ snd $ S8.breakEnd pathsep l + | otherwise = Nothing + + restvalid r + | S.null r = True + | otherwise = + let (l, r') = S8.span (/= '\n') r + in validPointerLineTag `S.isInfixOf` l + && (not (S8.null r') && S8.head r' == '\n') + && restvalid (S8.tail r') + + dropleading c l + | S.null l = l + | S8.head l == c = S8.tail l + | otherwise = l + + droptrailing c l + | S.null l = l + | S8.last l == c = S8.init l + | otherwise = l + + pathsep '/' = True +#ifdef mingw32_HOST_OS + pathsep '\\' = True +#endif + pathsep _ = False + +{- Avoid looking at more of the lazy ByteString than necessary since it + - could be reading from a large file that is not a pointer file. -} +parseLinkTargetOrPointerLazy :: L.ByteString -> Maybe Key +parseLinkTargetOrPointerLazy = either (const Nothing) id + . 
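+{- Examples of the validity rules above (a sketch, with an abbreviated
+ - key):
+ -
+ -   "/annex/objects/SHA256-s5--aabbcc\n"
+ - parses to Just the key: the first line is a link into .git/annex.
+ -
+ -   "/annex/objects/SHA256-s5--aabbcc\ngarbage\n"
+ - is Left InvalidAppendedPointerFile: the second line lacks the
+ - "/annex/" tag.
+ -
+ - Anything longer than maxValidPointerSz is also rejected, even when
+ - it starts with a valid pointer. -}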
parseLinkTargetOrPointerLazy' + +parseLinkTargetOrPointerLazy' :: L.ByteString -> Either InvalidAppendedPointerFile (Maybe Key) +parseLinkTargetOrPointerLazy' b = + let b' = L.take (fromIntegral maxPointerSz) b + in parseLinkTargetOrPointer' (L.toStrict b') + +formatPointer :: Key -> S.ByteString +formatPointer k = prefix <> keyFile k <> nl + where + prefix = toInternalGitPath $ P.pathSeparator `S.cons` objectDir + nl = S8.singleton '\n' + +{- Maximum size of a file that could be a pointer to a key. + - Check to avoid buffering really big files in git into + - memory when reading files that may be pointers. + - + - 8192 bytes is plenty for a pointer to a key. This adds some additional + - padding to allow for pointer files that have lines of additional data + - after the key. + - + - One additional byte is used to detect when a valid pointer file + - got something else appended to it. + -} +maxPointerSz :: Int +maxPointerSz = maxValidPointerSz + 1 + +{- Maximum size of a valid pointer files is 32kb. -} +maxValidPointerSz :: Int +maxValidPointerSz = 32768 + +maxSymlinkSz :: Int +maxSymlinkSz = 8192 + +{- Checks if a worktree file is a pointer to a key. + - + - Unlocked files whose content is present are not detected by this. + - + - It's possible, though unlikely, that an annex symlink points to + - an object that looks like a pointer file. Or that a non-annex + - symlink does. Avoids a false positive in those cases. + - -} +isPointerFile :: RawFilePath -> IO (Maybe Key) +isPointerFile f = catchDefaultIO Nothing $ +#if defined(mingw32_HOST_OS) + withFile (fromRawFilePath f) ReadMode readhandle +#else +#if MIN_VERSION_unix(2,8,0) + let open = do + fd <- openFd (fromRawFilePath f) ReadOnly + (defaultFileFlags { nofollow = True }) + fdToHandle fd + in bracket open hClose readhandle +#else + ifM (isSymbolicLink <$> R.getSymbolicLinkStatus f) + ( return Nothing + , withFile (fromRawFilePath f) ReadMode readhandle + ) +#endif +#endif + where + readhandle h = parseLinkTargetOrPointer <$> S.hGet h maxPointerSz + +{- Checks a symlink target or pointer file first line to see if it + - appears to point to annexed content. + - + - We only look for paths inside the .git directory, and not at the .git + - directory itself, because GIT_DIR may cause a directory name other + - than .git to be used. + -} +isLinkToAnnex :: S.ByteString -> Bool +isLinkToAnnex s = p `S.isInfixOf` s +#ifdef mingw32_HOST_OS + -- '/' is used inside pointer files on Windows, not the native '\' + || p' `S.isInfixOf` s +#endif + where + p = P.pathSeparator `S.cons` objectDir +#ifdef mingw32_HOST_OS + p' = toInternalGitPath p +#endif + +{- String that must appear on every line of a valid pointer file. -} +validPointerLineTag :: S.ByteString +validPointerLineTag = "/annex/" diff --git a/Annex/Locations.hs b/Annex/Locations.hs new file mode 100644 index 0000000000..6f6203cfa2 --- /dev/null +++ b/Annex/Locations.hs @@ -0,0 +1,757 @@ +{- git-annex file locations + - + - Copyright 2010-2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE OverloadedStrings #-} + +module Annex.Locations ( + keyFile, + fileKey, + keyPaths, + keyPath, + annexDir, + objectDir, + gitAnnexLocation, + gitAnnexLocation', + gitAnnexLocationDepth, + gitAnnexLink, + gitAnnexLinkCanonical, + gitAnnexContentLock, + gitAnnexContentRetentionTimestamp, + gitAnnexContentRetentionTimestampLock, + gitAnnexContentLockLock, + gitAnnexInodeSentinal, + gitAnnexInodeSentinalCache, + annexLocationsBare, + annexLocationsNonBare, + annexLocation, + exportAnnexObjectLocation, + gitAnnexDir, + gitAnnexObjectDir, + gitAnnexTmpOtherDir, + gitAnnexTmpOtherLock, + gitAnnexTmpOtherDirOld, + gitAnnexTmpWatcherDir, + gitAnnexTmpObjectDir, + gitAnnexTmpObjectLocation, + gitAnnexTmpWorkDir, + gitAnnexBadDir, + gitAnnexBadLocation, + gitAnnexUnusedLog, + gitAnnexKeysDbDir, + gitAnnexKeysDbLock, + gitAnnexKeysDbIndexCache, + gitAnnexFsckState, + gitAnnexFsckDbDir, + gitAnnexFsckDbDirOld, + gitAnnexFsckDbLock, + gitAnnexFsckResultsLog, + gitAnnexUpgradeLog, + gitAnnexUpgradeLock, + gitAnnexSmudgeLog, + gitAnnexSmudgeLock, + gitAnnexRestageLog, + gitAnnexRestageLogOld, + gitAnnexRestageLock, + gitAnnexAdjustedBranchUpdateLog, + gitAnnexAdjustedBranchUpdateLock, + gitAnnexMigrateLog, + gitAnnexMigrateLock, + gitAnnexMigrationsLog, + gitAnnexMigrationsLock, + gitAnnexMoveLog, + gitAnnexMoveLock, + gitAnnexExportDir, + gitAnnexExportDbDir, + gitAnnexExportLock, + gitAnnexExportUpdateLock, + gitAnnexExportExcludeLog, + gitAnnexImportDir, + gitAnnexImportLog, + gitAnnexContentIdentifierDbDir, + gitAnnexContentIdentifierLock, + gitAnnexImportFeedDbDir, + gitAnnexImportFeedDbLock, + gitAnnexScheduleState, + gitAnnexTransferDir, + gitAnnexCredsDir, + gitAnnexWebCertificate, + gitAnnexWebPrivKey, + gitAnnexFeedStateDir, + gitAnnexFeedState, + gitAnnexMergeDir, + gitAnnexJournalDir, + gitAnnexPrivateJournalDir, + gitAnnexJournalLock, + gitAnnexGitQueueLock, + gitAnnexIndex, + gitAnnexPrivateIndex, + gitAnnexIndexStatus, + gitAnnexViewIndex, + gitAnnexViewLog, + gitAnnexMergedRefs, + gitAnnexIgnoredRefs, + gitAnnexPidFile, + gitAnnexPidLockFile, + gitAnnexDaemonStatusFile, + gitAnnexDaemonLogFile, + gitAnnexFuzzTestLogFile, + gitAnnexHtmlShim, + gitAnnexUrlFile, + gitAnnexTmpCfgFile, + gitAnnexSshDir, + gitAnnexRemotesDir, + gitAnnexAssistantDefaultDir, + HashLevels(..), + hashDirMixed, + hashDirLower, + preSanitizeKeyName, + reSanitizeKeyName, +) where + +import Data.Char +import Data.Default +import qualified Data.ByteString.Char8 as S8 +import qualified System.FilePath.ByteString as P + +import Common +import Key +import Types.UUID +import Types.GitConfig +import Types.Difference +import Types.BranchState +import Types.Export +import qualified Git +import qualified Git.Types as Git +import Git.FilePath +import Annex.DirHashes +import Annex.Fixup +import qualified Utility.RawFilePath as R + +{- Conventions: + - + - Functions ending in "Dir" should always return values ending with a + - trailing path separator. Most code does not rely on that, but a few + - things do. + - + - Everything else should not end in a trailing path separator. + - + - Only functions (with names starting with "git") that build a path + - based on a git repository should return full path relative to the git + - repository. Everything else returns path segments. 
+ -}
+
+{- The directory git annex uses for local state, relative to the .git
+ - directory -}
+annexDir :: RawFilePath
+annexDir = P.addTrailingPathSeparator "annex"
+
+{- The directory git annex uses for locally available object content,
+ - relative to the .git directory -}
+objectDir :: RawFilePath
+objectDir = P.addTrailingPathSeparator $ annexDir P.</> "objects"
+
+{- Annexed file's possible locations relative to the .git directory
+ - in a non-bare repository.
+ -
+ - Normally it is hashDirMixed. However, it's always possible that a
+ - bare repository was converted to non-bare, or that the crippled
+ - filesystem setting changed, so still need to check both. -}
+annexLocationsNonBare :: GitConfig -> Key -> [RawFilePath]
+annexLocationsNonBare config key =
+ map (annexLocation config key) [hashDirMixed, hashDirLower]
+
+{- Annexed file's possible locations relative to a bare repository. -}
+annexLocationsBare :: GitConfig -> Key -> [RawFilePath]
+annexLocationsBare config key =
+ map (annexLocation config key) [hashDirLower, hashDirMixed]
+
+annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> RawFilePath
+annexLocation config key hasher = objectDir P.</> keyPath key (hasher $ objectHashLevels config)
+
+{- For exportree remotes with annexobjects=true, objects are stored
+ - in this location as well as in the exported tree. -}
+exportAnnexObjectLocation :: GitConfig -> Key -> ExportLocation
+exportAnnexObjectLocation gc k =
+ mkExportLocation $
+ ".git" P.</> annexLocation gc k hashDirLower
+
+{- Number of subdirectories from the gitAnnexObjectDir
+ - to the gitAnnexLocation. -}
+gitAnnexLocationDepth :: GitConfig -> Int
+gitAnnexLocationDepth config = hashlevels + 1
+ where
+ HashLevels hashlevels = objectHashLevels config
+
+{- Annexed object's location in a repository.
+ -
+ - When there are multiple possible locations, returns the one where the
+ - file is actually present.
+ -
+ - When the file is not present, returns the location where the file should
+ - be stored.
+ -}
+gitAnnexLocation :: Key -> Git.Repo -> GitConfig -> IO RawFilePath
+gitAnnexLocation = gitAnnexLocation' R.doesPathExist
+
+gitAnnexLocation' :: (RawFilePath -> IO Bool) -> Key -> Git.Repo -> GitConfig -> IO RawFilePath
+gitAnnexLocation' checker key r config = gitAnnexLocation'' key r config
+ (annexCrippledFileSystem config)
+ (coreSymlinks config)
+ checker
+ (Git.localGitDir r)
+
+gitAnnexLocation'' :: Key -> Git.Repo -> GitConfig -> Bool -> Bool -> (RawFilePath -> IO Bool) -> RawFilePath -> IO RawFilePath
+gitAnnexLocation'' key r config crippled symlinkssupported checker gitdir
+ {- Bare repositories default to hashDirLower for new
+ - content, as it's more portable. But check all locations. -}
+ | Git.repoIsLocalBare r = checkall annexLocationsBare
+ {- If the repository is configured to only use lower, no need
+ - to check both. -}
+ | hasDifference ObjectHashLower (annexDifferences config) =
+ only hashDirLower
+ {- Repositories on crippled filesystems use same layout as bare
+ - repos for new content, unless symlinks are supported too. -}
+ | crippled = if symlinkssupported
+ then checkall annexLocationsNonBare
+ else checkall annexLocationsBare
+ | otherwise = checkall annexLocationsNonBare
+ where
+ only = return . inrepo . annexLocation config key
+ checkall f = check $ map inrepo $ f config key
+
+ inrepo d = gitdir P.</> d
+ check locs@(l:_) = fromMaybe l <$> firstM checker locs
+ check [] = error "internal"
+
+{- Calculates a symlink target to link a file to an annexed object.
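+ -
+ - For example (a sketch, with the hash directories abbreviated as
+ - "xx/yy"): for a worktree file "sub/dir/file" whose key has keyFile
+ - name KEY, the computed target is relative, along the lines of
+ - "../../.git/annex/objects/xx/yy/KEY/KEY", so links keep working when
+ - the repository is moved as a whole.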
-} +gitAnnexLink :: RawFilePath -> Key -> Git.Repo -> GitConfig -> IO RawFilePath +gitAnnexLink file key r config = do + currdir <- R.getCurrentDirectory + let absfile = absNormPathUnix currdir file + let gitdir = getgitdir currdir + loc <- gitAnnexLocation'' key r config False False (\_ -> return True) gitdir + toInternalGitPath <$> relPathDirToFile (parentDir absfile) loc + where + getgitdir currdir + {- This special case is for git submodules on filesystems not + - supporting symlinks; generate link target that will + - work portably. -} + | not (coreSymlinks config) && needsSubmoduleFixup r = + absNormPathUnix currdir (Git.repoPath r P. ".git") + | otherwise = Git.localGitDir r + absNormPathUnix d p = toInternalGitPath $ + absPathFrom (toInternalGitPath d) (toInternalGitPath p) + +{- Calculates a symlink target as would be used in a typical git + - repository, with .git in the top of the work tree. -} +gitAnnexLinkCanonical :: RawFilePath -> Key -> Git.Repo -> GitConfig -> IO RawFilePath +gitAnnexLinkCanonical file key r config = gitAnnexLink file key r' config' + where + r' = case r of + Git.Repo { Git.location = l@Git.Local { Git.worktree = Just wt } } -> + r { Git.location = l { Git.gitdir = wt P. ".git" } } + _ -> r + config' = config + { annexCrippledFileSystem = False + , coreSymlinks = True + } + +{- File used to lock a key's content. -} +gitAnnexContentLock :: Key -> Git.Repo -> GitConfig -> IO RawFilePath +gitAnnexContentLock key r config = do + loc <- gitAnnexLocation key r config + return $ loc <> ".lck" + +{- File used to indicate a key's content should not be dropped until after + - a specified time. -} +gitAnnexContentRetentionTimestamp :: Key -> Git.Repo -> GitConfig -> IO RawFilePath +gitAnnexContentRetentionTimestamp key r config = do + loc <- gitAnnexLocation key r config + return $ loc <> ".rtm" + +{- Lock file for gitAnnexContentRetentionTimestamp -} +gitAnnexContentRetentionTimestampLock :: Key -> Git.Repo -> GitConfig -> IO RawFilePath +gitAnnexContentRetentionTimestampLock key r config = do + loc <- gitAnnexLocation key r config + return $ loc <> ".rtl" + +{- Lock that is held when taking the gitAnnexContentLock to support the v10 + - upgrade. + - + - This uses the gitAnnexInodeSentinal file, because it needs to be a file + - that exists in the repository, even when it's an old v8 repository that + - is mounted read-only. The gitAnnexInodeSentinal is created by git-annex + - init, so should already exist. + -} +gitAnnexContentLockLock :: Git.Repo -> RawFilePath +gitAnnexContentLockLock = gitAnnexInodeSentinal + +gitAnnexInodeSentinal :: Git.Repo -> RawFilePath +gitAnnexInodeSentinal r = gitAnnexDir r P. "sentinal" + +gitAnnexInodeSentinalCache :: Git.Repo -> RawFilePath +gitAnnexInodeSentinalCache r = gitAnnexInodeSentinal r <> ".cache" + +{- The annex directory of a repository. -} +gitAnnexDir :: Git.Repo -> RawFilePath +gitAnnexDir r = P.addTrailingPathSeparator $ Git.localGitDir r P. annexDir + +{- The part of the annex directory where file contents are stored. -} +gitAnnexObjectDir :: Git.Repo -> RawFilePath +gitAnnexObjectDir r = P.addTrailingPathSeparator $ + Git.localGitDir r P. objectDir + +{- .git/annex/tmp/ is used for temp files for key's contents -} +gitAnnexTmpObjectDir :: Git.Repo -> RawFilePath +gitAnnexTmpObjectDir r = P.addTrailingPathSeparator $ + gitAnnexDir r P. 
"tmp" + +{- .git/annex/othertmp/ is used for other temp files -} +gitAnnexTmpOtherDir :: Git.Repo -> RawFilePath +gitAnnexTmpOtherDir r = P.addTrailingPathSeparator $ + gitAnnexDir r P. "othertmp" + +{- Lock file for gitAnnexTmpOtherDir. -} +gitAnnexTmpOtherLock :: Git.Repo -> RawFilePath +gitAnnexTmpOtherLock r = gitAnnexDir r P. "othertmp.lck" + +{- .git/annex/misctmp/ was used by old versions of git-annex and is still + - used during initialization -} +gitAnnexTmpOtherDirOld :: Git.Repo -> RawFilePath +gitAnnexTmpOtherDirOld r = P.addTrailingPathSeparator $ + gitAnnexDir r P. "misctmp" + +{- .git/annex/watchtmp/ is used by the watcher and assistant -} +gitAnnexTmpWatcherDir :: Git.Repo -> RawFilePath +gitAnnexTmpWatcherDir r = P.addTrailingPathSeparator $ + gitAnnexDir r P. "watchtmp" + +{- The temp file to use for a given key's content. -} +gitAnnexTmpObjectLocation :: Key -> Git.Repo -> RawFilePath +gitAnnexTmpObjectLocation key r = gitAnnexTmpObjectDir r P. keyFile key + +{- Given a temp file such as gitAnnexTmpObjectLocation, makes a name for a + - subdirectory in the same location, that can be used as a work area + - when receiving the key's content. + - + - There are ordering requirements for creating these directories; + - use Annex.Content.withTmpWorkDir to set them up. + -} +gitAnnexTmpWorkDir :: RawFilePath -> RawFilePath +gitAnnexTmpWorkDir p = + let (dir, f) = P.splitFileName p + -- Using a prefix avoids name conflict with any other keys. + in dir P. "work." <> f + +{- .git/annex/bad/ is used for bad files found during fsck -} +gitAnnexBadDir :: Git.Repo -> RawFilePath +gitAnnexBadDir r = P.addTrailingPathSeparator $ gitAnnexDir r P. "bad" + +{- The bad file to use for a given key. -} +gitAnnexBadLocation :: Key -> Git.Repo -> RawFilePath +gitAnnexBadLocation key r = gitAnnexBadDir r P. keyFile key + +{- .git/annex/foounused is used to number possibly unused keys -} +gitAnnexUnusedLog :: RawFilePath -> Git.Repo -> RawFilePath +gitAnnexUnusedLog prefix r = gitAnnexDir r P. (prefix <> "unused") + +{- .git/annex/keysdb/ contains a database of information about keys. -} +gitAnnexKeysDbDir :: Git.Repo -> GitConfig -> RawFilePath +gitAnnexKeysDbDir r c = fromMaybe (gitAnnexDir r) (annexDbDir c) P. "keysdb" + +{- Lock file for the keys database. -} +gitAnnexKeysDbLock :: Git.Repo -> GitConfig -> RawFilePath +gitAnnexKeysDbLock r c = gitAnnexKeysDbDir r c <> ".lck" + +{- Contains the stat of the last index file that was + - reconciled with the keys database. -} +gitAnnexKeysDbIndexCache :: Git.Repo -> GitConfig -> RawFilePath +gitAnnexKeysDbIndexCache r c = gitAnnexKeysDbDir r c <> ".cache" + +{- .git/annex/fsck/uuid/ is used to store information about incremental + - fscks. -} +gitAnnexFsckDir :: UUID -> Git.Repo -> Maybe GitConfig -> RawFilePath +gitAnnexFsckDir u r mc = case annexDbDir =<< mc of + Nothing -> go (gitAnnexDir r) + Just d -> go d + where + go d = d P. "fsck" P. fromUUID u + +{- used to store information about incremental fscks. -} +gitAnnexFsckState :: UUID -> Git.Repo -> RawFilePath +gitAnnexFsckState u r = gitAnnexFsckDir u r Nothing P. "state" + +{- Directory containing database used to record fsck info. -} +gitAnnexFsckDbDir :: UUID -> Git.Repo -> GitConfig -> RawFilePath +gitAnnexFsckDbDir u r c = gitAnnexFsckDir u r (Just c) P. "fsckdb" + +{- Directory containing old database used to record fsck info. -} +gitAnnexFsckDbDirOld :: UUID -> Git.Repo -> GitConfig -> RawFilePath +gitAnnexFsckDbDirOld u r c = gitAnnexFsckDir u r (Just c) P. 
"db" + +{- Lock file for the fsck database. -} +gitAnnexFsckDbLock :: UUID -> Git.Repo -> GitConfig -> RawFilePath +gitAnnexFsckDbLock u r c = gitAnnexFsckDir u r (Just c) P. "fsck.lck" + +{- .git/annex/fsckresults/uuid is used to store results of git fscks -} +gitAnnexFsckResultsLog :: UUID -> Git.Repo -> RawFilePath +gitAnnexFsckResultsLog u r = + gitAnnexDir r P. "fsckresults" P. fromUUID u + +{- .git/annex/upgrade.log is used to record repository version upgrades. -} +gitAnnexUpgradeLog :: Git.Repo -> RawFilePath +gitAnnexUpgradeLog r = gitAnnexDir r P. "upgrade.log" + +gitAnnexUpgradeLock :: Git.Repo -> RawFilePath +gitAnnexUpgradeLock r = gitAnnexDir r P. "upgrade.lck" + +{- .git/annex/smudge.log is used to log smudged worktree files that need to + - be updated. -} +gitAnnexSmudgeLog :: Git.Repo -> RawFilePath +gitAnnexSmudgeLog r = gitAnnexDir r P. "smudge.log" + +gitAnnexSmudgeLock :: Git.Repo -> RawFilePath +gitAnnexSmudgeLock r = gitAnnexDir r P. "smudge.lck" + +{- .git/annex/restage.log is used to log worktree files that need to be + - restaged in git -} +gitAnnexRestageLog :: Git.Repo -> RawFilePath +gitAnnexRestageLog r = gitAnnexDir r P. "restage.log" + +{- .git/annex/restage.old is used while restaging files in git -} +gitAnnexRestageLogOld :: Git.Repo -> RawFilePath +gitAnnexRestageLogOld r = gitAnnexDir r P. "restage.old" + +gitAnnexRestageLock :: Git.Repo -> RawFilePath +gitAnnexRestageLock r = gitAnnexDir r P. "restage.lck" + +{- .git/annex/adjust.log is used to log when the adjusted branch needs to + - be updated. -} +gitAnnexAdjustedBranchUpdateLog :: Git.Repo -> RawFilePath +gitAnnexAdjustedBranchUpdateLog r = gitAnnexDir r P. "adjust.log" + +gitAnnexAdjustedBranchUpdateLock :: Git.Repo -> RawFilePath +gitAnnexAdjustedBranchUpdateLock r = gitAnnexDir r P. "adjust.lck" + +{- .git/annex/migrate.log is used to log migrations before committing them. -} +gitAnnexMigrateLog :: Git.Repo -> RawFilePath +gitAnnexMigrateLog r = gitAnnexDir r P. "migrate.log" + +gitAnnexMigrateLock :: Git.Repo -> RawFilePath +gitAnnexMigrateLock r = gitAnnexDir r P. "migrate.lck" + +{- .git/annex/migrations.log is used to log committed migrations. -} +gitAnnexMigrationsLog :: Git.Repo -> RawFilePath +gitAnnexMigrationsLog r = gitAnnexDir r P. "migrations.log" + +gitAnnexMigrationsLock :: Git.Repo -> RawFilePath +gitAnnexMigrationsLock r = gitAnnexDir r P. "migrations.lck" + +{- .git/annex/move.log is used to log moves that are in progress, + - to better support resuming an interrupted move. -} +gitAnnexMoveLog :: Git.Repo -> RawFilePath +gitAnnexMoveLog r = gitAnnexDir r P. "move.log" + +gitAnnexMoveLock :: Git.Repo -> RawFilePath +gitAnnexMoveLock r = gitAnnexDir r P. "move.lck" + +{- .git/annex/export/ is used to store information about + - exports to special remotes. -} +gitAnnexExportDir :: Git.Repo -> GitConfig -> RawFilePath +gitAnnexExportDir r c = fromMaybe (gitAnnexDir r) (annexDbDir c) P. "export" + +{- Directory containing database used to record export info. -} +gitAnnexExportDbDir :: UUID -> Git.Repo -> GitConfig -> RawFilePath +gitAnnexExportDbDir u r c = + gitAnnexExportDir r c P. fromUUID u P. "exportdb" + +{- Lock file for export database. -} +gitAnnexExportLock :: UUID -> Git.Repo -> GitConfig -> RawFilePath +gitAnnexExportLock u r c = gitAnnexExportDbDir u r c <> ".lck" + +{- Lock file for updating the export database with information from the + - repository. 
-} +gitAnnexExportUpdateLock :: UUID -> Git.Repo -> GitConfig -> RawFilePath +gitAnnexExportUpdateLock u r c = gitAnnexExportDbDir u r c <> ".upl" + +{- Log file used to keep track of files that were in the tree exported to a + - remote, but were excluded by its preferred content settings. -} +gitAnnexExportExcludeLog :: UUID -> Git.Repo -> RawFilePath +gitAnnexExportExcludeLog u r = gitAnnexDir r P. "export.ex" P. fromUUID u + +{- Directory containing database used to record remote content ids. + - + - (This used to be "cid", but a problem with the database caused it to + - need to be rebuilt with a new name.) + -} +gitAnnexContentIdentifierDbDir :: Git.Repo -> GitConfig -> RawFilePath +gitAnnexContentIdentifierDbDir r c = + fromMaybe (gitAnnexDir r) (annexDbDir c) P. "cidsdb" + +{- Lock file for writing to the content id database. -} +gitAnnexContentIdentifierLock :: Git.Repo -> GitConfig -> RawFilePath +gitAnnexContentIdentifierLock r c = gitAnnexContentIdentifierDbDir r c <> ".lck" + +{- .git/annex/import/ is used to store information about + - imports from special remotes. -} +gitAnnexImportDir :: Git.Repo -> GitConfig -> RawFilePath +gitAnnexImportDir r c = fromMaybe (gitAnnexDir r) (annexDbDir c) P. "import" + +{- File containing state about the last import done from a remote. -} +gitAnnexImportLog :: UUID -> Git.Repo -> GitConfig -> RawFilePath +gitAnnexImportLog u r c = + gitAnnexImportDir r c P. fromUUID u P. "log" + +{- Directory containing database used by importfeed. -} +gitAnnexImportFeedDbDir :: Git.Repo -> GitConfig -> RawFilePath +gitAnnexImportFeedDbDir r c = + fromMaybe (gitAnnexDir r) (annexDbDir c) P. "importfeed" + +{- Lock file for writing to the importfeed database. -} +gitAnnexImportFeedDbLock :: Git.Repo -> GitConfig -> RawFilePath +gitAnnexImportFeedDbLock r c = gitAnnexImportFeedDbDir r c <> ".lck" + +{- .git/annex/schedulestate is used to store information about when + - scheduled jobs were last run. -} +gitAnnexScheduleState :: Git.Repo -> RawFilePath +gitAnnexScheduleState r = gitAnnexDir r P. "schedulestate" + +{- .git/annex/creds/ is used to store credentials to access some special + - remotes. -} +gitAnnexCredsDir :: Git.Repo -> RawFilePath +gitAnnexCredsDir r = P.addTrailingPathSeparator $ gitAnnexDir r P. "creds" + +{- .git/annex/certificate.pem and .git/annex/key.pem are used by the webapp + - when HTTPS is enabled -} +gitAnnexWebCertificate :: Git.Repo -> FilePath +gitAnnexWebCertificate r = fromRawFilePath $ gitAnnexDir r P. "certificate.pem" +gitAnnexWebPrivKey :: Git.Repo -> FilePath +gitAnnexWebPrivKey r = fromRawFilePath $ gitAnnexDir r P. "privkey.pem" + +{- .git/annex/feeds/ is used to record per-key (url) state by importfeed -} +gitAnnexFeedStateDir :: Git.Repo -> RawFilePath +gitAnnexFeedStateDir r = P.addTrailingPathSeparator $ + gitAnnexDir r P. "feedstate" + +gitAnnexFeedState :: Key -> Git.Repo -> RawFilePath +gitAnnexFeedState k r = gitAnnexFeedStateDir r P. keyFile k + +{- .git/annex/merge/ is used as a empty work tree for merges in + - adjusted branches. -} +gitAnnexMergeDir :: Git.Repo -> FilePath +gitAnnexMergeDir r = fromRawFilePath $ + P.addTrailingPathSeparator $ gitAnnexDir r P. "merge" + +{- .git/annex/transfer/ is used to record keys currently + - being transferred, and other transfer bookkeeping info. -} +gitAnnexTransferDir :: Git.Repo -> RawFilePath +gitAnnexTransferDir r = + P.addTrailingPathSeparator $ gitAnnexDir r P. 
"transfer" + +{- .git/annex/journal/ is used to journal changes made to the git-annex + - branch -} +gitAnnexJournalDir :: BranchState -> Git.Repo -> RawFilePath +gitAnnexJournalDir st r = P.addTrailingPathSeparator $ + case alternateJournal st of + Nothing -> gitAnnexDir r P. "journal" + Just d -> d + +{- .git/annex/journal.private/ is used to journal changes regarding private + - repositories. -} +gitAnnexPrivateJournalDir :: BranchState -> Git.Repo -> RawFilePath +gitAnnexPrivateJournalDir st r = P.addTrailingPathSeparator $ + case alternateJournal st of + Nothing -> gitAnnexDir r P. "journal-private" + Just d -> d + +{- Lock file for the journal. -} +gitAnnexJournalLock :: Git.Repo -> RawFilePath +gitAnnexJournalLock r = gitAnnexDir r P. "journal.lck" + +{- Lock file for flushing a git queue that writes to the git index or + - other git state that should only have one writer at a time. -} +gitAnnexGitQueueLock :: Git.Repo -> RawFilePath +gitAnnexGitQueueLock r = gitAnnexDir r P. "gitqueue.lck" + +{- .git/annex/index is used to stage changes to the git-annex branch -} +gitAnnexIndex :: Git.Repo -> RawFilePath +gitAnnexIndex r = gitAnnexDir r P. "index" + +{- .git/annex/index-private is used to store information that is not to + - be exposed to the git-annex branch. -} +gitAnnexPrivateIndex :: Git.Repo -> RawFilePath +gitAnnexPrivateIndex r = gitAnnexDir r P. "index-private" + +{- Holds the ref of the git-annex branch that the index was last updated to. + - + - The .lck in the name is a historical accident; this is not used as a + - lock. -} +gitAnnexIndexStatus :: Git.Repo -> RawFilePath +gitAnnexIndexStatus r = gitAnnexDir r P. "index.lck" + +{- The index file used to generate a filtered branch view._-} +gitAnnexViewIndex :: Git.Repo -> RawFilePath +gitAnnexViewIndex r = gitAnnexDir r P. "viewindex" + +{- File containing a log of recently accessed views. -} +gitAnnexViewLog :: Git.Repo -> RawFilePath +gitAnnexViewLog r = gitAnnexDir r P. "viewlog" + +{- List of refs that have already been merged into the git-annex branch. -} +gitAnnexMergedRefs :: Git.Repo -> RawFilePath +gitAnnexMergedRefs r = gitAnnexDir r P. "mergedrefs" + +{- List of refs that should not be merged into the git-annex branch. -} +gitAnnexIgnoredRefs :: Git.Repo -> RawFilePath +gitAnnexIgnoredRefs r = gitAnnexDir r P. "ignoredrefs" + +{- Pid file for daemon mode. -} +gitAnnexPidFile :: Git.Repo -> RawFilePath +gitAnnexPidFile r = gitAnnexDir r P. "daemon.pid" + +{- Pid lock file for pidlock mode -} +gitAnnexPidLockFile :: Git.Repo -> RawFilePath +gitAnnexPidLockFile r = gitAnnexDir r P. "pidlock" + +{- Status file for daemon mode. -} +gitAnnexDaemonStatusFile :: Git.Repo -> FilePath +gitAnnexDaemonStatusFile r = fromRawFilePath $ + gitAnnexDir r P. "daemon.status" + +{- Log file for daemon mode. -} +gitAnnexDaemonLogFile :: Git.Repo -> RawFilePath +gitAnnexDaemonLogFile r = gitAnnexDir r P. "daemon.log" + +{- Log file for fuzz test. -} +gitAnnexFuzzTestLogFile :: Git.Repo -> FilePath +gitAnnexFuzzTestLogFile r = fromRawFilePath $ + gitAnnexDir r P. "fuzztest.log" + +{- Html shim file used to launch the webapp. -} +gitAnnexHtmlShim :: Git.Repo -> RawFilePath +gitAnnexHtmlShim r = gitAnnexDir r P. "webapp.html" + +{- File containing the url to the webapp. -} +gitAnnexUrlFile :: Git.Repo -> RawFilePath +gitAnnexUrlFile r = gitAnnexDir r P. "url" + +{- Temporary file used to edit configuriation from the git-annex branch. -} +gitAnnexTmpCfgFile :: Git.Repo -> RawFilePath +gitAnnexTmpCfgFile r = gitAnnexDir r P. 
"config.tmp" + +{- .git/annex/ssh/ is used for ssh connection caching -} +gitAnnexSshDir :: Git.Repo -> RawFilePath +gitAnnexSshDir r = P.addTrailingPathSeparator $ gitAnnexDir r P. "ssh" + +{- .git/annex/remotes/ is used for remote-specific state. -} +gitAnnexRemotesDir :: Git.Repo -> RawFilePath +gitAnnexRemotesDir r = + P.addTrailingPathSeparator $ gitAnnexDir r P. "remotes" + +{- This is the base directory name used by the assistant when making + - repositories, by default. -} +gitAnnexAssistantDefaultDir :: FilePath +gitAnnexAssistantDefaultDir = "annex" + +{- Sanitizes a String that will be used as part of a Key's keyName, + - dealing with characters that cause problems. + - + - This is used when a new Key is initially being generated, eg by genKey. + - Unlike keyFile and fileKey, it does not need to be a reversible + - escaping. Also, it's ok to change this to add more problematic + - characters later. Unlike changing keyFile, which could result in the + - filenames used for existing keys changing and contents getting lost. + - + - It is, however, important that the input and output of this function + - have a 1:1 mapping, to avoid two different inputs from mapping to the + - same key. + -} +preSanitizeKeyName :: String -> String +preSanitizeKeyName = preSanitizeKeyName' False + +preSanitizeKeyName' :: Bool -> String -> String +preSanitizeKeyName' resanitize = concatMap escape + where + escape c + | isAsciiUpper c || isAsciiLower c || isDigit c = [c] + | c `elem` ['.', '-', '_'] = [c] -- common, assumed safe + | c `elem` ['/', '%', ':'] = [c] -- handled by keyFile + -- , is safe and uncommon, so will be used to escape + -- other characters. By itself, it is escaped to + -- doubled form. + | c == ',' = if not resanitize + then ",," + else "," + | otherwise = ',' : show (ord c) + +{- Converts a keyName that has been santizied with an old version of + - preSanitizeKeyName to be sanitized with the new version. -} +reSanitizeKeyName :: String -> String +reSanitizeKeyName = preSanitizeKeyName' True + +{- Converts a key into a filename fragment without any directory. + - + - Escape "/" in the key name, to keep a flat tree of files and avoid + - issues with keys containing "/../" or ending with "/" etc. + - + - "/" is escaped to "%" because it's short and rarely used, and resembles + - a slash + - "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping + - is one to one. + - ":" is escaped to "&c", because it seemed like a good idea at the time. + - + - Changing what this function escapes and how is not a good idea, as it + - can cause existing objects to get lost. + -} +keyFile :: Key -> RawFilePath +keyFile k = + let b = serializeKey' k + in if S8.any (`elem` ['&', '%', ':', '/']) b + then S8.concatMap esc b + else b + where + esc '&' = "&a" + esc '%' = "&s" + esc ':' = "&c" + esc '/' = "%" + esc c = S8.singleton c + +{- Reverses keyFile, converting a filename fragment (ie, the basename of + - the symlink target) into a key. -} +fileKey :: RawFilePath -> Maybe Key +fileKey = deserializeKey' . S8.intercalate "/" . map go . S8.split '%' + where + go = S8.concat . unescafterfirst . S8.split '&' + unescafterfirst [] = [] + unescafterfirst (b:bs) = b : map (unesc . 
S8.uncons) bs + unesc :: Maybe (Char, S8.ByteString) -> S8.ByteString + unesc Nothing = mempty + unesc (Just ('c', b)) = S8.cons ':' b + unesc (Just ('s', b)) = S8.cons '%' b + unesc (Just ('a', b)) = S8.cons '&' b + unesc (Just (c, b)) = S8.cons c b + +{- A location to store a key on a special remote that uses a filesystem. + - A directory hash is used, to protect against filesystems that dislike + - having many items in a single directory. + - + - The file is put in a directory with the same name, this allows + - write-protecting the directory to avoid accidental deletion of the file. + -} +keyPath :: Key -> Hasher -> RawFilePath +keyPath key hasher = hasher key P. f P. f + where + f = keyFile key + +{- All possible locations to store a key in a special remote + - using different directory hashes. + - + - This is compatible with the annexLocationsNonBare and annexLocationsBare, + - for interoperability between special remotes and git-annex repos. + -} +keyPaths :: Key -> [RawFilePath] +keyPaths key = map (\h -> keyPath key (h def)) dirHashes diff --git a/Annex/LockFile.hs b/Annex/LockFile.hs new file mode 100644 index 0000000000..71a07e677c --- /dev/null +++ b/Annex/LockFile.hs @@ -0,0 +1,113 @@ +{- git-annex lock files. + - + - Copyright 2012-2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.LockFile ( + lockFileCached, + unlockFile, + getLockCache, + fromLockCache, + withSharedLock, + withExclusiveLock, + takeExclusiveLock, + tryExclusiveLock, +) where + +import Annex.Common +import Annex +import Types.LockCache +import Annex.Perms +import Annex.LockPool + +import qualified Data.Map as M +import qualified System.FilePath.ByteString as P + +{- Create a specified lock file, and takes a shared lock, which is retained + - in the cache. -} +lockFileCached :: RawFilePath -> Annex () +lockFileCached file = go =<< fromLockCache file + where + go (Just _) = noop -- already locked + go Nothing = do +#ifndef mingw32_HOST_OS + mode <- annexFileMode + lockhandle <- lockShared (Just mode) file +#else + lockhandle <- liftIO $ waitToLock $ lockShared file +#endif + changeLockCache $ M.insert file lockhandle + +unlockFile :: RawFilePath -> Annex () +unlockFile file = maybe noop go =<< fromLockCache file + where + go lockhandle = do + liftIO $ dropLock lockhandle + changeLockCache $ M.delete file + +getLockCache :: Annex LockCache +getLockCache = getState lockcache + +fromLockCache :: RawFilePath -> Annex (Maybe LockHandle) +fromLockCache file = M.lookup file <$> getLockCache + +changeLockCache :: (LockCache -> LockCache) -> Annex () +changeLockCache a = do + m <- getLockCache + changeState $ \s -> s { lockcache = a m } + +{- Runs an action with a shared lock held. If an exclusive lock is held, + - blocks until it becomes free. -} +withSharedLock :: RawFilePath -> Annex a -> Annex a +withSharedLock lockfile a = debugLocks $ do + createAnnexDirectory $ P.takeDirectory lockfile + mode <- annexFileMode + bracket (lock mode lockfile) (liftIO . dropLock) (const a) + where +#ifndef mingw32_HOST_OS + lock mode = lockShared (Just mode) +#else + lock _mode = liftIO . waitToLock . lockShared +#endif + +{- Runs an action with an exclusive lock held. If the lock is already + - held, blocks until it becomes free. -} +withExclusiveLock :: RawFilePath -> Annex a -> Annex a +withExclusiveLock lockfile a = bracket + (takeExclusiveLock lockfile) + (liftIO . dropLock) + (const a) + +{- Takes an exclusive lock, blocking until it's free. 
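+ -
+ - A usage sketch (with a hypothetical action "myaction"):
+ -
+ -   lck <- fromRepo gitAnnexUpgradeLock
+ -   withExclusiveLock lck myaction
+ -
+ - The lock file's parent directory is created first when necessary.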
-} +takeExclusiveLock :: RawFilePath -> Annex LockHandle +takeExclusiveLock lockfile = debugLocks $ do + createAnnexDirectory $ P.takeDirectory lockfile + mode <- annexFileMode + lock mode lockfile + where +#ifndef mingw32_HOST_OS + lock mode = lockExclusive (Just mode) +#else + lock _mode = liftIO . waitToLock . lockExclusive +#endif + +{- Tries to take an exclusive lock and run an action. If the lock is + - already held, returns Nothing. -} +tryExclusiveLock :: RawFilePath -> Annex a -> Annex (Maybe a) +tryExclusiveLock lockfile a = debugLocks $ do + createAnnexDirectory $ P.takeDirectory lockfile + mode <- annexFileMode + bracket (lock mode lockfile) (liftIO . unlock) go + where +#ifndef mingw32_HOST_OS + lock mode = tryLockExclusive (Just mode) +#else + lock _mode = liftIO . lockExclusive +#endif + unlock = maybe noop dropLock + go Nothing = return Nothing + go (Just _) = Just <$> a diff --git a/Annex/LockPool.hs b/Annex/LockPool.hs new file mode 100644 index 0000000000..33df3b611e --- /dev/null +++ b/Annex/LockPool.hs @@ -0,0 +1,17 @@ +{- Wraps Utility.LockPool, making pid locks be used when git-annex is so + - configured. + - + - Copyright 2015 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.LockPool (module X) where + +#ifndef mingw32_HOST_OS +import Annex.LockPool.PosixOrPid as X +#else +import Utility.LockPool.Windows as X +#endif diff --git a/Annex/LockPool/PosixOrPid.hs b/Annex/LockPool/PosixOrPid.hs new file mode 100644 index 0000000000..36426fdaf8 --- /dev/null +++ b/Annex/LockPool/PosixOrPid.hs @@ -0,0 +1,93 @@ +{- Wraps Utility.LockPool, making pid locks be used when git-annex is so + - configured. + - + - Copyright 2015-2021 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.LockPool.PosixOrPid ( + LockFile, + LockHandle, + lockShared, + lockExclusive, + tryLockShared, + tryLockExclusive, + dropLock, + checkLocked, + LockStatus(..), + getLockStatus, + checkSaneLock, +) where + +import Common +import Types +import qualified Annex +import qualified Utility.LockPool.Posix as Posix +import qualified Utility.LockPool.PidLock as Pid +import qualified Utility.LockPool.LockHandle as H +import Utility.FileMode +import Utility.LockPool.LockHandle (LockHandle, dropLock) +import Utility.LockFile.Posix (openLockFile) +import Utility.LockPool.STM (LockFile, LockMode(..)) +import Utility.LockFile.LockStatus +import Config (pidLockFile) +import Messages (warning) +import Git.Quote + +import System.Posix + +lockShared :: Maybe ModeSetter -> LockFile -> Annex LockHandle +lockShared m f = pidLock m f LockShared $ Posix.lockShared m f + +lockExclusive :: Maybe ModeSetter -> LockFile -> Annex LockHandle +lockExclusive m f = pidLock m f LockExclusive $ Posix.lockExclusive m f + +tryLockShared :: Maybe ModeSetter -> LockFile -> Annex (Maybe LockHandle) +tryLockShared m f = tryPidLock m f LockShared $ Posix.tryLockShared m f + +tryLockExclusive :: Maybe ModeSetter -> LockFile -> Annex (Maybe LockHandle) +tryLockExclusive m f = tryPidLock m f LockExclusive $ Posix.tryLockExclusive m f + +checkLocked :: LockFile -> Annex (Maybe Bool) +checkLocked f = Posix.checkLocked f `pidLockCheck` checkpid + where + checkpid pidlock = Pid.checkLocked pidlock >>= \case + -- Only return true when the posix lock file exists. 
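+ -- (When git-annex is configured to use pid locks, these wrappers
+ -- take the single per-repository pid lock instead of per-file
+ -- posix locks, but a dummy posix lock file is still created, so
+ -- this check remains meaningful; see dummyPosixLock below.)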
+ Just _ -> Posix.checkLocked f + Nothing -> return Nothing + +getLockStatus :: LockFile -> Annex LockStatus +getLockStatus f = Posix.getLockStatus f + `pidLockCheck` Pid.getLockStatus + +checkSaneLock :: LockFile -> LockHandle -> Annex Bool +checkSaneLock f h = H.checkSaneLock f h + `pidLockCheck` flip Pid.checkSaneLock h + +pidLockCheck :: IO a -> (LockFile -> IO a) -> Annex a +pidLockCheck posixcheck pidcheck = debugLocks $ + liftIO . maybe posixcheck pidcheck =<< pidLockFile + +pidLock :: Maybe ModeSetter -> LockFile -> LockMode -> IO LockHandle -> Annex LockHandle +pidLock m f lockmode posixlock = debugLocks $ go =<< pidLockFile + where + go Nothing = liftIO posixlock + go (Just pidlock) = do + timeout <- annexPidLockTimeout <$> Annex.getGitConfig + liftIO $ dummyPosixLock m f + Pid.waitLock f lockmode timeout pidlock (warning . UnquotedString) + +tryPidLock :: Maybe ModeSetter -> LockFile -> LockMode -> IO (Maybe LockHandle) -> Annex (Maybe LockHandle) +tryPidLock m f lockmode posixlock = debugLocks $ liftIO . go =<< pidLockFile + where + go Nothing = posixlock + go (Just pidlock) = do + dummyPosixLock m f + Pid.tryLock f lockmode pidlock + +-- The posix lock file is created even when using pid locks, in order to +-- avoid complicating any code that might expect to be able to see that +-- lock file. But, it's not locked. +dummyPosixLock :: Maybe ModeSetter -> LockFile -> IO () +dummyPosixLock m f = bracket (openLockFile ReadLock m f) closeFd (const noop) diff --git a/Annex/Magic.hs b/Annex/Magic.hs new file mode 100644 index 0000000000..c408cd50d0 --- /dev/null +++ b/Annex/Magic.hs @@ -0,0 +1,74 @@ +{- Interface to libmagic + - + - Copyright 2019-2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.Magic ( + Magic, + MimeType, + MimeEncoding, + initMagicMime, + getMagicMimeType, + getMagicMimeEncoding, +) where + +import Types.Mime +import Control.Monad.IO.Class +#ifdef WITH_MAGICMIME +import Magic +import Utility.Env +import Control.Concurrent +import System.IO.Unsafe (unsafePerformIO) +import Common +#else +type Magic = () +#endif + +initMagicMime :: IO (Maybe Magic) +#ifdef WITH_MAGICMIME +initMagicMime = catchMaybeIO $ do + m <- magicOpen [MagicMime] + liftIO $ getEnv "GIT_ANNEX_DIR" >>= \case + Nothing -> magicLoadDefault m + Just d -> magicLoad m + (d "magic" "magic.mgc") + return m +#else +initMagicMime = return Nothing +#endif + +getMagicMime :: Magic -> FilePath -> IO (Maybe (MimeType, MimeEncoding)) +#ifdef WITH_MAGICMIME +getMagicMime m f = Just . parse <$> magicConcurrentSafe (magicFile m f) + where + parse s = + let (mimetype, rest) = separate (== ';') s + in case rest of + (' ':'c':'h':'a':'r':'s':'e':'t':'=':mimeencoding) -> + (mimetype, mimeencoding) + _ -> (mimetype, "") +#else +getMagicMime _ _ = return Nothing +#endif + +getMagicMimeType :: MonadIO m => Magic -> FilePath -> m (Maybe MimeType) +getMagicMimeType m f = liftIO $ fmap fst <$> getMagicMime m f + +getMagicMimeEncoding :: MonadIO m => Magic -> FilePath -> m(Maybe MimeEncoding) +getMagicMimeEncoding m f = liftIO $ fmap snd <$> getMagicMime m f + +#ifdef WITH_MAGICMIME +{-# NOINLINE mutex #-} +mutex :: MVar () +mutex = unsafePerformIO $ newMVar () + +-- Work around a bug, the library is not concurrency safe and will +-- sometimes access the wrong memory if multiple ones are called at the +-- same time. 
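+{- For example (a sketch of getMagicMime above): libmagic output like
+ - "text/plain; charset=us-ascii" is parsed into
+ - ("text/plain", "us-ascii"), while output with no "; charset=" suffix
+ - yields an empty MimeEncoding. -}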
+magicConcurrentSafe :: IO a -> IO a +magicConcurrentSafe = bracket_ (takeMVar mutex) (putMVar mutex ()) +#endif diff --git a/Annex/MetaData.hs b/Annex/MetaData.hs new file mode 100644 index 0000000000..1eba836455 --- /dev/null +++ b/Annex/MetaData.hs @@ -0,0 +1,121 @@ +{- git-annex metadata + - + - Copyright 2014-2016 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.MetaData ( + genMetaData, + dateMetaData, + parseModMeta, + parseMetaDataMatcher, + module X +) where + +import Annex.Common +import qualified Annex +import Types.MetaData as X +import Annex.MetaData.StandardFields as X +import Logs.MetaData +import Annex.CatFile +import Utility.Glob + +import qualified Data.Set as S +import qualified Data.Text as T +import Data.Time.Calendar +import Data.Time.Clock +import Data.Time.Clock.POSIX +import Text.Read + +{- Adds metadata for a file that has just been ingested into the + - annex, but has not yet been committed to git. + - + - When the file has been modified, the metadata is copied over + - from the old key to the new key. Note that it looks at the old key as + - committed to HEAD -- the new key may or may not have already been staged + - in the index. + - + - Also, can generate new metadata, if configured to do so. + -} +genMetaData :: Key -> RawFilePath -> Maybe POSIXTime -> Annex () +genMetaData key file mmtime = do + catKeyFileHEAD file >>= \case + Nothing -> noop + Just oldkey -> + -- Have to copy first, before adding any + -- more metadata, because copyMetaData does not + -- preserve any metadata already on key. + whenM (copyMetaData oldkey key <&&> (not <$> onlydatemeta oldkey)) $ + warncopied + whenM (annexGenMetaData <$> Annex.getGitConfig) $ + case mmtime of + Just mtime -> do + old <- getCurrentMetaData key + addMetaData key $ + dateMetaData (posixSecondsToUTCTime mtime) old + Nothing -> noop + where + warncopied = warning $ UnquotedString $ + "Copied metadata from old version of " ++ fromRawFilePath file ++ " to new version. " ++ + "If you don't want this copied metadata, run: git annex metadata --remove-all " ++ fromRawFilePath file + -- If the only fields copied were date metadata, and they'll + -- be overwritten with the current mtime, no need to warn about + -- copying. + onlydatemeta oldkey = ifM (annexGenMetaData <$> Annex.getGitConfig) + ( null . filter (not . isDateMetaField . fst) . fromMetaData + <$> getCurrentMetaData oldkey + , return False + ) + +{- Generates metadata for a file's date stamp. + - + - Any date fields in the old metadata will be overwritten. + - + - Note that the returned MetaData does not contain all the input MetaData, + - only changes to add the date fields. -} +dateMetaData :: UTCTime -> MetaData -> MetaData +dateMetaData mtime old = modMeta old $ + (SetMeta yearMetaField $ S.singleton $ toMetaValue $ encodeBS $ show y) + `ComposeModMeta` + (SetMeta monthMetaField $ S.singleton $ toMetaValue $ encodeBS $ show m) + `ComposeModMeta` + (SetMeta dayMetaField $ S.singleton $ toMetaValue $ encodeBS $ show d) + where + (y, m, d) = toGregorian $ utctDay mtime + +{- Parses field=value, field+=value, field-=value, field?=value -} +parseModMeta :: String -> Either String ModMeta +parseModMeta p = case lastMaybe f of + Just '+' -> AddMeta <$> mkMetaField (T.pack f') <*> v + Just '-' -> DelMeta <$> mkMetaField (T.pack f') <*> (Just <$> v) + Just '?' 
-> MaybeSetMeta <$> mkMetaField (T.pack f') <*> v
+ _ -> SetMeta <$> mkMetaField (T.pack f) <*> (S.singleton <$> v)
+ where
+ (f, sv) = separate (== '=') p
+ f' = beginning f
+ v = pure (toMetaValue (encodeBS sv))
+
+{- Parses field=value, field<value, field<=value, field>value,
+ - field>=value -}
+parseMetaDataMatcher :: String -> Either String (MetaField, MetaValue -> Bool)
+parseMetaDataMatcher p = (,)
+ <$> mkMetaField (T.pack f)
+ <*> pure matcher
+ where
+ (f, op_v) = break (`elem` "=<>") p
+ matcher = case op_v of
+ ('=':v) -> checkglob v
+ ('<':'=':v) -> checkcmp (<=) (<=) v
+ ('<':v) -> checkcmp (<) (<) v
+ ('>':'=':v) -> checkcmp (>=) (>=) v
+ ('>':v) -> checkcmp (>) (>) v
+ _ -> checkglob ""
+ checkglob v =
+ let cglob = compileGlob v CaseInsensitive (GlobFilePath False)
+ in matchGlob cglob . decodeBS . fromMetaValue
+ checkcmp cmp cmp' v mv' =
+ let v' = decodeBS (fromMetaValue mv')
+ in case (doubleval v, doubleval v') of
+ (Just d, Just d') -> d' `cmp` d
+ _ -> v' `cmp'` v
+ doubleval v = readMaybe v :: Maybe Double
diff --git a/Annex/MetaData/StandardFields.hs b/Annex/MetaData/StandardFields.hs
new file mode 100644
index 0000000000..061133b41c
--- /dev/null
+++ b/Annex/MetaData/StandardFields.hs
@@ -0,0 +1,67 @@
+{- git-annex metadata, standard fields
+ -
+ - Copyright 2014 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE OverloadedStrings #-}
+
+module Annex.MetaData.StandardFields (
+ tagMetaField,
+ yearMetaField,
+ monthMetaField,
+ dayMetaField,
+ isDateMetaField,
+ lastChangedField,
+ mkLastChangedField,
+ isLastChangedField,
+ itemIdField
+) where
+
+import Types.MetaData
+
+import qualified Data.Text as T
+import Data.Monoid
+import Prelude
+
+tagMetaField :: MetaField
+tagMetaField = mkMetaFieldUnchecked "tag"
+
+yearMetaField :: MetaField
+yearMetaField = mkMetaFieldUnchecked "year"
+
+monthMetaField :: MetaField
+monthMetaField = mkMetaFieldUnchecked "month"
+
+dayMetaField :: MetaField
+dayMetaField = mkMetaFieldUnchecked "day"
+
+isDateMetaField :: MetaField -> Bool
+isDateMetaField f
+ | f == yearMetaField = True
+ | f == monthMetaField = True
+ | f == dayMetaField = True
+ | otherwise = False
+
+lastChangedField :: MetaField
+lastChangedField = mkMetaFieldUnchecked lastchanged
+
+mkLastChangedField :: MetaField -> MetaField
+mkLastChangedField f = mkMetaFieldUnchecked (fromMetaField f <> lastchangedSuffix)
+
+isLastChangedField :: MetaField -> Bool
+isLastChangedField f
+ | f == lastChangedField = True
+ | otherwise = lastchanged `T.isSuffixOf` s && s /= lastchangedSuffix
+ where
+ s = fromMetaField f
+
+lastchanged :: T.Text
+lastchanged = "lastchanged"
+
+lastchangedSuffix :: T.Text
+lastchangedSuffix = "-lastchanged"
+
+itemIdField :: MetaField
+itemIdField = mkMetaFieldUnchecked "itemid"
diff --git a/Annex/Multicast.hs b/Annex/Multicast.hs
new file mode 100644
index 0000000000..1443de776c
--- /dev/null
+++ b/Annex/Multicast.hs
@@ -0,0 +1,44 @@
+{- git-annex multicast receive callback
+ -
+ - Copyright 2017 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
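+ -
+ - A sketch of the callback protocol implemented here: the parent
+ - process calls multicastCallbackEnv to create a pipe and an
+ - environment where GIT_ANNEX_MULTICAST_RECEIVE holds the write end's
+ - fd. When uftpd receives a file, it re-runs git-annex, which calls
+ - runMulticastReceive to write the received filenames to that fd, one
+ - per line, for the parent to read back from the returned Handle.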
+ -} + +module Annex.Multicast where + +import Annex.Path +import Utility.Env +import Utility.PartialPrelude + +import System.Process +import System.IO +import GHC.IO.Handle.FD +import Control.Applicative +import Prelude + +multicastReceiveEnv :: String +multicastReceiveEnv = "GIT_ANNEX_MULTICAST_RECEIVE" + +multicastCallbackEnv :: IO (FilePath, [(String, String)], Handle) +multicastCallbackEnv = do + gitannex <- programPath + -- This will even work on Windows + (rfd, wfd) <- createPipeFd + rh <- fdToHandle rfd + environ <- addEntry multicastReceiveEnv (show wfd) <$> getEnvironment + return (gitannex, environ, rh) + +-- This is run when uftpd has received a file. Rather than move +-- the file into the annex here, which would require starting up the +-- Annex monad, parsing git config, and verifying the content, simply +-- output the filename to the specified FD. This keeps the time +-- that uftpd is not receiving the next file as short as possible. +runMulticastReceive :: [String] -> String -> IO () +runMulticastReceive ("-I":_sessionid:fs) hs = case readish hs of + Just fd -> do + h <- fdToHandle fd + mapM_ (hPutStrLn h) fs + hClose h + Nothing -> return () +runMulticastReceive _ _ = return () diff --git a/Annex/Notification.hs b/Annex/Notification.hs new file mode 100644 index 0000000000..f205797359 --- /dev/null +++ b/Annex/Notification.hs @@ -0,0 +1,108 @@ +{- git-annex desktop notifications + - + - Copyright 2014 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-} +{-# LANGUAGE CPP #-} + +module Annex.Notification (NotifyWitness, noNotification, notifyTransfer, notifyDrop) where + +import Annex.Common +import Types.Transfer +#ifdef WITH_DBUS_NOTIFICATIONS +import qualified Annex +import Types.DesktopNotify +import qualified DBus.Notify as Notify +import qualified DBus.Client +#endif + +-- Witness that notification has happened. +data NotifyWitness = NotifyWitness + +-- Only use when no notification should be done. +noNotification :: NotifyWitness +noNotification = NotifyWitness + +{- Wrap around an action that performs a transfer, which may run multiple + - attempts. Displays notification when supported and when the user asked + - for it.
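+ - + - A rough usage sketch (runTransfer is a hypothetical stand-in for a + - real transfer action, not part of this patch): + - + - > notifyTransfer Download t $ \witness -> + - >     runTransfer t witness -- the witness proves notification was set up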
-} +notifyTransfer :: Transferrable t => Observable v => Direction -> t -> (NotifyWitness -> Annex v) -> Annex v +#ifdef WITH_DBUS_NOTIFICATIONS +notifyTransfer direction t a = case descTransfrerrable t of + Nothing -> a NotifyWitness + Just desc -> do + wanted <- Annex.getRead Annex.desktopnotify + if (notifyStart wanted || notifyFinish wanted) + then do + client <- liftIO DBus.Client.connectSession + startnotification <- liftIO $ if notifyStart wanted + then Just <$> Notify.notify client (startedTransferNote direction desc) + else pure Nothing + res <- a NotifyWitness + let ok = observeBool res + when (notifyFinish wanted) $ liftIO $ void $ maybe + (Notify.notify client $ finishedTransferNote ok direction desc) + (\n -> Notify.replace client n $ finishedTransferNote ok direction desc) + startnotification + return res + else a NotifyWitness +#else +notifyTransfer _ _ a = a NotifyWitness +#endif + +notifyDrop :: AssociatedFile -> Bool -> Annex () +notifyDrop (AssociatedFile Nothing) _ = noop +#ifdef WITH_DBUS_NOTIFICATIONS +notifyDrop (AssociatedFile (Just f)) ok = do + wanted <- Annex.getRead Annex.desktopnotify + when (notifyFinish wanted) $ liftIO $ do + client <- DBus.Client.connectSession + void $ Notify.notify client (droppedNote ok (fromRawFilePath f)) +#else +notifyDrop (AssociatedFile (Just _)) _ = noop +#endif + +#ifdef WITH_DBUS_NOTIFICATIONS +startedTransferNote :: Direction -> String -> Notify.Note +startedTransferNote Upload = mkNote Notify.Transfer Notify.Low iconUpload + "Uploading" +startedTransferNote Download = mkNote Notify.Transfer Notify.Low iconDownload + "Downloading" + +finishedTransferNote :: Bool -> Direction -> String -> Notify.Note +finishedTransferNote False Upload = mkNote Notify.TransferError Notify.Normal iconFailure + "Failed to upload" +finishedTransferNote False Download = mkNote Notify.TransferError Notify.Normal iconFailure + "Failed to download" +finishedTransferNote True Upload = mkNote Notify.TransferComplete Notify.Low iconSuccess + "Finished uploading" +finishedTransferNote True Download = mkNote Notify.TransferComplete Notify.Low iconSuccess + "Finished downloading" + +droppedNote :: Bool -> String -> Notify.Note +droppedNote False = mkNote Notify.TransferError Notify.Normal iconFailure + "Failed to drop" +droppedNote True = mkNote Notify.TransferComplete Notify.Low iconSuccess + "Dropped" + +iconUpload, iconDownload, iconFailure, iconSuccess :: String +iconUpload = "network-transmit" +iconDownload = "network-receive" +iconFailure = "dialog-error" +iconSuccess = "git-annex" -- Is there a standard icon for success/completion? + +mkNote :: Notify.Category -> Notify.UrgencyLevel -> String -> String -> FilePath -> Notify.Note +mkNote category urgency icon desc path = Notify.blankNote + { Notify.appName = "git-annex" + , Notify.appImage = Just (Notify.Icon icon) + , Notify.summary = desc ++ " " ++ path + , Notify.hints = + [ Notify.Category category + , Notify.Urgency urgency + , Notify.SuppressSound True + ] + } +#endif diff --git a/Annex/NumCopies.hs b/Annex/NumCopies.hs new file mode 100644 index 0000000000..6ec339cae8 --- /dev/null +++ b/Annex/NumCopies.hs @@ -0,0 +1,406 @@ +{- git-annex numcopies configuration and checking + - + - Copyright 2014-2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE ScopedTypeVariables, DeriveDataTypeable, OverloadedStrings #-} + +module Annex.NumCopies ( + module Types.NumCopies, + module Logs.NumCopies, + getFileNumMinCopies, + getSafestNumMinCopies, + getSafestNumMinCopies', + getGlobalFileNumCopies, + getNumCopies, + getMinCopies, + deprecatedNumCopies, + defaultNumCopies, + numCopiesCheck, + numCopiesCheck', + numCopiesCheck'', + numCopiesCount, + verifyEnoughCopiesToDrop, + verifiableCopies, + UnVerifiedCopy(..), +) where + +import Annex.Common +import qualified Annex +import Annex.SafeDropProof +import Types.NumCopies +import Logs.NumCopies +import Logs.Trust +import Logs.Cluster +import Annex.CheckAttr +import qualified Remote +import qualified Types.Remote as Remote +import Annex.Content +import Annex.UUID +import Annex.CatFile +import qualified Database.Keys + +import Control.Exception +import qualified Control.Monad.Catch as MC +import Data.Typeable +import qualified Data.Set as S +import qualified Data.Map as M + +defaultNumCopies :: NumCopies +defaultNumCopies = configuredNumCopies 1 + +defaultMinCopies :: MinCopies +defaultMinCopies = configuredMinCopies 1 + +fromSourcesOr :: v -> [Annex (Maybe v)] -> Annex v +fromSourcesOr v = fromMaybe v <$$> getM id + +{- The git config annex.numcopies is deprecated. -} +deprecatedNumCopies :: Annex (Maybe NumCopies) +deprecatedNumCopies = annexNumCopies <$> Annex.getGitConfig + +{- Value forced on the command line by --numcopies. -} +getForcedNumCopies :: Annex (Maybe NumCopies) +getForcedNumCopies = Annex.getRead Annex.forcenumcopies + +{- Value forced on the command line by --mincopies. -} +getForcedMinCopies :: Annex (Maybe MinCopies) +getForcedMinCopies = Annex.getRead Annex.forcemincopies + +{- NumCopies value from any of the non-.gitattributes configuration + - sources. -} +getNumCopies :: Annex NumCopies +getNumCopies = fromSourcesOr defaultNumCopies + [ getForcedNumCopies + , getGlobalNumCopies + , deprecatedNumCopies + ] + +{- MinCopies value from any of the non-.gitattributes configuration + - sources. -} +getMinCopies :: Annex MinCopies +getMinCopies = fromSourcesOr defaultMinCopies + [ getForcedMinCopies + , getGlobalMinCopies + ] + +{- NumCopies and MinCopies value for a file, from any configuration source, + - including .gitattributes. -} +getFileNumMinCopies :: RawFilePath -> Annex (NumCopies, MinCopies) +getFileNumMinCopies f = do + fnumc <- getForcedNumCopies + fminc <- getForcedMinCopies + case (fnumc, fminc) of + (Just numc, Just minc) -> return (numc, minc) + (Just numc, Nothing) -> do + minc <- fromSourcesOr defaultMinCopies + [ snd <$> getNumMinCopiesAttr f + , getGlobalMinCopies + ] + return (numc, minc) + (Nothing, Just minc) -> do + numc <- fromSourcesOr defaultNumCopies + [ fst <$> getNumMinCopiesAttr f + , getGlobalNumCopies + , deprecatedNumCopies + ] + return (numc, minc) + (Nothing, Nothing) -> do + let fallbacknum = fromSourcesOr defaultNumCopies + [ getGlobalNumCopies + , deprecatedNumCopies + ] + let fallbackmin = fromSourcesOr defaultMinCopies + [ getGlobalMinCopies + ] + getNumMinCopiesAttr f >>= \case + (Just numc, Just minc) -> + return (numc, minc) + (Just numc, Nothing) -> (,) + <$> pure numc + <*> fallbackmin + (Nothing, Just minc) -> (,) + <$> fallbacknum + <*> pure minc + (Nothing, Nothing) -> (,) + <$> fallbacknum + <*> fallbackmin + +{- Gets the highest NumCopies and MinCopies value for all files + - associated with a key. Provide any known associated file; + - the rest are looked up from the database. 
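+ - + - An illustrative sketch (values assumed): if one associated file is + - configured with annex.numcopies=3 in .gitattributes and another with + - annex.numcopies=1, this returns the larger, safer value: + - + - > (numc, minc) <- getSafestNumMinCopies afile key + - > -- numc == configuredNumCopies 3, as long as that file is still + - > -- associated with the key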
+ - + - Using this when dropping, rather than getFileNumMinCopies, + - avoids dropping one file whose smaller configured value would + - violate the value set for another file that uses the same content. + -} +getSafestNumMinCopies :: AssociatedFile -> Key -> Annex (NumCopies, MinCopies) +getSafestNumMinCopies afile k = + Database.Keys.getAssociatedFilesIncluding afile k + >>= getSafestNumMinCopies' afile k + +getSafestNumMinCopies' :: AssociatedFile -> Key -> [RawFilePath] -> Annex (NumCopies, MinCopies) +getSafestNumMinCopies' afile k fs = do + l <- mapM getFileNumMinCopies fs + let l' = zip l fs + (,) + <$> findmax fst l' getNumCopies + <*> findmax snd l' getMinCopies + where + -- Some associated files in the keys database may no longer + -- correspond to files in the repository. + -- (But the AssociatedFile passed to this is known to be + -- an associated file, which may not be in the keys database + -- yet, so checking it is skipped.) + stillassociated f + | AssociatedFile (Just f) == afile = return True + | otherwise = catKeyFile f >>= \case + Just k' | k' == k -> return True + _ -> return False + + -- Avoid calling stillassociated on every file; just make sure + -- that the one with the highest value is still associated. + findmax _ [] fallback = fallback + findmax getv l fallback = do + let n = maximum (map (getv . fst) l) + let (maxls, l') = partition (\(x, _) -> getv x == n) l + ifM (anyM stillassociated (map snd maxls)) + ( return n + , findmax getv l' fallback + ) + +{- This is the globally visible numcopies value for a file. So it does + - not include local configuration in the git config or command line + - options. -} +getGlobalFileNumCopies :: RawFilePath -> Annex NumCopies +getGlobalFileNumCopies f = fromSourcesOr defaultNumCopies + [ fst <$> getNumMinCopiesAttr f + , getGlobalNumCopies + ] + +getNumMinCopiesAttr :: RawFilePath -> Annex (Maybe NumCopies, Maybe MinCopies) +getNumMinCopiesAttr file = + checkAttrs ["annex.numcopies", "annex.mincopies"] file >>= \case + (n:m:[]) -> return + ( configuredNumCopies <$> readish n + , configuredMinCopies <$> readish m + ) + _ -> error "internal" + +{- Checks if numcopies are satisfied for a file by running a comparison + - between the number of (not untrusted) copies that are + - believed to exist, and the configured value. + - + - This is good enough for everything except dropping the file, which + - requires active verification of the copies. + -} +numCopiesCheck :: RawFilePath -> Key -> (Int -> Int -> v) -> Annex v +numCopiesCheck file key vs = do + have <- trustExclude UnTrusted =<< Remote.keyLocations key + numCopiesCheck' file vs have + +numCopiesCheck' :: RawFilePath -> (Int -> Int -> v) -> [UUID] -> Annex v +numCopiesCheck' file vs have = do + needed <- fst <$> getFileNumMinCopies file + let nhave = numCopiesCount have + explain (ActionItemTreeFile file) $ Just $ UnquotedString $ + "has " ++ show nhave ++ " " ++ pluralCopies nhave ++ + ", and the configured annex.numcopies is " ++ show needed + return $ numCopiesCheck'' have vs needed + +numCopiesCheck'' :: [UUID] -> (Int -> Int -> v) -> NumCopies -> v +numCopiesCheck'' have vs needed = + let nhave = numCopiesCount have + in nhave `vs` fromNumCopies needed + +{- When a key is logged as present in a node of the cluster, + - the cluster's UUID will also be in the list, but is not a + - distinct copy. + -} +numCopiesCount :: [UUID] -> Int +numCopiesCount = length . filter (not .
isClusterUUID) + +data UnVerifiedCopy = UnVerifiedRemote Remote | UnVerifiedHere + deriving (Ord, Eq) + +{- Verifies that enough copies of a key exist among the listed remotes, + - to safely drop it, running an action with a proof if so, and + - printing an informative message if not. + - + - Note that the proof is checked to still be valid at the current time + - before running the action. But since dropping the key may take some time, + - the proof's end time may need to be checked again. + -} +verifyEnoughCopiesToDrop + :: String -- message to print when there are no known locations + -> Key + -> Maybe UUID -- repo dropping from + -> Maybe ContentRemovalLock + -> NumCopies + -> MinCopies + -> [UUID] -- repos to skip considering (generally untrusted remotes) + -> [VerifiedCopy] -- copies already verified to exist + -> [UnVerifiedCopy] -- places to check to see if they have copies + -> (SafeDropProof -> Annex a) -- action to perform the drop + -> Annex a -- action to perform when unable to drop + -> Annex a +verifyEnoughCopiesToDrop nolocmsg key dropfrom removallock neednum needmin skip preverified tocheck dropaction nodropaction = + helper [] [] preverified (nub tocheck) [] + where + helper bad missing have [] lockunsupported = + liftIO (mkSafeDropProof neednum needmin have removallock) >>= \case + Right proof -> checkprooftime proof + Left stillhave -> do + notEnoughCopies key dropfrom neednum needmin stillhave (skip++missing) bad nolocmsg lockunsupported + nodropaction + helper bad missing have (c:cs) lockunsupported + | isSafeDrop neednum needmin have removallock = + liftIO (mkSafeDropProof neednum needmin have removallock) >>= \case + Right proof -> checkprooftime proof + Left stillhave -> helper bad missing stillhave (c:cs) lockunsupported + | otherwise = case c of + UnVerifiedHere -> lockContentShared key Nothing contverified + UnVerifiedRemote r + -- Skip cluster uuids because locking is + -- not supported with them, instead will + -- lock individual nodes. + | isClusterUUID (Remote.uuid r) -> helper bad missing have cs lockunsupported + | otherwise -> checkremote r contverified $ + let lockunsupported' = r : lockunsupported + in Remote.hasKey r key >>= \case + Right True -> helper bad missing (mkVerifiedCopy RecentlyVerifiedCopy r : have) cs lockunsupported' + Left _ -> helper (r:bad) missing have cs lockunsupported' + Right False -> helper bad (Remote.uuid r:missing) have cs lockunsupported' + where + contverified vc = helper bad missing (vc : have) cs lockunsupported + + checkremote r cont fallback = case Remote.lockContent r of + Just lockcontent -> do + -- The remote's lockContent will throw an exception + -- when it is unable to lock, in which case the + -- fallback should be run. + -- + -- On the other hand, the continuation could itself + -- throw an exception (ie, the eventual drop action + -- fails), and in this case we don't want to run the + -- fallback since part of the drop action may have + -- already been performed. + -- + -- Differentiate between these two sorts + -- of exceptions by using DropException. + let a = lockcontent key $ \v -> + cont v `catchNonAsync` (throw .
DropException) + a `MC.catches` + [ MC.Handler (\ (e :: AsyncException) -> throwM e) + , MC.Handler (\ (e :: SomeAsyncException) -> throwM e) + , MC.Handler (\ (DropException e') -> throwM e') + , MC.Handler (\ (_e :: SomeException) -> fallback) + ] + Nothing -> fallback + + checkprooftime proof = + ifM (liftIO $ checkSafeDropProofEndTime (Just proof)) + ( dropaction proof + , do + safeDropProofExpired + nodropaction + ) + +data DropException = DropException SomeException + deriving (Typeable, Show) + +instance Exception DropException + +notEnoughCopies :: Key -> Maybe UUID -> NumCopies -> MinCopies -> [VerifiedCopy] -> [UUID] -> [Remote] -> String -> [Remote] -> Annex () +notEnoughCopies key dropfrom neednum needmin have skip bad nolocmsg lockunsupported = do + showNote "unsafe" + if length have < fromNumCopies neednum + then showLongNote $ UnquotedString $ + if fromNumCopies neednum == 1 + then "Could not verify the existence of the 1 necessary copy." + else "Could only verify the existence of " ++ + show (length have) ++ " out of " ++ show (fromNumCopies neednum) ++ + " necessary " ++ pluralCopies (fromNumCopies neednum) ++ "." + else do + showLongNote $ UnquotedString $ "Unable to lock down " ++ show (fromMinCopies needmin) ++ + " " ++ pluralCopies (fromMinCopies needmin) ++ + " of file necessary to safely drop it." + if null lockunsupported + then showLongNote "(This could have happened because of a concurrent drop, or because a remote has too old a version of git-annex-shell installed.)" + else showLongNote $ UnquotedString $ "These remotes do not support locking: " + ++ Remote.listRemoteNames lockunsupported + + Remote.showTriedRemotes bad + -- When dropping from a cluster, don't suggest making the nodes of + -- the cluster available + clusternodes <- case mkClusterUUID =<< dropfrom of + Nothing -> pure [] + Just cu -> do + clusters <- getClusters + pure $ maybe [] (map fromClusterNodeUUID . S.toList) $ + M.lookup cu (clusterUUIDs clusters) + let excludeset = S.fromList $ map toUUID have++skip++clusternodes + -- Don't suggest making a cluster available when dropping from its + -- node. + let exclude u + | u `S.member` excludeset = pure True + | otherwise = case (dropfrom, mkClusterUUID u) of + (Just dropfrom', Just cu) -> do + clusters <- getClusters + pure $ case M.lookup cu (clusterUUIDs clusters) of + Just nodes -> + ClusterNodeUUID dropfrom' + `S.member` nodes + Nothing -> False + _ -> pure False + Remote.showLocations True key exclude nolocmsg + +pluralCopies :: Int -> String +pluralCopies 1 = "copy" +pluralCopies _ = "copies" + +{- Finds locations of a key that can be used to get VerifiedCopies, + - in order to allow dropping the key. + - + - Provide a list of UUIDs that the key is being dropped from. + - The returned lists will exclude any of those UUIDs. + - + - The return lists also exclude any repositories that are untrusted, + - since those should not be used for verification. + - + - When dropping from a cluster UUID, its nodes are excluded. + - + - Cluster UUIDs are also excluded since locking a key on a cluster + - is done by locking on individual nodes. + - + - The UnVerifiedCopy list is cost ordered. + - The VerifiedCopy list contains repositories that are trusted to + - contain the key. + -} +verifiableCopies :: Key -> [UUID] -> Annex ([UnVerifiedCopy], [VerifiedCopy]) +verifiableCopies key exclude = do + locs <- filter (not . 
isClusterUUID) <$> Remote.keyLocations key + (remotes, trusteduuids) <- Remote.remoteLocations (Remote.IncludeIgnored False) locs + =<< trustGet Trusted + clusternodes <- if any isClusterUUID exclude + then do + clusters <- getClusters + pure $ concatMap (getclusternodes clusters) exclude + else pure [] + untrusteduuids <- trustGet UnTrusted + let exclude' = exclude ++ untrusteduuids ++ clusternodes + let remotes' = Remote.remotesWithoutUUID remotes (exclude' ++ trusteduuids) + let verified = map (mkVerifiedCopy TrustedCopy) $ + filter (`notElem` exclude') trusteduuids + u <- getUUID + let herec = if u `elem` locs && u `notElem` exclude' + then [UnVerifiedHere] + else [] + return (herec ++ map UnVerifiedRemote remotes', verified) + where + getclusternodes clusters u = case mkClusterUUID u of + Just cu -> maybe [] (map fromClusterNodeUUID . S.toList) $ + M.lookup cu (clusterUUIDs clusters) + Nothing -> [] diff --git a/Annex/Path.hs b/Annex/Path.hs new file mode 100644 index 0000000000..c131ddba0f --- /dev/null +++ b/Annex/Path.hs @@ -0,0 +1,129 @@ +{- git-annex program path + - + - Copyright 2013-2022 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.Path ( + programPath, + readProgramFile, + gitAnnexChildProcess, + gitAnnexChildProcessParams, + gitAnnexDaemonizeParams, + cleanStandaloneEnvironment, +) where + +import Annex.Common +import Config.Files +import Utility.Env +import Annex.PidLock +import qualified Annex + +import System.Environment (getExecutablePath, getArgs, getProgName) + +{- A fully qualified path to the currently running git-annex program. + - + - getExecutablePath is used when possible. On OSs it supports + - well, it returns the complete path to the program. But, on other OSs, + - it might return just the basename. Fall back to reading the programFile, + - or searching for the command name in PATH. + - + - The standalone build runs git-annex via ld.so, and defeats + - getExecutablePath. It sets GIT_ANNEX_DIR to the location of the + - standalone build directory, and there are wrapper scripts for git-annex + - and git-annex-shell in that directory. + -} +programPath :: IO FilePath +programPath = go =<< getEnv "GIT_ANNEX_DIR" + where + go (Just dir) = do + name <- getProgName + return (dir </> name) + go Nothing = do + exe <- getExecutablePath + p <- if isAbsolute exe + then return exe + else fromMaybe exe <$> readProgramFile + maybe cannotFindProgram return =<< searchPath p + +{- Returns the path for git-annex that is recorded in the programFile. -} +readProgramFile :: IO (Maybe FilePath) +readProgramFile = do + programfile <- programFile + headMaybe . lines <$> readFile programfile + +cannotFindProgram :: IO a +cannotFindProgram = do + f <- programFile + giveup $ "cannot find git-annex program in PATH or in " ++ f + +{- Runs a git-annex child process. + - + - Like runsGitAnnexChildProcessViaGit, when pid locking is in use, + - this takes the pid lock, while running it, and sets an env var + - that prevents the child process trying to take the pid lock, + - to avoid it deadlocking. + -} +gitAnnexChildProcess + :: String + -> [CommandParam] + -> (CreateProcess -> CreateProcess) + -> (Maybe Handle -> Maybe Handle -> Maybe Handle -> ProcessHandle -> IO a) + -> Annex a +gitAnnexChildProcess subcmd ps f a = do + cmd <- liftIO programPath + ps' <- gitAnnexChildProcessParams subcmd ps + pidLockChildProcess cmd ps' f a + +{- Parameters to pass to a git-annex child process to run a subcommand + - with some parameters.
+ - + - Includes -c values that were passed on the git-annex command line + - or due to options like --debug being enabled. + -} +gitAnnexChildProcessParams :: String -> [CommandParam] -> Annex [CommandParam] +gitAnnexChildProcessParams subcmd ps = do + cps <- gitAnnexGitConfigOverrides + force <- Annex.getRead Annex.force + let cps' = if force + then Param "--force" : cps + else cps + return (Param subcmd : cps' ++ ps) + +gitAnnexGitConfigOverrides :: Annex [CommandParam] +gitAnnexGitConfigOverrides = concatMap (\c -> [Param "-c", Param c]) + <$> Annex.getGitConfigOverrides + +{- Parameters to pass to git-annex when re-running the current command + - to daemonize it. Used with Utility.Daemon.daemonize. -} +gitAnnexDaemonizeParams :: Annex [CommandParam] +gitAnnexDaemonizeParams = do + -- This includes -c parameters passed to git, as well as ones + -- passed to git-annex. + cps <- gitAnnexGitConfigOverrides + -- Get every parameter git-annex was run with. + ps <- liftIO getArgs + return (map Param ps ++ cps) + +{- Returns a cleaned up environment that lacks path and other settings + - used to make the standalone builds use their bundled libraries and programs. + - Useful when calling programs not included in the standalone builds. + - + - For a non-standalone build, returns Nothing. + -} +cleanStandaloneEnvironment :: IO (Maybe [(String, String)]) +cleanStandaloneEnvironment = clean <$> getEnvironment + where + clean environ + | null vars = Nothing + | otherwise = Just $ catMaybes $ map (restoreorig environ) environ + where + vars = words $ fromMaybe "" $ + lookup "GIT_ANNEX_STANDLONE_ENV" environ + restoreorig oldenviron p@(k, _v) + | k `elem` vars = case lookup ("ORIG_" ++ k) oldenviron of + (Just v') + | not (null v') -> Just (k, v') + _ -> Nothing + | otherwise = Just p diff --git a/Annex/Perms.hs b/Annex/Perms.hs new file mode 100644 index 0000000000..83b4f73130 --- /dev/null +++ b/Annex/Perms.hs @@ -0,0 +1,374 @@ +{- git-annex file permissions + - + - Copyright 2012-2023 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Annex.Perms ( + FileMode, + setAnnexFilePerm, + setAnnexDirPerm, + resetAnnexFilePerm, + annexFileMode, + createAnnexDirectory, + createWorkTreeDirectory, + freezeContent, + freezeContent', + freezeContent'', + checkContentWritePerm, + checkContentWritePerm', + thawContent, + thawContent', + createContentDir, + freezeContentDir, + thawContentDir, + modifyContentDir, + modifyContentDirWhenExists, + withShared, + hasFreezeHook, + hasThawHook, +) where + +import Annex.Common +import Utility.FileMode +import Git +import Git.ConfigTypes +import qualified Annex +import Annex.Version +import Types.RepoVersion +import Config +import Utility.Directory.Create +import qualified Utility.RawFilePath as R + +import System.PosixCompat.Files (fileMode, intersectFileModes, nullFileMode, groupWriteMode, ownerWriteMode, ownerReadMode, groupReadMode, otherReadMode, stdFileMode, ownerExecuteMode, groupExecuteMode, otherExecuteMode, setGroupIDMode) + +withShared :: (SharedRepository -> Annex a) -> Annex a +withShared a = a =<< coreSharedRepository <$> Annex.getGitConfig + +setAnnexFilePerm :: RawFilePath -> Annex () +setAnnexFilePerm = setAnnexPerm False + +setAnnexDirPerm :: RawFilePath -> Annex () +setAnnexDirPerm = setAnnexPerm True + +{- Sets appropriate file mode for a file or directory in the annex, + - other than the content files and content directory. 
Normally, + - don't change the mode, but with core.sharedRepository set, + - allow the group to write, etc. -} +setAnnexPerm :: Bool -> RawFilePath -> Annex () +setAnnexPerm isdir file = setAnnexPerm' Nothing isdir >>= \go -> liftIO (go file) + +setAnnexPerm' :: Maybe ([FileMode] -> FileMode -> FileMode) -> Bool -> Annex (RawFilePath -> IO ()) +setAnnexPerm' modef isdir = ifM crippledFileSystem + ( return (const noop) + , withShared $ \s -> return $ \file -> go s file + ) + where + go GroupShared file = void $ tryIO $ modifyFileMode file $ modef' $ + groupSharedModes ++ + if isdir then [ ownerExecuteMode, groupExecuteMode ] else [] + go AllShared file = void $ tryIO $ modifyFileMode file $ modef' $ + readModes ++ + [ ownerWriteMode, groupWriteMode ] ++ + if isdir then executeModes else [] + go UnShared file = case modef of + Nothing -> noop + Just f -> void $ tryIO $ + modifyFileMode file $ f [] + go (UmaskShared n) file = void $ tryIO $ R.setFileMode file $ + if isdir then umaskSharedDirectory n else n + modef' = fromMaybe addModes modef + +resetAnnexFilePerm :: RawFilePath -> Annex () +resetAnnexFilePerm = resetAnnexPerm False + +{- Like setAnnexPerm, but ignores the current mode of the file entirely, + - and sets the same mode that the umask would result in when creating a + - new file. + - + - Useful eg, after creating a temporary file with locked down modes, + - which is going to be moved to a non-temporary location and needs + - usual modes. + -} +resetAnnexPerm :: Bool -> RawFilePath -> Annex () +resetAnnexPerm isdir file = unlessM crippledFileSystem $ do + defmode <- liftIO defaultFileMode + let modef moremodes _oldmode = addModes moremodes defmode + setAnnexPerm' (Just modef) isdir >>= \go -> liftIO (go file) + +{- Creates a ModeSetter which can be used for creating a file in the annex + - (other than content files, which are locked down more). -} +annexFileMode :: Annex ModeSetter +annexFileMode = do + modesetter <- setAnnexPerm' Nothing False + withShared (\s -> pure $ mk s modesetter) + where + mk GroupShared = ModeSetter stdFileMode + mk AllShared = ModeSetter stdFileMode + mk UnShared = ModeSetter stdFileMode + mk (UmaskShared mode) = ModeSetter mode + +{- Creates a directory inside the gitAnnexDir (or possibly the dbdir), + - creating any parent directories up to and including the gitAnnexDir. + - Makes directories with appropriate permissions. -} +createAnnexDirectory :: RawFilePath -> Annex () +createAnnexDirectory dir = do + top <- parentDir <$> fromRepo gitAnnexDir + tops <- annexDbDir <$> Annex.getGitConfig >>= return . \case + Nothing -> [top] + Just dbdir -> [top, parentDir (parentDir dbdir)] + createDirectoryUnder' tops dir createdir + where + createdir p = do + liftIO $ R.createDirectory p + setAnnexDirPerm p + +{- Create a directory in the git work tree, creating any parent + - directories up to the top of the work tree. + - + - Uses default permissions. + -} +createWorkTreeDirectory :: RawFilePath -> Annex () +createWorkTreeDirectory dir = do + fromRepo repoWorkTree >>= liftIO . \case + Just wt -> createDirectoryUnder [wt] dir + -- Should never happen, but let whatever tries to write + -- to the directory be what throws an exception, as that + -- will be clearer than an exception from here. + Nothing -> noop + +{- Normally, blocks writing to an annexed file, and modifies file + - permissions to allow reading it. 
+ - + - Before v9, when core.sharedRepository is set, the write bits are not + - removed from the file, but instead the appropriate group write bits + - are set. This is necessary to let other users in the group lock the file. + - v9 improved this by using separate lock files, so the content file does + - not need to be writable when using it. + - + - In a shared repository, the current user may not be able to change + - a file owned by another user, so failure to change modes is ignored. + - + - Note that, on Linux, xattrs can sometimes prevent removing + - certain permissions from a file with chmod. (Maybe some ACLs too?) + - In such a case, this will return with the file still having some mode + - it should not normally have. checkContentWritePerm can detect when + - that happens with write permissions. + -} +freezeContent :: RawFilePath -> Annex () +freezeContent file = + withShared $ \sr -> freezeContent' sr file + +freezeContent' :: SharedRepository -> RawFilePath -> Annex () +freezeContent' sr file = freezeContent'' sr file =<< getVersion + +freezeContent'' :: SharedRepository -> RawFilePath -> Maybe RepoVersion -> Annex () +freezeContent'' sr file rv = do + fastDebug "Annex.Perms" ("freezing content " ++ fromRawFilePath file) + unlessM crippledFileSystem $ go sr + freezeHook file + where + go UnShared = liftIO $ nowriteadd [ownerReadMode] + go GroupShared = if versionNeedsWritableContentFiles rv + then liftIO $ ignoresharederr $ modmode $ addModes + [ownerReadMode, groupReadMode, ownerWriteMode, groupWriteMode] + else liftIO $ ignoresharederr $ + nowriteadd [ownerReadMode, groupReadMode] + go AllShared = if versionNeedsWritableContentFiles rv + then liftIO $ ignoresharederr $ modmode $ addModes + (readModes ++ writeModes) + else liftIO $ ignoresharederr $ + nowriteadd readModes + go (UmaskShared n) = if versionNeedsWritableContentFiles rv + -- Assume that the configured mode includes write bits + -- for all users who should be able to lock the file, so + -- don't need to add any write modes. + then liftIO $ ignoresharederr $ modmode $ const n + else liftIO $ ignoresharederr $ modmode $ const $ + removeModes writeModes n + + ignoresharederr = void . tryIO + + modmode = modifyFileMode file + + nowriteadd readmodes = modmode $ + removeModes writeModes . + addModes readmodes + +{- Checks if the write permissions are as freezeContent should set them. + - + - When the repository is shared, the user may not be able to change + - permissions of a file owned by another user. So if the permissions seem + - wrong, but the repository is shared, returns Nothing. If the permissions + - are wrong otherwise, returns Just False. + - + - When there is a freeze hook, it may prevent write in some way other than + - permissions. One use of a freeze hook is when the filesystem does not + - support removing write permissions, so when there is such a hook + - write permissions are ignored. 
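+ - + - A sketch of how a caller might interpret the result (the caller code + - here is assumed, not part of this patch): + - + - > checkContentWritePerm f >>= \case + - >     Just False -> warning "unexpected write permissions" -- definitely wrong + - >     _ -> noop -- correct, or a shared repo where it cannot be determined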
+ -} +checkContentWritePerm :: RawFilePath -> Annex (Maybe Bool) +checkContentWritePerm file = ifM crippledFileSystem + ( return (Just True) + , do + rv <- getVersion + hasfreezehook <- hasFreezeHook + withShared $ \sr -> + liftIO $ checkContentWritePerm' sr file rv hasfreezehook + ) + +checkContentWritePerm' :: SharedRepository -> RawFilePath -> Maybe RepoVersion -> Bool -> IO (Maybe Bool) +checkContentWritePerm' sr file rv hasfreezehook + | hasfreezehook = return (Just True) + | otherwise = case sr of + UnShared -> want Just (excludemodes writeModes) + GroupShared + | versionNeedsWritableContentFiles rv -> want sharedret + (includemodes [ownerWriteMode, groupWriteMode]) + | otherwise -> want sharedret (excludemodes writeModes) + AllShared + | versionNeedsWritableContentFiles rv -> + want sharedret (includemodes writeModes) + | otherwise -> want sharedret (excludemodes writeModes) + UmaskShared n + | versionNeedsWritableContentFiles rv -> want sharedret + (\havemode -> havemode == n) + | otherwise -> want sharedret + (\havemode -> havemode == removeModes writeModes n) + where + want mk f = catchMaybeIO (fileMode <$> R.getFileStatus file) + >>= return . \case + Just havemode -> mk (f havemode) + Nothing -> mk True + + includemodes l havemode = havemode == combineModes (havemode:l) + excludemodes l havemode = all (\m -> intersectFileModes m havemode == nullFileMode) l + + sharedret True = Just True + sharedret False = Nothing + +{- Allows writing to an annexed file that freezeContent was called on + - before. -} +thawContent :: RawFilePath -> Annex () +thawContent file = withShared $ \sr -> thawContent' sr file + +thawContent' :: SharedRepository -> RawFilePath -> Annex () +thawContent' sr file = do + fastDebug "Annex.Perms" ("thawing content " ++ fromRawFilePath file) + thawPerms (go sr) (thawHook file) + where + go GroupShared = liftIO $ void $ tryIO $ groupWriteRead file + go AllShared = liftIO $ void $ tryIO $ groupWriteRead file + go UnShared = liftIO $ allowWrite file + go (UmaskShared n) = liftIO $ void $ tryIO $ R.setFileMode file n + +{- Runs an action that thaws a file's permissions. This will probably + - fail on a crippled filesystem. But, if file modes are supported on a + - crippled filesystem, the file may be frozen, so try to thaw its + - permissions. -} +thawPerms :: Annex () -> Annex () -> Annex () +thawPerms a hook = ifM crippledFileSystem + ( hook >> void (tryNonAsync a) + , hook >> a + ) + +{- Blocks writing to the directory an annexed file is in, to prevent the + - file accidentally being deleted. However, if core.sharedRepository + - is set, this is not done, since the group must be allowed to delete the + - file without being able to thaw the directory. + -} +freezeContentDir :: RawFilePath -> Annex () +freezeContentDir file = do + fastDebug "Annex.Perms" ("freezing content directory " ++ fromRawFilePath dir) + unlessM crippledFileSystem $ withShared go + freezeHook dir + where + dir = parentDir file + go UnShared = liftIO $ preventWrite dir + go GroupShared = liftIO $ void $ tryIO $ groupWriteRead dir + go AllShared = liftIO $ void $ tryIO $ groupWriteRead dir + go (UmaskShared n) = liftIO $ void $ tryIO $ R.setFileMode dir $ + umaskSharedDirectory $ + -- If n includes group or other write mode, leave them set + -- to allow them to delete the file without being able to + -- thaw the directory.
+ removeModes [ownerWriteMode] n + +thawContentDir :: RawFilePath -> Annex () +thawContentDir file = do + fastDebug "Annex.Perms" ("thawing content directory " ++ fromRawFilePath dir) + thawPerms (withShared (liftIO . go)) (thawHook dir) + where + dir = parentDir file + go UnShared = allowWrite dir + go GroupShared = allowWrite dir + go AllShared = allowWrite dir + go (UmaskShared n) = R.setFileMode dir n + +{- Makes the directory tree to store an annexed file's content, + - with appropriate permissions on each level. -} +createContentDir :: RawFilePath -> Annex () +createContentDir dest = do + unlessM (liftIO $ R.doesPathExist dir) $ + createAnnexDirectory dir + -- might have already existed with restricted perms + thawHook dir + unlessM crippledFileSystem $ liftIO $ allowWrite dir + where + dir = parentDir dest + +{- Creates the content directory for a file if it doesn't already exist, + - or thaws it if it does, then runs an action to modify a file in the + - directory, and finally, freezes the content directory. -} +modifyContentDir :: RawFilePath -> Annex a -> Annex a +modifyContentDir f a = do + createContentDir f -- also thaws it + v <- tryNonAsync a + freezeContentDir f + either throwM return v + +{- Like modifyContentDir, but avoids creating the content directory if it + - does not already exist. In that case, the action will probably fail. -} +modifyContentDirWhenExists :: RawFilePath -> Annex a -> Annex a +modifyContentDirWhenExists f a = do + thawContentDir f + v <- tryNonAsync a + freezeContentDir f + either throwM return v + +hasFreezeHook :: Annex Bool +hasFreezeHook = isJust . annexFreezeContentCommand <$> Annex.getGitConfig + +hasThawHook :: Annex Bool +hasThawHook = isJust . annexThawContentCommand <$> Annex.getGitConfig + +freezeHook :: RawFilePath -> Annex () +freezeHook p = maybe noop go =<< annexFreezeContentCommand <$> Annex.getGitConfig + where + go basecmd = void $ liftIO $ + boolSystem "sh" [Param "-c", Param $ gencmd basecmd] + gencmd = massReplace [ ("%path", shellEscape (fromRawFilePath p)) ] + +thawHook :: RawFilePath -> Annex () +thawHook p = maybe noop go =<< annexThawContentCommand <$> Annex.getGitConfig + where + go basecmd = void $ liftIO $ + boolSystem "sh" [Param "-c", Param $ gencmd basecmd] + gencmd = massReplace [ ("%path", shellEscape (fromRawFilePath p)) ] + +{- Calculate mode to use for a directory from the mode to use for a file. + - + - This corresponds to git's handling of core.sharedRepository=0xxx + -} +umaskSharedDirectory :: FileMode -> FileMode +umaskSharedDirectory n = flip addModes n $ map snd $ filter fst + [ (isset ownerReadMode, ownerExecuteMode) + , (isset groupReadMode, groupExecuteMode) + , (isset otherReadMode, otherExecuteMode) + , (isset groupReadMode || isset groupWriteMode, setGroupIDMode) + ] + where + isset v = checkMode v n diff --git a/Annex/PidLock.hs b/Annex/PidLock.hs new file mode 100644 index 0000000000..9b2adea4e8 --- /dev/null +++ b/Annex/PidLock.hs @@ -0,0 +1,131 @@ +{- Pid locking support. + - + - Copyright 2014-2021 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE CPP #-} + +module Annex.PidLock where + +import Annex.Common +import Git +#ifndef mingw32_HOST_OS +import Git.Env +import Annex.GitOverlay +import qualified Utility.LockFile.PidLock as PidF +import qualified Utility.LockPool.PidLock as PidP +import Utility.LockPool (dropLock) +import Utility.Env +import Config +#endif + +{- When pid locking is in use, this tries to take the pid lock (unless + - the process already has it), and if successful, holds it while + - running the child process. The child process is run with an env var + - set, which prevents it from trying to take the pid lock itself. + - + - This way, any locking the parent does will not get in the way of + - the child. The child is assumed to not do any locking that conflicts + - with the parent, but if it did happen to do that, it would be noticed + - when git-annex is used without pid locking. + - + - If another process is already holding the pid lock, the child process + - is still run, but without setting the env var, so it can try to take the + - pid lock itself, and fail however is appropriate for it in that + - situation. + -} +pidLockChildProcess + :: FilePath + -> [CommandParam] + -> (CreateProcess -> CreateProcess) + -> (Maybe Handle -> Maybe Handle -> Maybe Handle -> ProcessHandle -> IO a) + -> Annex a +pidLockChildProcess cmd ps f a = do + let p = f (proc cmd (toCommand ps)) + let gonopidlock = withCreateProcess p a +#ifndef mingw32_HOST_OS + pidLockFile >>= liftIO . \case + Nothing -> gonopidlock + Just pidlock -> bracket + (setup pidlock) + cleanup + (go gonopidlock p pidlock) + where + setup pidlock = fmap fst <$> PidP.tryLock' pidlock + + cleanup (Just h) = dropLock h + cleanup Nothing = return () + + go gonopidlock _ _ Nothing = gonopidlock + go _ p pidlock (Just _h) = do + v <- PidF.pidLockEnv pidlock + baseenv <- case env p of + Nothing -> getEnvironment + Just baseenv -> pure baseenv + let p' = p { env = Just ((v, PidF.pidLockEnvValue) : baseenv) } + withCreateProcess p' a +#else + liftIO gonopidlock +#endif + +{- Wrap around actions that may run a git-annex child process via a git + - command. + - + - This is like pidLockChildProcess, but rather than running a process + - itself, it runs the action with a modified Annex state that passes the + - necessary env var when running git. + -} +runsGitAnnexChildProcessViaGit :: Annex a -> Annex a +#ifndef mingw32_HOST_OS +runsGitAnnexChildProcessViaGit a = pidLockFile >>= \case + Nothing -> a + Just pidlock -> bracket (setup pidlock) cleanup (go pidlock) + where + setup pidlock = liftIO $ fmap fst <$> PidP.tryLock' pidlock + + cleanup (Just h) = liftIO $ dropLock h + cleanup Nothing = return () + + go _ Nothing = a + go pidlock (Just _h) = do + v <- liftIO $ PidF.pidLockEnv pidlock + let addenv g = do + g' <- liftIO $ addGitEnv g v PidF.pidLockEnvValue + return (g', ()) + let rmenv oldg g + | any (\(k, _) -> k == v) (fromMaybe [] (Git.gitEnv oldg)) = g + | otherwise = + let e' = case Git.gitEnv g of + Just e -> Just (delEntry v e) + Nothing -> Nothing + in g { Git.gitEnv = e' } + withAltRepo addenv rmenv (const a) +#else +runsGitAnnexChildProcessViaGit a = a +#endif + +{- Like runsGitAnnexChildProcessViaGit, but the Annex state is not + - modified. Instead the input Repo's state is modified to set the + - necessary env var when git is run in that Repo. 
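+ - + - A sketch of intended use (doSomethingWith is a hypothetical stand-in): + - + - > runsGitAnnexChildProcessViaGit' r $ \r' -> + - >     doSomethingWith r' -- any git run in r' inherits the pid lock env var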
+ -} +runsGitAnnexChildProcessViaGit' :: Git.Repo -> (Git.Repo -> Annex a) -> Annex a +#ifndef mingw32_HOST_OS +runsGitAnnexChildProcessViaGit' r a = pidLockFile >>= \case + Nothing -> a r + Just pidlock -> bracketIO (setup pidlock) cleanup (go pidlock) + where + setup pidlock = fmap fst <$> PidP.tryLock' pidlock + + cleanup (Just h) = dropLock h + cleanup Nothing = return () + + go _ Nothing = a r + go pidlock (Just _h) = do + v <- liftIO $ PidF.pidLockEnv pidlock + r' <- liftIO $ addGitEnv r v PidF.pidLockEnvValue + a r' +#else +runsGitAnnexChildProcessViaGit' r a = a r +#endif diff --git a/Annex/Proxy.hs b/Annex/Proxy.hs new file mode 100644 index 0000000000..5e42afa986 --- /dev/null +++ b/Annex/Proxy.hs @@ -0,0 +1,370 @@ +{- proxying + - + - Copyright 2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.Proxy where + +import Annex.Common +import qualified Annex +import qualified Remote +import qualified Types.Remote as Remote +import qualified Remote.Git +import P2P.Proxy +import P2P.Protocol +import P2P.IO +import Remote.Helper.Ssh (openP2PShellConnection', closeP2PShellConnection) +import Annex.Concurrent +import Annex.Tmp +import Annex.Verify +import Annex.UUID +import Logs.Proxy +import Logs.Cluster +import Logs.UUID +import Logs.Location +import Utility.Tmp.Dir +import Utility.Metered +import Git.Types +import qualified Database.Export as Export + +import Control.Concurrent.STM +import Control.Concurrent.Async +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as L +import qualified System.FilePath.ByteString as P +import qualified Data.Map as M +import qualified Data.Set as S + +proxyRemoteSide :: ProtocolVersion -> Bypass -> Remote -> Annex RemoteSide +proxyRemoteSide clientmaxversion bypass r + | Remote.remotetype r == Remote.Git.remote = + proxyGitRemoteSide clientmaxversion bypass r + | otherwise = + proxySpecialRemoteSide clientmaxversion r + +proxyGitRemoteSide :: ProtocolVersion -> Bypass -> Remote -> Annex RemoteSide +proxyGitRemoteSide clientmaxversion bypass r = mkRemoteSide r $ + openP2PShellConnection' r clientmaxversion bypass >>= \case + Just conn@(OpenConnection (remoterunst, remoteconn, _)) -> + return $ Just + ( remoterunst + , remoteconn + , void $ liftIO $ closeP2PShellConnection conn + ) + _ -> return Nothing + +proxySpecialRemoteSide :: ProtocolVersion -> Remote -> Annex RemoteSide +proxySpecialRemoteSide clientmaxversion r = mkRemoteSide r $ do + let protoversion = min clientmaxversion maxProtocolVersion + remoterunst <- Serving (Remote.uuid r) Nothing <$> + liftIO (newTVarIO protoversion) + ihdl <- liftIO newEmptyTMVarIO + ohdl <- liftIO newEmptyTMVarIO + iwaitv <- liftIO newEmptyTMVarIO + owaitv <- liftIO newEmptyTMVarIO + iclosedv <- liftIO newEmptyTMVarIO + oclosedv <- liftIO newEmptyTMVarIO + exportdb <- ifM (Remote.isExportSupported r) + ( Just <$> Export.openDb (Remote.uuid r) + , pure Nothing + ) + worker <- liftIO . 
async =<< forkState + (proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv exportdb) + let remoteconn = P2PConnection + { connRepo = Nothing + , connCheckAuth = const False + , connIhdl = P2PHandleTMVar ihdl (Just iwaitv) iclosedv + , connOhdl = P2PHandleTMVar ohdl (Just owaitv) oclosedv + , connIdent = ConnIdent (Just (Remote.name r)) + } + let closeremoteconn = do + liftIO $ atomically $ putTMVar oclosedv () + join $ liftIO (wait worker) + maybe noop Export.closeDb exportdb + return $ Just + ( remoterunst + , remoteconn + , closeremoteconn + ) + +-- Proxy for the special remote, speaking the P2P protocol. +proxySpecialRemote + :: ProtocolVersion + -> Remote + -> TMVar (Either L.ByteString Message) + -> TMVar (Either L.ByteString Message) + -> TMVar () + -> TMVar () + -> Maybe Export.ExportHandle + -> Annex () +proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv mexportdb = go + where + go :: Annex () + go = liftIO receivemessage >>= \case + Just (CHECKPRESENT k) -> do + tryNonAsync (Remote.checkPresent r k) >>= \case + Right True -> liftIO $ sendmessage SUCCESS + Right False -> liftIO $ sendmessage FAILURE + Left err -> liftIO $ propagateerror err + go + Just (LOCKCONTENT _) -> do + -- Special remotes do not support locking content. + liftIO $ sendmessage FAILURE + go + Just (REMOVE k) -> do + tryNonAsync (Remote.removeKey r Nothing k) >>= \case + Right () -> liftIO $ sendmessage SUCCESS + Left err -> liftIO $ propagateerror err + go + Just (PUT (ProtoAssociatedFile af) k) -> do + proxyput af k + go + Just (GET offset (ProtoAssociatedFile af) k) -> do + proxyget offset af k + go + Just (BYPASS _) -> go + Just (CONNECT _) -> + -- Not supported and the protocol ends here. + liftIO $ sendmessage $ CONNECTDONE (ExitFailure 1) + Just NOTIFYCHANGE -> do + liftIO $ sendmessage $ + ERROR "NOTIFYCHANGE unsupported for a special remote" + go + Just _ -> giveup "protocol error" + Nothing -> return () + + receivemessage = liftIO (atomically recv) >>= \case + Right (Right m) -> return (Just m) + Right (Left _b) -> giveup "unexpected ByteString received from P2P MVar" + Left () -> return Nothing + where + recv = + (Right <$> takeTMVar ohdl) + `orElse` + (Left <$> readTMVar oclosedv) + + receivebytestring = atomically recv >>= \case + Right (Left b) -> return (Just b) + Right (Right _m) -> giveup "did not receive ByteString from P2P MVar" + Left () -> return Nothing + where + recv = + (Right <$> takeTMVar ohdl) + `orElse` + (Left <$> readTMVar oclosedv) + + sendmessage m = atomically $ putTMVar ihdl (Right m) + + sendbytestring b = atomically $ putTMVar ihdl (Left b) + + propagateerror err = sendmessage $ ERROR $ + "proxied special remote reports: " ++ show err + + -- Not using gitAnnexTmpObjectLocation because there might be + -- several concurrent GET and PUTs of the same key being proxied + -- from this special remote or others, and each needs to happen + -- independently. Also, this key is not getting added into the + -- local annex objects. + withproxytmpfile k a = withOtherTmp $ \othertmpdir -> + withTmpDirIn (fromRawFilePath othertmpdir) "proxy" $ \tmpdir -> + a (toRawFilePath tmpdir P.</> keyFile k) + + -- Verify the content received from the client, to avoid bad content + -- being stored in the special remote.
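+ -- The PUT half of the exchange, as proxied below, is roughly + -- (a sketch reconstructed from proxyput; see P2P.Protocol for + -- the authoritative definition): + --   client -> proxy: PUT af k + --   proxy -> client: PUT_FROM (Offset 0) + --   client -> proxy: DATA len, the content, then VALIDITY (protocol > 1) + --   proxy -> client: SUCCESS or FAILURE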
+ proxyput af k = do + liftIO $ sendmessage $ PUT_FROM (Offset 0) + withproxytmpfile k $ \tmpfile -> do + let store = tryNonAsync (storeput k af (decodeBS tmpfile)) >>= \case + Right () -> liftIO $ sendmessage SUCCESS + Left err -> liftIO $ propagateerror err + liftIO receivemessage >>= \case + Just (DATA (Len len)) -> do + iv <- startVerifyKeyContentIncrementally Remote.AlwaysVerify k + h <- liftIO $ openFile (fromRawFilePath tmpfile) WriteMode + gotall <- liftIO $ receivetofile iv h len + liftIO $ hClose h + verified <- if gotall + then fst <$> finishVerifyKeyContentIncrementally' True iv + else pure False + if protoversion > ProtocolVersion 1 + then liftIO receivemessage >>= \case + Just (VALIDITY Valid) + | verified -> store + | otherwise -> liftIO $ sendmessage FAILURE + Just (VALIDITY Invalid) -> + liftIO $ sendmessage FAILURE + _ -> giveup "protocol error" + else store + _ -> giveup "protocol error" + liftIO $ removeWhenExistsWith removeFile (fromRawFilePath tmpfile) + + storeput k af tmpfile = case mexportdb of + Just exportdb -> liftIO (Export.getExportTree exportdb k) >>= \case + [] -> storeputkey k af tmpfile + locs -> do + havelocs <- liftIO $ S.fromList + <$> Export.getExportedLocation exportdb k + let locs' = filter (`S.notMember` havelocs) locs + forM_ locs' $ \loc -> + storeputexport exportdb k loc tmpfile + liftIO $ Export.flushDbQueue exportdb + Nothing -> storeputkey k af tmpfile + + storeputkey k af tmpfile = + Remote.storeKey r k af (Just tmpfile) nullMeterUpdate + + storeputexport exportdb k loc tmpfile = do + Remote.storeExport (Remote.exportActions r) tmpfile k loc nullMeterUpdate + liftIO $ Export.addExportedLocation exportdb k loc + + receivetofile iv h n = liftIO receivebytestring >>= \case + Just b -> do + liftIO $ atomically $ + putTMVar owaitv () + `orElse` + readTMVar oclosedv + n' <- storetofile iv h n (L.toChunks b) + -- Normally all the data is sent in a single + -- lazy bytestring. However, when the special + -- remote is a node in a cluster, a PUT is + -- streamed to it in multiple chunks. + if n' == 0 + then return True + else receivetofile iv h n' + Nothing -> return False + + storetofile _ _ n [] = pure n + storetofile iv h n (b:bs) = do + writeVerifyChunk iv h b + storetofile iv h (n - fromIntegral (B.length b)) bs + + proxyget offset af k = withproxytmpfile k $ \tmpfile -> do + -- Don't verify the content from the remote, + -- because the client will do its own verification. + let vc = Remote.NoVerify + tryNonAsync (Remote.retrieveKeyFile r k af (fromRawFilePath tmpfile) nullMeterUpdate vc) >>= \case + Right _ -> liftIO $ senddata offset tmpfile + Left err -> liftIO $ propagateerror err + + senddata (Offset offset) f = do + size <- fromIntegral <$> getFileSize f + let n = max 0 (size - offset) + sendmessage $ DATA (Len n) + withBinaryFile (fromRawFilePath f) ReadMode $ \h -> do + hSeek h AbsoluteSeek offset + sendbs =<< L.hGetContents h + -- Important to keep the handle open until + -- the client responds. The bytestring + -- could still be lazily streaming out to + -- the client. + waitclientresponse + where + sendbs bs = do + sendbytestring bs + when (protoversion > ProtocolVersion 0) $ + sendmessage (VALIDITY Valid) + + waitclientresponse = + receivemessage >>= \case + Just SUCCESS -> return () + Just FAILURE -> return () + Just _ -> giveup "protocol error" + Nothing -> return () + +{- Check if this repository can proxy for a specified remote uuid, + - and if so enable proxying for it. 
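+ - + - For example (a sketch; the surrounding P2P server code is assumed): + - + - > canproxy <- checkCanProxy remoteuuid =<< getUUID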
-} +checkCanProxy :: UUID -> UUID -> Annex Bool +checkCanProxy remoteuuid myuuid = do + myproxies <- M.lookup myuuid <$> getProxies + checkCanProxy' myproxies remoteuuid >>= \case + Right v -> do + Annex.changeState $ \st -> st { Annex.proxyremote = Just v } + return True + Left Nothing -> return False + Left (Just err) -> giveup err + +checkCanProxy' :: Maybe (S.Set Proxy) -> UUID -> Annex (Either (Maybe String) (Either ClusterUUID Remote)) +checkCanProxy' Nothing _ = return (Left Nothing) +checkCanProxy' (Just proxies) remoteuuid = + case filter (\p -> proxyRemoteUUID p == remoteuuid) (S.toList proxies) of + [] -> notconfigured + ps -> case mkClusterUUID remoteuuid of + Just cu -> proxyforcluster cu + Nothing -> proxyfor ps + where + proxyfor ps = do + rs <- concat . Remote.byCost <$> Remote.remoteList + myclusters <- annexClusters <$> Annex.getGitConfig + case canProxyForRemote rs ps myclusters remoteuuid of + Nothing -> notconfigured + Just r -> return (Right (Right r)) + + proxyforcluster cu = do + clusters <- getClusters + if M.member cu (clusterUUIDs clusters) + then return (Right (Left cu)) + else notconfigured + + notconfigured = M.lookup remoteuuid <$> uuidDescMap >>= \case + Just desc -> return $ Left $ Just $ + "not configured to proxy for repository " ++ fromUUIDDesc desc + Nothing -> return $ Left Nothing + +{- Remotes that this repository is configured to proxy for. + - + - When there are multiple remotes that access the same repository, + - this picks the lowest cost one that is configured to be used as a proxy. + -} +proxyForRemotes :: Annex [Remote] +proxyForRemotes = do + myuuid <- getUUID + (M.lookup myuuid <$> getProxies) >>= \case + Nothing -> return [] + Just myproxies -> do + let myproxies' = S.toList myproxies + rs <- concat . Remote.byCost <$> Remote.remoteList + myclusters <- annexClusters <$> Annex.getGitConfig + return $ mapMaybe (canProxyForRemote rs myproxies' myclusters . Remote.uuid) rs + +-- Only proxy for a remote when the git configuration allows it. +-- This is important to prevent changes to the git-annex branch +-- causing unexpected proxying for remotes. +canProxyForRemote + :: [Remote] -- ^ must be sorted by cost + -> [Proxy] + -> M.Map RemoteName ClusterUUID + -> UUID + -> (Maybe Remote) +canProxyForRemote rs myproxies myclusters remoteuuid = + headMaybe $ filter canproxy rs + where + canproxy r = + sameuuid r && + proxyisconfigured r && + any (isproxyfor r) myproxies + + sameuuid r = Remote.uuid r == remoteuuid + + isproxyfor r p = + proxyRemoteUUID p == remoteuuid && + Remote.name r == proxyRemoteName p + + proxyisconfigured r + | remoteAnnexProxy (Remote.gitconfig r) = True + -- Proxy for remotes that are configured as cluster nodes. + | any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ Remote.gitconfig r) = True + -- Proxy for a remote when it is proxied by another remote + -- which is itself configured as a cluster gateway. + | otherwise = case remoteAnnexProxiedBy (Remote.gitconfig r) of + Just proxyuuid -> not $ null $ + concatMap (remoteAnnexClusterGateway . 
Remote.gitconfig) $ + filter (\p -> Remote.uuid p == proxyuuid) rs + Nothing -> False + +mkProxyMethods :: ProxyMethods +mkProxyMethods = ProxyMethods + { removedContent = \u k -> logChange k u InfoMissing + , addedContent = \u k -> logChange k u InfoPresent + } diff --git a/Annex/Queue.hs b/Annex/Queue.hs new file mode 100644 index 0000000000..b2b28bccb5 --- /dev/null +++ b/Annex/Queue.hs @@ -0,0 +1,97 @@ +{- git-annex command queue + - + - Copyright 2011-2021 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE BangPatterns #-} + +module Annex.Queue ( + addCommand, + addFlushAction, + addUpdateIndex, + flush, + flushWhenFull, + size, + get, + mergeFrom, +) where + +import Annex.Common +import Annex hiding (new) +import Annex.LockFile +import qualified Git.Queue +import qualified Git.UpdateIndex + +{- Adds a git command to the queue. -} +addCommand :: [CommandParam] -> String -> [CommandParam] -> [FilePath] -> Annex () +addCommand commonparams command params files = do + q <- get + store =<< flushWhenFull =<< + (Git.Queue.addCommand commonparams command params files q =<< gitRepo) + +addFlushAction :: Git.Queue.FlushActionRunner Annex -> [RawFilePath] -> Annex () +addFlushAction runner files = do + q <- get + store =<< flushWhenFull =<< + (Git.Queue.addFlushAction runner files q =<< gitRepo) + +{- Adds an update-index stream to the queue. -} +addUpdateIndex :: Git.UpdateIndex.Streamer -> Annex () +addUpdateIndex streamer = do + q <- get + store =<< flushWhenFull =<< + (Git.Queue.addUpdateIndex streamer q =<< gitRepo) + +{- Runs the queue if it is full. -} +flushWhenFull :: Git.Queue.Queue Annex -> Annex (Git.Queue.Queue Annex) +flushWhenFull q + | Git.Queue.full q = flush' q + | otherwise = return q + +{- Runs (and empties) the queue. -} +flush :: Annex () +flush = do + q <- get + unless (0 == Git.Queue.size q) $ do + store =<< flush' q + +{- When there are multiple worker threads, each has its own queue. + - And of course multiple git-annex processes may be running each with its + - own queue. + - + - But, flushing two queues at the same time could lead to failures due to + - git locking files. So, only one queue is allowed to flush at a time. + -} +flush' :: Git.Queue.Queue Annex -> Annex (Git.Queue.Queue Annex) +flush' q = do + lck <- fromRepo gitAnnexGitQueueLock + withExclusiveLock lck $ do + showStoringStateAction + Git.Queue.flush q =<< gitRepo + +{- Gets the size of the queue. -} +size :: Annex Int +size = Git.Queue.size <$> get + +get :: Annex (Git.Queue.Queue Annex) +get = maybe new return =<< getState repoqueue + +new :: Annex (Git.Queue.Queue Annex) +new = do + sz <- annexQueueSize <$> getGitConfig + q <- liftIO $ Git.Queue.new sz Nothing + store q + return q + +store :: Git.Queue.Queue Annex -> Annex () +store q = changeState $ \s -> s { repoqueue = Just q } + +mergeFrom :: AnnexState -> Annex () +mergeFrom st = case repoqueue st of + Nothing -> noop + Just newq -> do + q <- get + let !q' = Git.Queue.merge q newq + store =<< flushWhenFull q' diff --git a/Annex/RemoteTrackingBranch.hs b/Annex/RemoteTrackingBranch.hs new file mode 100644 index 0000000000..f05b1512be --- /dev/null +++ b/Annex/RemoteTrackingBranch.hs @@ -0,0 +1,96 @@ +{- git-annex remote tracking branches + - + - Copyright 2019 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +module Annex.RemoteTrackingBranch + ( RemoteTrackingBranch + , mkRemoteTrackingBranch + , fromRemoteTrackingBranch + , setRemoteTrackingBranch + , makeRemoteTrackingBranchMergeCommit + , makeRemoteTrackingBranchMergeCommit' + , getRemoteTrackingBranchImportHistory + ) where + +import Annex.Common +import Annex.CatFile +import qualified Annex +import Git.Types +import qualified Git.Ref +import qualified Git.Branch +import Git.History +import qualified Types.Remote as Remote + +import qualified Data.Set as S + +newtype RemoteTrackingBranch = RemoteTrackingBranch + { fromRemoteTrackingBranch :: Ref } + deriving (Show, Eq) + +{- Makes a remote tracking branch corresponding to a local branch. + - Note that the local branch does not need to exist yet. -} +mkRemoteTrackingBranch :: Remote -> Branch -> RemoteTrackingBranch +mkRemoteTrackingBranch remote ref = RemoteTrackingBranch $ + Git.Ref.underBase ("refs/remotes/" ++ Remote.name remote) ref + +{- Set remote tracking branch to point to a commit. -} +setRemoteTrackingBranch :: RemoteTrackingBranch -> Sha -> Annex () +setRemoteTrackingBranch tb commit = + inRepo $ Git.Branch.update' (fromRemoteTrackingBranch tb) commit + +{- Makes a merge commit that preserves the import history of the + - RemoteTrackingBranch, while grafting new git history into it. + - + - The second parent of the merge commit is the past history of the + - RemoteTrackingBranch as imported from a remote. When importing a + - history of trees from a remote, commits can be synthesized from + - them, but such commits won't have the same sha due to eg date differing. + - But since we know that the second parent consists entirely of such + - import commits, they can be reused when updating the + - RemoteTrackingBranch. + -} +makeRemoteTrackingBranchMergeCommit :: RemoteTrackingBranch -> Sha -> Annex Sha +makeRemoteTrackingBranchMergeCommit tb commitsha = + -- Check if the tracking branch exists. + inRepo (Git.Ref.sha (fromRemoteTrackingBranch tb)) >>= \case + Nothing -> return commitsha + Just _ -> inRepo (getHistoryToDepth 1 (fromRemoteTrackingBranch tb)) >>= \case + Nothing -> return commitsha + Just (History hc _) -> case historyCommitParents hc of + [_, importhistory] -> do + treesha <- maybe + (giveup $ "Unable to cat commit " ++ fromRef commitsha) + commitTree + <$> catCommit commitsha + makeRemoteTrackingBranchMergeCommit' commitsha importhistory treesha + -- Earlier versions of git-annex did not + -- make the merge commit, or perhaps + -- something else changed where the + -- tracking branch pointed. + _ -> return commitsha + +makeRemoteTrackingBranchMergeCommit' :: Sha -> Sha -> Sha -> Annex Sha +makeRemoteTrackingBranchMergeCommit' commitsha importedhistory treesha = do + cmode <- annexCommitMode <$> Annex.getGitConfig + inRepo $ Git.Branch.commitTree + cmode + ["remote tracking branch"] + [commitsha, importedhistory] + treesha + +{- When makeRemoteTrackingBranchMergeCommit was used, this finds the + - import history, starting from the second parent of the merge commit. 
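 -
 - A sketch of the commit graph shape involved (names illustrative only):
 -
 -   tracking branch -> merge commit
 -     first parent:  the new git history
 -     second parent: synthesized import commits (what this returns)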
+ -} +getRemoteTrackingBranchImportHistory :: History HistoryCommit -> Maybe (History HistoryCommit) +getRemoteTrackingBranchImportHistory (History hc s) = + case historyCommitParents hc of + [_, importhistory] -> go importhistory (S.toList s) + _ -> Nothing + where + go _ [] = Nothing + go i (h@(History hc' _):hs) + | historyCommit hc' == i = Just h + | otherwise = go i hs diff --git a/Annex/ReplaceFile.hs b/Annex/ReplaceFile.hs new file mode 100644 index 0000000000..21735eba14 --- /dev/null +++ b/Annex/ReplaceFile.hs @@ -0,0 +1,87 @@ +{- git-annex file replacing + - + - Copyright 2013-2021 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.ReplaceFile ( + replaceGitAnnexDirFile, + replaceGitDirFile, + replaceWorkTreeFile, + replaceFile, + replaceFile', +) where + +import Annex.Common +import Annex.Tmp +import Annex.Perms +import Git +import Utility.Tmp.Dir +import Utility.Directory.Create +#ifndef mingw32_HOST_OS +import Utility.Path.Max +#endif + +{- replaceFile on a file located inside the gitAnnexDir. -} +replaceGitAnnexDirFile :: FilePath -> (RawFilePath -> Annex a) -> Annex a +replaceGitAnnexDirFile = replaceFile createAnnexDirectory + +{- replaceFile on a file located inside the .git directory. -} +replaceGitDirFile :: FilePath -> (RawFilePath -> Annex a) -> Annex a +replaceGitDirFile = replaceFile $ \dir -> do + top <- fromRepo localGitDir + liftIO $ createDirectoryUnder [top] dir + +{- replaceFile on a worktree file. -} +replaceWorkTreeFile :: FilePath -> (RawFilePath -> Annex a) -> Annex a +replaceWorkTreeFile = replaceFile createWorkTreeDirectory + +{- Replaces a possibly already existing file with a new version, + - atomically, by running an action. + - + - The action is passed the name of temp file, in a temp directory, + - which it can write to, and once done the temp file is moved into place + - and anything else in the temp directory is deleted. + - + - The action can throw an exception, in which case the temp directory + - will be deleted, and the existing file will be preserved. + - + - Throws an IO exception when it was unable to replace the file. + - + - The createdirectory action is only run when moving the file into place + - fails, and can create any parent directory structure needed. + -} +replaceFile :: (RawFilePath -> Annex ()) -> FilePath -> (RawFilePath -> Annex a) -> Annex a +replaceFile createdirectory file action = replaceFile' createdirectory file (const True) action + +replaceFile' :: (RawFilePath -> Annex ()) -> FilePath -> (a -> Bool) -> (RawFilePath -> Annex a) -> Annex a +replaceFile' createdirectory file checkres action = withOtherTmp $ \othertmpdir -> do + let othertmpdir' = fromRawFilePath othertmpdir +#ifndef mingw32_HOST_OS + -- Use part of the filename as the template for the temp + -- directory. This does not need to be unique, but it + -- makes it more clear what this temp directory is for. + filemax <- liftIO $ fileNameLengthLimit othertmpdir' + let basetmp = take (filemax `div` 2) (takeFileName file) +#else + -- Windows has limits on the whole path length, so keep + -- it short. 
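+	-- (By default, MAX_PATH limits a full Windows path to 260
+	-- characters.)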
+	let basetmp = "t"
+#endif
+	withTmpDirIn othertmpdir' basetmp $ \tmpdir -> do
+		let tmpfile = toRawFilePath (tmpdir </> basetmp)
+		r <- action tmpfile
+		when (checkres r) $
+			replaceFileFrom tmpfile (toRawFilePath file) createdirectory
+		return r
+
+replaceFileFrom :: RawFilePath -> RawFilePath -> (RawFilePath -> Annex ()) -> Annex ()
+replaceFileFrom src dest createdirectory = go `catchIO` fallback
+  where
+	go = liftIO $ moveFile src dest
+	fallback _ = do
+		createdirectory (parentDir dest)
+		go
diff --git a/Annex/SafeDropProof.hs b/Annex/SafeDropProof.hs
new file mode 100644
index 0000000000..eca3fa6f51
--- /dev/null
+++ b/Annex/SafeDropProof.hs
@@ -0,0 +1,34 @@
+{- git-annex safe drop proof
+ -
+ - Copyright 2014-2024 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE OverloadedStrings #-}
+
+module Annex.SafeDropProof (
+	SafeDropProof,
+	safeDropProofEndTime,
+	safeDropProofExpired,
+	checkSafeDropProofEndTime,
+) where
+
+import Annex.Common
+import Types.NumCopies
+
+import Data.Time.Clock.POSIX
+
+safeDropProofExpired :: Annex ()
+safeDropProofExpired = do
+	showNote "unsafe"
+	showLongNote $ UnquotedString
+		"Dropping took too long, and locks may have expired."
+
+checkSafeDropProofEndTime :: Maybe SafeDropProof -> IO Bool
+checkSafeDropProofEndTime p = case safeDropProofEndTime =<< p of
+	Nothing -> return True
+	Just endtime -> do
+		now <- getPOSIXTime
+		return (endtime > now)
+
diff --git a/Annex/SpecialRemote.hs b/Annex/SpecialRemote.hs
new file mode 100644
index 0000000000..2b23b06b5d
--- /dev/null
+++ b/Annex/SpecialRemote.hs
@@ -0,0 +1,135 @@
+{- git-annex special remote configuration
+ -
+ - Copyright 2011-2021 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE OverloadedStrings #-}
+
+module Annex.SpecialRemote (
+	module Annex.SpecialRemote,
+	module Annex.SpecialRemote.Config
+) where
+
+import Annex.Common
+import Annex.SpecialRemote.Config
+import Types.Remote (RemoteConfig, SetupStage(..), typename, setup)
+import Types.GitConfig
+import Types.ProposedAccepted
+import Config
+import Remote.List
+import Logs.Remote
+import Logs.Trust
+import qualified Types.Remote as Remote
+import Git.Types (RemoteName)
+import Utility.SafeOutput
+
+import qualified Data.Map as M
+
+{- See if there's an existing special remote with this name.
+ -
+ - Remotes that are not dead come first in the list
+ - when a name appears multiple times. -}
+findExisting :: RemoteName -> Annex [(UUID, RemoteConfig, Maybe (ConfigFrom UUID))]
+findExisting name = do
+	(a, b) <- findExisting' name
+	return (a++b)
+
+{- Dead remotes with the name are in the second list, all others in the
+ - first list. -}
+findExisting' :: RemoteName -> Annex ([(UUID, RemoteConfig, Maybe (ConfigFrom UUID))], [(UUID, RemoteConfig, Maybe (ConfigFrom UUID))])
+findExisting' name = do
+	t <- trustMap
+	partition (\(u, _, _) -> M.lookup u t /= Just DeadTrusted)
+		. findByRemoteConfig (\c -> lookupName c == Just name)
+		<$> Logs.Remote.remoteConfigMap
+
+newConfig
+	:: RemoteName
+	-> Maybe (Sameas UUID)
+	-> RemoteConfig
+	-- ^ configuration provided by the user
+	-> M.Map UUID RemoteConfig
+	-- ^ configuration of other special remotes, to inherit from
+	-- when sameas is used
+	-> RemoteConfig
+newConfig name sameas fromuser m = case sameas of
+	Nothing -> M.insert nameField (Proposed name) fromuser
+	Just (Sameas u) -> addSameasInherited m $ M.fromList
+		[ (sameasNameField, Proposed name)
+		, (sameasUUIDField, Proposed (fromUUID u))
+		] `M.union` fromuser
+
+specialRemoteMap :: Annex (M.Map UUID RemoteName)
+specialRemoteMap = do
+	m <- Logs.Remote.remoteConfigMap
+	return $ specialRemoteNameMap m
+
+specialRemoteNameMap :: M.Map UUID RemoteConfig -> M.Map UUID RemoteName
+specialRemoteNameMap = M.fromList . mapMaybe go . M.toList
+  where
+	go (u, c) = case lookupName c of
+		Nothing -> Nothing
+		Just n -> Just (u, n)
+
+{- find the remote type -}
+findType :: RemoteConfig -> Either String RemoteType
+findType config = maybe unspecified (specified . fromProposedAccepted) $
+	M.lookup typeField config
+  where
+	unspecified = Left "Specify the type of remote with type="
+	specified s = case filter (findtype s) remoteTypes of
+		[] -> Left $ "Unknown remote type " ++ s
+			++ " (pick from: "
+			++ intercalate " " (map typename remoteTypes)
+			++ ")"
+		(t:_) -> Right t
+	findtype s i = typename i == s
+
+autoEnable :: Annex ()
+autoEnable = do
+	m <- autoEnableable
+	enabled <- getenabledremotes
+	forM_ (M.toList m) $ \(cu, c) -> unless (cu `M.member` enabled) $ do
+		let u = case findSameasUUID c of
+			Just (Sameas u') -> u'
+			Nothing -> cu
+		case (lookupName c, findType c) of
+			-- Avoid auto-enabling when the name contains a
+			-- control character, because git does not avoid
+			-- displaying control characters in the name of a
+			-- remote, and an attacker could leverage
+			-- autoenabling it as part of an attack.
+			(Just name, Right t) | safeOutput name == name -> do
+				showSideAction $ UnquotedString $ "Auto enabling special remote " ++ name
+				dummycfg <- liftIO dummyRemoteGitConfig
+				tryNonAsync (setup t (AutoEnable c) (Just u) Nothing c dummycfg) >>= \case
+					Left e -> warning (UnquotedString (show e))
+					Right (_c, _u) ->
+						when (cu /= u) $
+							setConfig (remoteAnnexConfig c "config-uuid") (fromUUID cu)
+			_ -> return ()
+  where
+	getenabledremotes = M.fromList
+		. map (\r -> (getcu r, r))
+		<$> remoteList
+	getcu r = fromMaybe
+		(Remote.uuid r)
+		(remoteAnnexConfigUUID (Remote.gitconfig r))
+
+autoEnableable :: Annex (M.Map UUID RemoteConfig)
+autoEnableable = do
+	tm <- trustMap
+	(M.filterWithKey (notdead tm) . M.filter configured)
+		<$> remoteConfigMap
+  where
+	configured c = fromMaybe False $
+		trueFalseParser' . fromProposedAccepted
+			=<< M.lookup autoEnableField c
+	notdead tm cu c =
+		let u = case findSameasUUID c of
+			Just (Sameas u') -> u'
+			Nothing -> cu
+		in lookupTrust' u tm /= DeadTrusted
+
diff --git a/Annex/SpecialRemote/Config.hs b/Annex/SpecialRemote/Config.hs
new file mode 100644
index 0000000000..059a62f901
--- /dev/null
+++ b/Annex/SpecialRemote/Config.hs
@@ -0,0 +1,321 @@
+{- git-annex special remote configuration
+ -
+ - Copyright 2019-2024 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -} + +{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module Annex.SpecialRemote.Config where + +import Common +import Types.Remote (configParser) +import Types +import Types.UUID +import Types.ProposedAccepted +import Types.RemoteConfig +import Types.GitConfig +import Config.Cost + +import qualified Data.Map as M +import qualified Data.Set as S +import Text.Read +import Data.Typeable +import GHC.Stack + +newtype Sameas t = Sameas t + deriving (Show) + +newtype ConfigFrom t = ConfigFrom t + deriving (Show) + +{- The name of a configured remote is stored in its config using this key. -} +nameField :: RemoteConfigField +nameField = Accepted "name" + +{- The name of a sameas remote is stored using this key instead. + - This prevents old versions of git-annex getting confused. -} +sameasNameField :: RemoteConfigField +sameasNameField = Accepted "sameas-name" + +lookupName :: RemoteConfig -> Maybe String +lookupName c = fmap fromProposedAccepted $ + M.lookup nameField c <|> M.lookup sameasNameField c + +instance RemoteNameable RemoteConfig where + getRemoteName c = fromMaybe "" (lookupName c) + +{- The uuid that a sameas remote is the same as is stored in this key. -} +sameasUUIDField :: RemoteConfigField +sameasUUIDField = Accepted "sameas-uuid" + +{- The type of a remote is stored in its config using this key. -} +typeField :: RemoteConfigField +typeField = Accepted "type" + +autoEnableField :: RemoteConfigField +autoEnableField = Accepted "autoenable" + +costField :: RemoteConfigField +costField = Accepted "cost" + +encryptionField :: RemoteConfigField +encryptionField = Accepted "encryption" + +macField :: RemoteConfigField +macField = Accepted "mac" + +cipherField :: RemoteConfigField +cipherField = Accepted "cipher" + +cipherkeysField :: RemoteConfigField +cipherkeysField = Accepted "cipherkeys" + +pubkeysField :: RemoteConfigField +pubkeysField = Accepted "pubkeys" + +chunkField :: RemoteConfigField +chunkField = Accepted "chunk" + +chunksizeField :: RemoteConfigField +chunksizeField = Accepted "chunksize" + +embedCredsField :: RemoteConfigField +embedCredsField = Accepted "embedcreds" + +preferreddirField :: RemoteConfigField +preferreddirField = Accepted "preferreddir" + +exportTreeField :: RemoteConfigField +exportTreeField = Accepted "exporttree" + +importTreeField :: RemoteConfigField +importTreeField = Accepted "importtree" + +versioningField :: RemoteConfigField +versioningField = Accepted "versioning" + +exportTree :: ParsedRemoteConfig -> Bool +exportTree = fromMaybe False . getRemoteConfigValue exportTreeField + +importTree :: ParsedRemoteConfig -> Bool +importTree = fromMaybe False . getRemoteConfigValue importTreeField + +isVersioning :: ParsedRemoteConfig -> Bool +isVersioning = fromMaybe False . getRemoteConfigValue versioningField + +annexObjectsField :: RemoteConfigField +annexObjectsField = Accepted "annexobjects" + +annexObjects :: ParsedRemoteConfig -> Bool +annexObjects = fromMaybe False . getRemoteConfigValue annexObjectsField + +{- Parsers for fields that are common to all special remotes. 
-} +commonFieldParsers :: [RemoteConfigFieldParser] +commonFieldParsers = + [ optionalStringParser nameField + (FieldDesc "name for the special remote") + , optionalStringParser sameasNameField HiddenField + , optionalStringParser sameasUUIDField HiddenField + , autoEnableFieldParser + , costParser costField + (FieldDesc "default cost of this special remote") + , optionalStringParser preferreddirField + (FieldDesc "directory whose content is preferred") + ] ++ essentialFieldParsers + +{- Parsers for fields that are common to all special remotes, and are + - also essential to include in eg, annex:: urls. -} +essentialFieldParsers :: [RemoteConfigFieldParser] +essentialFieldParsers = + [ optionalStringParser typeField + (FieldDesc "type of special remote") + , yesNoParser exportTreeField (Just False) + (FieldDesc "export trees of files to this remote") + , yesNoParser importTreeField (Just False) + (FieldDesc "import trees of files from this remote") + , yesNoParser annexObjectsField (Just False) + (FieldDesc "store other objects in remote along with exported trees") + ] + +autoEnableFieldParser :: RemoteConfigFieldParser +autoEnableFieldParser = trueFalseParser autoEnableField (Just False) + (FieldDesc "automatically enable special remote") + +{- A remote with sameas-uuid set will inherit these values from the config + - of that uuid. These values cannot be overridden in the remote's config. -} +sameasInherits :: S.Set RemoteConfigField +sameasInherits = S.fromList + -- encryption configuration is necessarily the same for two + -- remotes that access the same data store + [ encryptionField + , macField + , cipherField + , cipherkeysField + , pubkeysField + -- legacy chunking was either enabled or not, so has to be the same + -- across configs for remotes that access the same data + , chunksizeField + -- (new-style chunking does not have that limitation) + -- but there is no benefit to picking a different chunk size + -- for the sameas remote, since it's reading whatever chunks were + -- stored + , chunkField + ] + +{- Each RemoteConfig that has a sameas-uuid inherits some fields + - from it. Such fields can only be set by inheritance; the RemoteConfig + - cannot provide values from them. -} +addSameasInherited :: M.Map UUID RemoteConfig -> RemoteConfig -> RemoteConfig +addSameasInherited m c = case findSameasUUID c of + Nothing -> c + Just (Sameas sameasuuid) -> case M.lookup sameasuuid m of + Nothing -> c + Just parentc -> + M.withoutKeys c sameasInherits + `M.union` + M.restrictKeys parentc sameasInherits + +findSameasUUID :: RemoteConfig -> Maybe (Sameas UUID) +findSameasUUID c = Sameas . toUUID . fromProposedAccepted + <$> M.lookup sameasUUIDField c + +{- Remove any fields inherited from a sameas-uuid. When storing a + - RemoteConfig, those fields don't get stored, since they were already + - inherited. -} +removeSameasInherited :: RemoteConfig -> RemoteConfig +removeSameasInherited c = case M.lookup sameasUUIDField c of + Nothing -> c + Just _ -> M.withoutKeys c sameasInherits + +{- Finds remote uuids with matching RemoteConfig. -} +findByRemoteConfig :: (RemoteConfig -> Bool) -> M.Map UUID RemoteConfig -> [(UUID, RemoteConfig, Maybe (ConfigFrom UUID))] +findByRemoteConfig matching = map sameasuuid . filter (matching . snd) . M.toList + where + sameasuuid (u, c) = case M.lookup sameasUUIDField c of + Nothing -> (u, c, Nothing) + Just u' -> (toUUID (fromProposedAccepted u'), c, Just (ConfigFrom u)) + +{- Extracts a value from ParsedRemoteConfig. 
-} +getRemoteConfigValue :: HasCallStack => Typeable v => RemoteConfigField -> ParsedRemoteConfig -> Maybe v +getRemoteConfigValue f (ParsedRemoteConfig m _) = case M.lookup f m of + Just (RemoteConfigValue v) -> case cast v of + Just v' -> Just v' + Nothing -> error $ unwords + [ "getRemoteConfigValue" + , fromProposedAccepted f + , "found value of unexpected type" + , show (typeOf v) ++ "." + , "This is a bug in git-annex!" + ] + Nothing -> Nothing + +{- Gets all fields that remoteConfigRestPassthrough matched. -} +getRemoteConfigPassedThrough :: ParsedRemoteConfig -> M.Map RemoteConfigField String +getRemoteConfigPassedThrough (ParsedRemoteConfig m _) = + flip M.mapMaybe m $ \(RemoteConfigValue v) -> + case cast v of + Just (PassedThrough s) -> Just s + Nothing -> Nothing + +newtype PassedThrough = PassedThrough String + +parsedRemoteConfig :: RemoteType -> RemoteConfig -> Annex ParsedRemoteConfig +parsedRemoteConfig t c = either (const emptycfg) id . parseRemoteConfig c + <$> configParser t c + where + emptycfg = ParsedRemoteConfig mempty c + +parseRemoteConfig :: RemoteConfig -> RemoteConfigParser -> Either String ParsedRemoteConfig +parseRemoteConfig c rpc = + go [] c (remoteConfigFieldParsers rpc ++ commonFieldParsers) + where + go l c' [] = + let (passover, leftovers) = partition + (maybe (const False) fst (remoteConfigRestPassthrough rpc) . fst) + (M.toList c') + leftovers' = filter (notaccepted . fst) leftovers + in if not (null leftovers') + then Left $ "Unexpected parameters: " ++ + unwords (map (fromProposedAccepted . fst) leftovers') + else + let m = M.fromList $ + l ++ map (uncurry passthrough) passover + in Right (ParsedRemoteConfig m c) + go l c' (p:rest) = do + let f = parserForField p + (valueParser p) (M.lookup f c) c >>= \case + Just v -> go ((f,v):l) (M.delete f c') rest + Nothing -> go l (M.delete f c') rest + + passthrough f v = (f, RemoteConfigValue (PassedThrough (fromProposedAccepted v))) + + notaccepted (Proposed _) = True + notaccepted (Accepted _) = False + +optionalStringParser :: RemoteConfigField -> FieldDesc -> RemoteConfigFieldParser +optionalStringParser f fielddesc = RemoteConfigFieldParser + { parserForField = f + , valueParser = p + , fieldDesc = fielddesc + , valueDesc = Nothing + } + where + p (Just v) _c = Right (Just (RemoteConfigValue (fromProposedAccepted v))) + p Nothing _c = Right Nothing + +yesNoParser :: RemoteConfigField -> Maybe Bool -> FieldDesc -> RemoteConfigFieldParser +yesNoParser f mdef fd = genParser yesno f mdef fd + (Just (ValueDesc "yes or no")) + where + yesno "yes" = Just True + yesno "no" = Just False + yesno _ = Nothing + +trueFalseParser :: RemoteConfigField -> Maybe Bool -> FieldDesc -> RemoteConfigFieldParser +trueFalseParser f mdef fd = genParser trueFalseParser' f mdef fd + (Just (ValueDesc "true or false")) + +-- Not using Git.Config.isTrueFalse because git supports +-- a lot of other values for true and false in its configs, +-- and this is not a git config and we want to avoid that mess. 
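+--
+-- For example, "true" parses to Just True, while "True", "yes", and
+-- "1" all parse to Nothing.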
+trueFalseParser' :: String -> Maybe Bool +trueFalseParser' "true" = Just True +trueFalseParser' "false" = Just False +trueFalseParser' _ = Nothing + +costParser :: RemoteConfigField -> FieldDesc -> RemoteConfigFieldParser +costParser f fd = genParser readcost f Nothing fd + (Just (ValueDesc "a number")) + where + readcost :: String -> Maybe Cost + readcost = readMaybe + +genParser + :: Typeable t + => (String -> Maybe t) + -> RemoteConfigField + -> Maybe t -- ^ default if not configured + -> FieldDesc + -> Maybe ValueDesc + -> RemoteConfigFieldParser +genParser parse f mdef fielddesc valuedesc = RemoteConfigFieldParser + { parserForField = f + , valueParser = p + , fieldDesc = fielddesc + , valueDesc = valuedesc + } + where + p Nothing _c = Right (fmap RemoteConfigValue mdef) + p (Just v) _c = case parse (fromProposedAccepted v) of + Just b -> Right (Just (RemoteConfigValue b)) + Nothing -> case v of + Accepted _ -> Right (fmap RemoteConfigValue mdef) + Proposed _ -> Left $ + "Bad value for " ++ fromProposedAccepted f ++ + case valuedesc of + Just (ValueDesc vd) -> + " (expected " ++ vd ++ ")" + Nothing -> "" diff --git a/Annex/Ssh.hs b/Annex/Ssh.hs new file mode 100644 index 0000000000..90d462f7be --- /dev/null +++ b/Annex/Ssh.hs @@ -0,0 +1,480 @@ +{- git-annex ssh interface, with connection caching + - + - Copyright 2012-2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.Ssh ( + ConsumeStdin(..), + SshCommand, + sshCommand, + sshOptions, + sshCacheDir, + sshReadPort, + forceSshCleanup, + sshOptionsEnv, + sshOptionsTo, + inRepoWithSshOptionsTo, + runSshOptions, + sshAskPassEnv, + runSshAskPass +) where + +import Annex.Common +import Annex.LockFile +import qualified BuildInfo +import qualified Annex +import qualified Git +import qualified Git.Url +import Config +import Annex.Path +import Utility.Env +import Utility.Hash +import Types.CleanupActions +import Annex.Concurrent.Utility +import Types.Concurrency +import Git.Env +import Git.Ssh +import qualified Utility.RawFilePath as R +import Annex.Perms +#ifndef mingw32_HOST_OS +import Annex.LockPool +#endif + +import Control.Concurrent.STM +import qualified Data.ByteString as S +import qualified System.FilePath.ByteString as P + +{- Some ssh commands are fed stdin on a pipe and so should be allowed to + - consume it. But ssh commands that are not piped stdin should generally + - not be allowed to consume the process's stdin. -} +data ConsumeStdin = ConsumeStdin | NoConsumeStdin + +{- Generates a command to ssh to a given host (or user@host) on a given + - port. This includes connection caching parameters, and any ssh-options. + - If GIT_SSH or GIT_SSH_COMMAND is enabled, they are used instead. -} +sshCommand :: ConsumeStdin -> (SshHost, Maybe SshPort) -> RemoteGitConfig -> SshCommand -> Annex (FilePath, [CommandParam]) +sshCommand cs (host, port) gc remotecmd = ifM (liftIO safe_GIT_SSH) + ( maybe go return + =<< liftIO (gitSsh' host port remotecmd (consumeStdinParams cs)) + , go + ) + where + go = do + ps <- sshOptions cs (host, port) gc [] + return ("ssh", Param (fromSshHost host):ps++[Param remotecmd]) + +{- Generates parameters to ssh to a given host (or user@host) on a given + - port. This includes connection caching parameters, and any + - ssh-options. Note that the host to ssh to and the command to run + - are not included in the returned options. 
-}
+sshOptions :: ConsumeStdin -> (SshHost, Maybe Integer) -> RemoteGitConfig -> [CommandParam] -> Annex [CommandParam]
+sshOptions cs (host, port) gc opts = go =<< sshCachingInfo (host, port)
+  where
+	go (Nothing, params) = return $ mkparams cs params
+	go (Just socketfile, params) = do
+		prepSocket socketfile host (mkparams NoConsumeStdin params)
+
+		return $ mkparams cs params
+	mkparams cs' ps = concat
+		[ ps
+		, map Param (remoteAnnexSshOptions gc)
+		, opts
+		, portParams port
+		, consumeStdinParams cs'
+		, [Param "-T"]
+		]
+
+{- Due to passing -n to GIT_SSH and GIT_SSH_COMMAND, some settings
+ - of those that expect exactly git's parameters will break. So only
+ - use those if the user set GIT_ANNEX_USE_GIT_SSH to say it's ok. -}
+safe_GIT_SSH :: IO Bool
+safe_GIT_SSH = (== Just "1") <$> getEnv "GIT_ANNEX_USE_GIT_SSH"
+
+consumeStdinParams :: ConsumeStdin -> [CommandParam]
+consumeStdinParams ConsumeStdin = []
+consumeStdinParams NoConsumeStdin = [Param "-n"]
+
+{- Returns a filename to use for a ssh connection caching socket, and
+ - parameters to enable ssh connection caching. -}
+sshCachingInfo :: (SshHost, Maybe Integer) -> Annex (Maybe FilePath, [CommandParam])
+sshCachingInfo (host, port) = go =<< sshCacheDir'
+  where
+	go (Right dir) =
+		liftIO (bestSocketPath $ dir P.</> hostport2socket host port) >>= return . \case
+			Nothing -> (Nothing, [])
+			Just socketfile ->
+				let socketfile' = fromRawFilePath socketfile
+				in (Just socketfile', sshConnectionCachingParams socketfile')
+	-- Concurrency without ssh connection caching is not a good
+	-- combination, so warn the user.
+	go (Left whynocaching) = do
+		getConcurrency >>= \case
+			NonConcurrent -> return ()
+			Concurrent {} -> warnnocaching whynocaching
+			ConcurrentPerCpu -> warnnocaching whynocaching
+		return (Nothing, [])
+
+	warnnocaching whynocaching =
+		whenM (annexAdviceNoSshCaching <$> Annex.getGitConfig) $ do
+			warning $ UnquotedString nocachingwarning
+			warning $ UnquotedString whynocaching
+
+	nocachingwarning = unwords
+		[ "You have enabled concurrency, but git-annex is not able"
+		, "to use ssh connection caching. This may result in"
+		, "multiple ssh processes prompting for passwords at the"
+		, "same time."
+		]
+
+{- Given an absolute path to use for a socket file,
+ - returns whichever is shorter of that or the relative path to the same
+ - file.
+ -
+ - If no path can be constructed that is a valid socket, returns Nothing. -}
+bestSocketPath :: RawFilePath -> IO (Maybe RawFilePath)
+bestSocketPath abssocketfile = do
+	relsocketfile <- liftIO $ relPathCwdToFile abssocketfile
+	let socketfile = if S.length abssocketfile <= S.length relsocketfile
+		then abssocketfile
+		else relsocketfile
+	return $ if valid_unix_socket_path socketfile sshgarbagelen
+		then Just socketfile
+		else Nothing
+  where
+	-- ssh appends a 16 char extension to the socket when setting it
+	-- up, which needs to be taken into account when checking
+	-- that a valid socket was constructed.
+	sshgarbagelen = 1+16
+
+sshConnectionCachingParams :: FilePath -> [CommandParam]
+sshConnectionCachingParams socketfile =
+	[ Param "-S", Param socketfile
+	, Param "-o", Param "ControlMaster=auto"
+	, Param "-o", Param "ControlPersist=yes"
+	]
+
+sshSocketDirEnv :: String
+sshSocketDirEnv = "GIT_ANNEX_SSH_SOCKET_DIR"
+
+{- Returns the directory where ssh connection caching sockets can be
+ - stored.
+ -
+ - The directory will be created if it does not exist.
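+ -
+ - (Normally this is the gitAnnexSshDir inside the git directory; on a
+ - crippled filesystem, an "ssh" subdirectory of the directory named by
+ - the GIT_ANNEX_SSH_SOCKET_DIR environment variable is used instead,
+ - as the code below shows.)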
+ -}
+sshCacheDir :: Annex (Maybe RawFilePath)
+sshCacheDir = eitherToMaybe <$> sshCacheDir'
+
+sshCacheDir' :: Annex (Either String RawFilePath)
+sshCacheDir' =
+	ifM (fromMaybe BuildInfo.sshconnectioncaching . annexSshCaching <$> Annex.getGitConfig)
+		( ifM crippledFileSystem
+			( gettmpdir >>= \case
+				Nothing ->
+					return (Left crippledfswarning)
+				Just tmpdir ->
+					liftIO $ catchMsgIO $
+						usetmpdir tmpdir
+			, do
+				d <- fromRepo gitAnnexSshDir
+				createAnnexDirectory d
+				return (Right d)
+			)
+		, return (Left "annex.sshcaching is not set to true")
+		)
+  where
+	gettmpdir = liftIO $ getEnv sshSocketDirEnv
+
+	usetmpdir tmpdir = do
+		let socktmp = tmpdir </> "ssh"
+		createDirectoryIfMissing True socktmp
+		return (toRawFilePath socktmp)
+
+	crippledfswarning = unwords
+		[ "This repository is on a crippled filesystem, so unix named"
+		, "pipes probably don't work, and ssh connection caching"
+		, "relies on those. One workaround is to set"
+		, sshSocketDirEnv
+		, "to point to a directory on a non-crippled filesystem."
+		]
+
+portParams :: Maybe Integer -> [CommandParam]
+portParams Nothing = []
+portParams (Just port) = [Param "-p", Param $ show port]
+
+{- Prepare to use a socket file for ssh connection caching.
+ -
+ - When concurrency is enabled, this blocks until a ssh connection
+ - has been made to the host. So, any password prompting by ssh will
+ - happen in this call, and only one ssh process will prompt at a time.
+ -
+ - Locks the socket lock file to prevent other git-annex processes from
+ - stopping the ssh multiplexer on this socket.
+ -}
+prepSocket :: FilePath -> SshHost -> [CommandParam] -> Annex ()
+prepSocket socketfile sshhost sshparams = do
+	-- There could be stale ssh connections hanging around
+	-- from a previous git-annex run that was interrupted.
+	-- This must run only once, before we have made any ssh connection,
+	-- and any other prepSocket calls must block while it's run.
+	tv <- Annex.getRead Annex.sshstalecleaned
+	join $ liftIO $ atomically $ do
+		cleaned <- takeTMVar tv
+		if cleaned
+			then do
+				putTMVar tv cleaned
+				return noop
+			else return $ do
+				sshCleanup
+				liftIO $ atomically $ putTMVar tv True
+	-- Cleanup at shutdown.
+	Annex.addCleanupAction SshCachingCleanup sshCleanup
+
+	let socketlock = socket2lock socketfile
+
+	getConcurrency >>= \case
+		NonConcurrent -> return ()
+		Concurrent {} -> makeconnection socketlock
+		ConcurrentPerCpu -> makeconnection socketlock
+
+	lockFileCached socketlock
+  where
+	-- When the LockCache already has the socketlock in it,
+	-- the connection has already been started. Otherwise,
+	-- get the connection started now.
+	makeconnection socketlock = debugLocks $
+		whenM (isNothing <$> fromLockCache socketlock) $
+			-- See if ssh can connect in batch mode,
+			-- if so there's no need to block for a password
+			-- prompt.
+			unlessM (tryssh ["-o", "BatchMode=true"]) $
+				-- ssh needs to prompt (probably)
+				-- If the user enters the wrong password,
+				-- ssh will tell them, so we can ignore
+				-- failure.
+				void $ prompt $ tryssh []
+	-- Try to ssh to the host quietly. Returns True if ssh apparently
+	-- connected to the host successfully. If ssh failed to connect,
+	-- returns False.
+	-- Even if ssh is forced to run some specific command, this will
+	-- return True.
+	-- (Except there's an unlikely false positive where a forced
+	-- ssh command exits 255.)
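+	-- (ssh itself exits 255 when a connection error occurs, which
+	-- is what the ExitFailure 255 check below relies on.)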
+	tryssh extraps = liftIO $ withNullHandle $ \nullh -> do
+		let p = (proc "ssh" $ concat
+			[ extraps
+			, toCommand sshparams
+			, [fromSshHost sshhost, "true"]
+			])
+			{ std_out = UseHandle nullh
+			, std_err = UseHandle nullh
+			}
+		withCreateProcess p $ \_ _ _ pid -> do
+			exitcode <- waitForProcess pid
+			return $ case exitcode of
+				ExitFailure 255 -> False
+				_ -> True
+
+{- Find ssh socket files.
+ -
+ - The check that the lock file exists makes only socket files
+ - that were set up by prepSocket be found. On some NFS systems,
+ - a deleted socket file may linger for a while under another filename;
+ - and this check makes such files be skipped since the corresponding lock
+ - file won't exist.
+ -}
+enumSocketFiles :: Annex [FilePath]
+enumSocketFiles = liftIO . go =<< sshCacheDir
+  where
+	go Nothing = return []
+	go (Just dir) = filterM (R.doesPathExist . socket2lock)
+		=<< filter (not . isLock)
+		<$> catchDefaultIO [] (dirContents (fromRawFilePath dir))
+
+{- Stop any unused ssh connection caching processes. -}
+sshCleanup :: Annex ()
+sshCleanup = mapM_ cleanup =<< enumSocketFiles
+  where
+	cleanup socketfile = do
+#ifndef mingw32_HOST_OS
+		-- Drop any shared lock we have, and take an
+		-- exclusive lock, without blocking. If the lock
+		-- succeeds, nothing is using this ssh, and it can
+		-- be stopped.
+		--
+		-- After ssh is stopped, the lock file cannot be removed;
+		-- other processes may be waiting on our exclusive
+		-- lock to use it.
+		let lockfile = socket2lock socketfile
+		unlockFile lockfile
+		mode <- annexFileMode
+		tryLockExclusive (Just mode) lockfile >>= \case
+			Nothing -> noop
+			Just lck -> do
+				forceStopSsh socketfile
+				liftIO $ dropLock lck
+#else
+		forceStopSsh socketfile
+#endif
+
+{- Stop all ssh connection caching processes, even when they're in use. -}
+forceSshCleanup :: Annex ()
+forceSshCleanup = mapM_ forceStopSsh =<< enumSocketFiles
+
+forceStopSsh :: FilePath -> Annex ()
+forceStopSsh socketfile = withNullHandle $ \nullh -> do
+	let (dir, base) = splitFileName socketfile
+	let p = (proc "ssh" $ toCommand $
+		[ Param "-O", Param "stop" ] ++
+		sshConnectionCachingParams base ++
+		[Param "localhost"])
+		{ cwd = Just dir
+		-- "ssh -O stop" is noisy on stderr even with -q
+		, std_out = UseHandle nullh
+		, std_err = UseHandle nullh
+		}
+	void $ liftIO $ catchMaybeIO $ withCreateProcess p $ \_ _ _ pid ->
+		forceSuccessProcess p pid
+	liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath socketfile)
+
+{- This needs to be as short as possible, due to limitations on the length
+ - of the path to a socket file. At the same time, it needs to be unique
+ - for each host.
+ -}
+hostport2socket :: SshHost -> Maybe Integer -> RawFilePath
+hostport2socket host Nothing = hostport2socket' $ fromSshHost host
+hostport2socket host (Just port) = hostport2socket' $
+	fromSshHost host ++ "!" ++ show port
+hostport2socket' :: String -> RawFilePath
+hostport2socket' s
+	| length s > lengthofmd5s = toRawFilePath $ show $ md5 $ encodeBL s
+	| otherwise = toRawFilePath s
+  where
+	lengthofmd5s = 32
+
+socket2lock :: FilePath -> RawFilePath
+socket2lock socket = toRawFilePath (socket ++ lockExt)
+
+isLock :: FilePath -> Bool
+isLock f = lockExt `isSuffixOf` f
+
+lockExt :: String
+lockExt = ".lock"
+
+{- This is the size of the sun_path component of sockaddr_un, which
+ - is the limit to the total length of the filename of a unix socket.
+ -
+ - On Linux, this is 108. On OSX, 104. TODO: Probe
+ -}
+sizeof_sockaddr_un_sun_path :: Int
+sizeof_sockaddr_un_sun_path = 100
+
+{- Note that this looks at the true length of the path in bytes, as it will
+ - appear on disk. -}
+valid_unix_socket_path :: RawFilePath -> Int -> Bool
+valid_unix_socket_path f n = S.length f + n < sizeof_sockaddr_un_sun_path
+
+{- Parses the SSH port, and returns the other OpenSSH options. If
+ - several ports are found, the last one takes precedence. -}
+sshReadPort :: [String] -> (Maybe Integer, [String])
+sshReadPort params = (port, reverse args)
+  where
+	(port,args) = aux (Nothing, []) params
+	aux (p,ps) [] = (p,ps)
+	aux (_,ps) ("-p":p:rest) = aux (readPort p, ps) rest
+	aux (p,ps) (q:rest)
+		| "-p" `isPrefixOf` q = aux (readPort $ drop 2 q, ps) rest
+		| otherwise = aux (p,q:ps) rest
+	readPort p = fmap fst $ listToMaybe $ reads p
+
+{- When this env var is set, git-annex runs ssh with the specified
+ - options. (The options are separated by newlines.)
+ -
+ - This is a workaround for GIT_SSH not being able to contain
+ - additional parameters to pass to ssh. (GIT_SSH_COMMAND can,
+ - but is not supported by older versions of git.) -}
+sshOptionsEnv :: String
+sshOptionsEnv = "GIT_ANNEX_SSHOPTION"
+
+toSshOptionsEnv :: [CommandParam] -> String
+toSshOptionsEnv = unlines . toCommand
+
+fromSshOptionsEnv :: String -> [CommandParam]
+fromSshOptionsEnv = map Param . lines
+
+{- Enables ssh caching for git push/pull to a particular
+ - remote git repo. (Can safely be used on non-ssh remotes.)
+ -
+ - Also propagates any configured ssh-options.
+ -
+ - Like inRepo, the action is run with the local git repo.
+ - But here it's a modified version, with gitEnv to set GIT_SSH=git-annex,
+ - and sshOptionsEnv set so that git-annex will know what socket
+ - file to use. -}
+inRepoWithSshOptionsTo :: Git.Repo -> RemoteGitConfig -> (Git.Repo -> IO a) -> Annex a
+inRepoWithSshOptionsTo remote gc a =
+	liftIO . a =<< sshOptionsTo remote gc =<< gitRepo
+
+{- To make any git commands be run with ssh caching enabled and
+ - configured ssh-options, this alters the local Git.Repo's gitEnv
+ - to set GIT_SSH=git-annex, and sets sshOptionsEnv for use when
+ - running git commands.
+ -
+ - If GIT_SSH or GIT_SSH_COMMAND are enabled, this has no effect.
+ -}
+sshOptionsTo :: Git.Repo -> RemoteGitConfig -> Git.Repo -> Annex Git.Repo
+sshOptionsTo remote gc localr
+	| not (Git.repoIsUrl remote) || Git.repoIsHttp remote = unchanged
+	| otherwise = case Git.Url.hostuser remote of
+		Nothing -> unchanged
+		Just host -> ifM (liftIO $ safe_GIT_SSH <&&> gitSshEnvSet)
+			( unchanged
+			, do
+				let port = Git.Url.port remote
+				let sshhost = either giveup id (mkSshHost host)
+				(msockfile, cacheparams) <- sshCachingInfo (sshhost, port)
+				case msockfile of
+					Nothing -> use []
+					Just sockfile -> do
+						prepSocket sockfile sshhost $ concat
+							[ cacheparams
+							, map Param (remoteAnnexSshOptions gc)
+							, portParams port
+							, consumeStdinParams NoConsumeStdin
+							, [Param "-T"]
+							]
+						use cacheparams
+			)
+  where
+	unchanged = return localr
+
+	use opts = do
+		let sshopts = concat
+			[ opts
+			, map Param (remoteAnnexSshOptions gc)
+			]
+		if null sshopts
+			then unchanged
+			else do
+				command <- liftIO programPath
+				liftIO $ do
+					localr' <- addGitEnv localr sshOptionsEnv
+						(toSshOptionsEnv sshopts)
+					addGitEnv localr' gitSshEnv command
+
+runSshOptions :: [String] -> String -> IO ()
+runSshOptions args s = do
+	let args' = toCommand (fromSshOptionsEnv s) ++ args
+	let p = proc "ssh" args'
+	exitcode <- withCreateProcess p $ \_ _ _ pid -> waitForProcess pid
+	exitWith exitcode
+
+{- When this env var is set, git-annex is being used as a ssh-askpass
+ - program, and should read the password from the specified location,
+ - and output it for ssh to read. -}
+sshAskPassEnv :: String
+sshAskPassEnv = "GIT_ANNEX_SSHASKPASS"
+
+runSshAskPass :: FilePath -> IO ()
+runSshAskPass passfile = putStrLn =<< readFile passfile
diff --git a/Annex/StallDetection.hs b/Annex/StallDetection.hs
new file mode 100644
index 0000000000..9b885c2ecf
--- /dev/null
+++ b/Annex/StallDetection.hs
@@ -0,0 +1,154 @@
+{- Stall detection for transfers.
+ -
+ - Copyright 2020-2024 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+module Annex.StallDetection (
+	getStallDetection,
+	detectStalls,
+	StallDetection,
+) where
+
+import Annex.Common
+import Types.StallDetection
+import Types.Direction
+import Types.Remote (gitconfig)
+import Utility.Metered
+import Utility.HumanTime
+import Utility.DataUnits
+import Utility.ThreadScheduler
+
+import Control.Concurrent.STM
+import Control.Monad.IO.Class (MonadIO)
+import Data.Time.Clock
+
+getStallDetection :: Direction -> Remote -> Maybe StallDetection
+getStallDetection Download r =
+	remoteAnnexStallDetectionDownload (gitconfig r)
+		<|> remoteAnnexStallDetection (gitconfig r)
+getStallDetection Upload r =
+	remoteAnnexStallDetectionUpload (gitconfig r)
+		<|> remoteAnnexStallDetection (gitconfig r)
+
+{- This may be safely canceled (with eg uninterruptibleCancel),
+ - as long as the passed action can be safely canceled. -}
+detectStalls :: (Monad m, MonadIO m) => Maybe StallDetection -> TVar (Maybe BytesProcessed) -> m () -> m ()
+detectStalls Nothing _ _ = noop
+detectStalls (Just StallDetectionDisabled) _ _ = noop
+detectStalls (Just (StallDetection bwrate@(BwRate _minsz duration))) metervar onstall = do
+	-- If the progress is being updated, but less frequently than
+	-- the specified duration, a stall would be incorrectly detected.
+	--
+	-- For example, consider the case of a remote that does
+	-- not support progress updates, but is chunked with a large chunk
+	-- size. In that case, progress is only updated after each chunk.
+	--
+	-- So, wait for the first update, and see how long it takes.
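+	-- (Eg, if progress only advances once per ten-minute chunk,
+	-- a one-minute duration would otherwise flag a stall between
+	-- chunks.)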
+ -- When it's longer than the duration (or close to it), + -- upscale the duration and minsz accordingly. + starttime <- liftIO getCurrentTime + v <- waitforfirstupdate =<< readMeterVar metervar + endtime <- liftIO getCurrentTime + let timepassed = floor (endtime `diffUTCTime` starttime) + let BwRate scaledminsz scaledduration = upscale bwrate timepassed + detectStalls' scaledminsz scaledduration metervar onstall v + where + minwaitsecs = Seconds $ + min 60 (fromIntegral (durationSeconds duration)) + waitforfirstupdate startval = do + liftIO $ threadDelaySeconds minwaitsecs + v <- readMeterVar metervar + if v > startval + then return v + else waitforfirstupdate startval +detectStalls (Just ProbeStallDetection) metervar onstall = do + -- Only do stall detection once the progress is confirmed to be + -- consistently updating. After the first update, it needs to + -- advance twice within 30 seconds. With that established, + -- if no data at all is sent for a 60 second period, it's + -- assumed to be a stall. + v <- readMeterVar metervar >>= waitforfirstupdate + ontimelyadvance v $ \v' -> ontimelyadvance v' $ + detectStalls' 1 duration metervar onstall + where + duration = Duration 60 + + delay = Seconds (fromIntegral (durationSeconds duration) `div` 2) + + waitforfirstupdate startval = do + liftIO $ threadDelaySeconds delay + v <- readMeterVar metervar + if v > startval + then return v + else waitforfirstupdate startval + + ontimelyadvance v cont = do + liftIO $ threadDelaySeconds delay + v' <- readMeterVar metervar + when (v' > v) $ + cont v' + +detectStalls' + :: (Monad m, MonadIO m) + => ByteSize + -> Duration + -> TVar (Maybe BytesProcessed) + -> m () + -> Maybe ByteSize + -> m () +detectStalls' minsz duration metervar onstall st = do + liftIO $ threadDelaySeconds delay + -- Get whatever progress value was reported most recently, if any. + v <- readMeterVar metervar + let cont = detectStalls' minsz duration metervar onstall v + case (st, v) of + (Nothing, _) -> cont + (_, Nothing) -> cont + (Just prev, Just sofar) + -- Just in case a progress meter somehow runs + -- backwards, or a second progress meter was + -- started and is at a smaller value than + -- the previous one. + | prev > sofar -> cont + | sofar - prev < minsz -> onstall + | otherwise -> cont + where + delay = Seconds (fromIntegral (durationSeconds duration)) + +readMeterVar + :: MonadIO m + => TVar (Maybe BytesProcessed) + -> m (Maybe ByteSize) +readMeterVar metervar = liftIO $ atomically $ + fmap fromBytesProcessed <$> readTVar metervar + +-- Scale up the minsz and duration to match the observed time that passed +-- between progress updates. This allows for some variation in the transfer +-- rate causing later progress updates to happen less frequently. +upscale :: BwRate -> Integer -> BwRate +upscale input@(BwRate minsz duration) timepassedsecs + | timepassedsecs > dsecs `div` allowedvariation = BwRate + (ceiling (fromIntegral minsz * scale)) + (Duration (ceiling (fromIntegral dsecs * scale))) + | otherwise = input + where + scale = max (1 :: Double) $ + (fromIntegral timepassedsecs / fromIntegral (max dsecs 1)) + * fromIntegral allowedvariation + + dsecs = durationSeconds duration + + -- Setting this too low will make normal bandwidth variations be + -- considered to be stalls, while setting it too high will make + -- stalls not be detected for much longer than the expected + -- duration. + -- + -- For example, a BwRate of 20MB/1m, when the first progress + -- update takes 10m to arrive, is scaled to 600MB/30m. 
That 30m is reasonable, since only 3 chunks get sent in that amount of
+	-- time at that rate. If allowedvariation = 10, that would
+	-- be 2000MB/100m, which seems much too long to wait to detect a
+	-- stall.
+	allowedvariation = 3
diff --git a/Annex/Startup.hs b/Annex/Startup.hs
new file mode 100644
index 0000000000..c9ae3f3364
--- /dev/null
+++ b/Annex/Startup.hs
@@ -0,0 +1,67 @@
+{- git-annex startup
+ -
+ - Copyright 2010-2024 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE CPP #-}
+
+module Annex.Startup where
+
+import Annex.Common
+import qualified Annex
+import Logs.Cluster
+
+#ifndef mingw32_HOST_OS
+import Control.Concurrent.STM
+import System.Posix.Signals
+#endif
+
+{- Run when starting up the main git-annex program. -}
+startup :: Annex ()
+startup = do
+	startupSignals
+	gc <- Annex.getGitConfig
+	when (isinitialized gc)
+		startupAnnex
+  where
+	isinitialized gc = annexUUID gc /= NoUUID
+		&& isJust (annexVersion gc)
+
+{- Run when starting up the main git-annex program in a repository
+ - where git-annex has already been initialized.
+ - Alternatively, run after initialization.
+ -}
+startupAnnex :: Annex ()
+startupAnnex = doQuietAction $
+	-- Logs.Location needs this before it is used, in order for a
+	-- cluster to be treated as the location of keys
+	-- that are located in any of its nodes.
+	preLoadClusters
+
+startupSignals :: Annex ()
+startupSignals = do
+#ifndef mingw32_HOST_OS
+	av <- Annex.getRead Annex.signalactions
+	let propagate sig = liftIO $ installhandleronce sig av
+	propagate sigINT
+	propagate sigQUIT
+	propagate sigTERM
+	propagate sigTSTP
+	propagate sigCONT
+	propagate sigHUP
+	-- sigWINCH is not propagated; it should not be needed,
+	-- and the concurrent-output library installs its own signal
+	-- handler for it.
+	-- sigSTOP and sigKILL cannot be caught, so will not be propagated.
+  where
+	installhandleronce sig av = void $
+		installHandler sig (CatchOnce (gotsignal sig av)) Nothing
+	gotsignal sig av = do
+		mapM_ (\a -> a (fromIntegral sig)) =<< atomically (readTVar av)
+		raiseSignal sig
+		installhandleronce sig av
+#else
+	return ()
+#endif
diff --git a/Annex/TaggedPush.hs b/Annex/TaggedPush.hs
new file mode 100644
index 0000000000..d728678e9a
--- /dev/null
+++ b/Annex/TaggedPush.hs
@@ -0,0 +1,68 @@
+{- git-annex tagged pushes
+ -
+ - Copyright 2012 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE OverloadedStrings #-}
+
+module Annex.TaggedPush where
+
+import Annex.Common
+import qualified Remote
+import qualified Annex.Branch
+import qualified Git
+import qualified Git.Ref
+import qualified Git.Command
+import qualified Git.Branch
+import Utility.Base64
+
+import qualified Data.ByteString as S
+
+{- Converts a git branch into a branch that is tagged with a UUID, typically
+ - the UUID of the repo that will be pushing it, and possibly with other
+ - information.
+ -
+ - Pushing to branches on the remote that have our uuid in them is ugly,
+ - but it reserves those branches for pushing by us, and so our pushes will
+ - never conflict with other pushes.
+ -
+ - To avoid cluttering up the branch display, the branch is put under
+ - refs/synced/, rather than the usual refs/remotes/
+ -
+ - Both UUIDs and Base64 encoded data are always legal to be used in git
+ - refs, per git-check-ref-format.
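+ -
+ - For example (with a made-up uuid), tagging refs/heads/master with
+ - uuid e605dca6-446a-11e0-8b2a-002170d25c55 and no extra info yields:
+ -
+ -   refs/synced/e605dca6-446a-11e0-8b2a-002170d25c55/master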
+ -}
+toTaggedBranch :: UUID -> Maybe String -> Git.Branch -> Git.Ref
+toTaggedBranch u info b = Git.Ref $ S.intercalate "/" $ catMaybes
+	[ Just "refs/synced"
+	, Just $ fromUUID u
+	, toB64 . encodeBS <$> info
+	, Just $ Git.fromRef' $ Git.Ref.base b
+	]
+
+fromTaggedBranch :: Git.Ref -> Maybe (UUID, Maybe S.ByteString)
+fromTaggedBranch b = case splitc '/' $ Git.fromRef b of
+	("refs":"synced":u:info:_base) ->
+		Just (toUUID u, fromB64Maybe (encodeBS info))
+	("refs":"synced":u:_base) ->
+		Just (toUUID u, Nothing)
+	_ -> Nothing
+
+listTaggedBranches :: Annex [(Git.Sha, Git.Ref)]
+listTaggedBranches = filter (isJust . fromTaggedBranch . snd)
+	<$> inRepo Git.Ref.list
+
+taggedPush :: UUID -> Maybe String -> Git.Ref -> Remote -> Git.Repo -> IO Bool
+taggedPush u info branch remote = Git.Command.runBool
+	[ Param "push"
+	, Param $ Remote.name remote
+	{- Using forcePush here is safe because we "own" the tagged branch
+	 - we're pushing; it has no other writers. Ensures it is pushed
+	 - even if it has been rewritten by a transition. -}
+	, Param $ Git.Branch.forcePush $ refspec Annex.Branch.name
+	, Param $ refspec branch
+	]
+  where
+	refspec b = Git.fromRef b ++ ":" ++ Git.fromRef (toTaggedBranch u info b)
diff --git a/Annex/Tmp.hs b/Annex/Tmp.hs
new file mode 100644
index 0000000000..2bbebd6388
--- /dev/null
+++ b/Annex/Tmp.hs
@@ -0,0 +1,74 @@
+{- git-annex tmp files
+ -
+ - Copyright 2019 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+module Annex.Tmp where
+
+import Annex.Common
+import qualified Annex
+import Annex.LockFile
+import Annex.Perms
+import Types.CleanupActions
+import qualified Utility.RawFilePath as R
+
+import Data.Time.Clock.POSIX
+import System.PosixCompat.Files (modificationTime)
+
+-- | For creation of tmp files, other than for a key's contents.
+--
+-- The action should normally clean up whatever files it writes to the temp
+-- directory that is passed to it. However, once the action is done,
+-- any files left in that directory may be cleaned up by another process at
+-- any time.
+withOtherTmp :: (RawFilePath -> Annex a) -> Annex a
+withOtherTmp a = do
+	Annex.addCleanupAction OtherTmpCleanup cleanupOtherTmp
+	tmpdir <- fromRepo gitAnnexTmpOtherDir
+	tmplck <- fromRepo gitAnnexTmpOtherLock
+	withSharedLock tmplck $ do
+		void $ createAnnexDirectory tmpdir
+		a tmpdir
+
+-- | This uses an alternate temp directory. The action should normally
+-- clean up whatever files it writes there, but if it leaves files
+-- there (perhaps due to being interrupted), the files will be eventually
+-- cleaned up by another git-annex process (after they're a week old).
+--
+-- Unlike withOtherTmp, this does not rely on locking working.
+-- Its main use is in situations where the state of the lock file is not
+-- yet determined, eg during initialization.
+withEventuallyCleanedOtherTmp :: (RawFilePath -> Annex a) -> Annex a
+withEventuallyCleanedOtherTmp = bracket setup cleanup
+  where
+	setup = do
+		tmpdir <- fromRepo gitAnnexTmpOtherDirOld
+		void $ createAnnexDirectory tmpdir
+		return tmpdir
+	cleanup = liftIO . void . tryIO . removeDirectory . fromRawFilePath
+
+-- | Cleans up any tmp files that were left by a previous
+-- git-annex process that got interrupted or failed to clean up after
+-- itself for some other reason.
+--
+-- Does not do anything if withOtherTmp is running.
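+--
+-- ("A week old" means a modification time more than 60*60*24*7 seconds
+-- in the past; see the oldenough calculation below.)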
+cleanupOtherTmp :: Annex () +cleanupOtherTmp = do + tmplck <- fromRepo gitAnnexTmpOtherLock + void $ tryIO $ tryExclusiveLock tmplck $ do + tmpdir <- fromRawFilePath <$> fromRepo gitAnnexTmpOtherDir + void $ liftIO $ tryIO $ removeDirectoryRecursive tmpdir + oldtmp <- fromRawFilePath <$> fromRepo gitAnnexTmpOtherDirOld + liftIO $ mapM_ cleanold + =<< emptyWhenDoesNotExist (dirContentsRecursive oldtmp) + liftIO $ void $ tryIO $ removeDirectory oldtmp -- when empty + where + cleanold f = do + now <- liftIO getPOSIXTime + let oldenough = now - (60 * 60 * 24 * 7) + catchMaybeIO (modificationTime <$> R.getSymbolicLinkStatus (toRawFilePath f)) >>= \case + Just mtime | realToFrac mtime <= oldenough -> + void $ tryIO $ removeWhenExistsWith R.removeLink (toRawFilePath f) + _ -> return () diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs new file mode 100644 index 0000000000..1c1abf4fd5 --- /dev/null +++ b/Annex/Transfer.hs @@ -0,0 +1,443 @@ +{- git-annex transfers + - + - Copyright 2012-2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP, BangPatterns, OverloadedStrings #-} + +module Annex.Transfer ( + module X, + upload, + upload', + alwaysUpload, + download, + download', + runTransfer, + alwaysRunTransfer, + noRetry, + stdRetry, + pickRemote, +) where + +import Annex.Common +import qualified Annex +import Logs.Transfer as X +import Types.Transfer as X +import Annex.Notification as X +import Annex.Content +import Annex.Perms +import Annex.Action +import Utility.Metered +import Utility.ThreadScheduler +import Utility.FileMode +import Annex.LockPool +import Types.Key +import qualified Types.Remote as Remote +import qualified Types.Backend +import Types.Concurrency +import Annex.Concurrent +import Types.WorkerPool +import Annex.WorkerPool +import Annex.TransferrerPool +import Annex.StallDetection +import Backend (isCryptographicallySecureKey) +import Types.StallDetection +import qualified Utility.RawFilePath as R + +import Control.Concurrent +import Control.Concurrent.Async +import Control.Concurrent.STM hiding (retry) +import qualified Data.Map.Strict as M +import qualified System.FilePath.ByteString as P +import Data.Ord + +-- Upload, supporting canceling detected stalls. +upload :: Remote -> Key -> AssociatedFile -> RetryDecider -> NotifyWitness -> Annex Bool +upload r key af d witness = + case getStallDetection Upload r of + Nothing -> go (Just ProbeStallDetection) + Just StallDetectionDisabled -> go Nothing + Just sd -> runTransferrer sd r key af d Upload witness + where + go sd = upload' (Remote.uuid r) key af sd d (action . Remote.storeKey r key af Nothing) witness + +-- Upload, not supporting canceling detected stalls +upload' :: Observable v => UUID -> Key -> AssociatedFile -> Maybe StallDetection -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v +upload' u key f sd d a _witness = guardHaveUUID u $ + runTransfer (Transfer Upload u (fromKey id key)) Nothing f sd d a + +alwaysUpload :: Observable v => UUID -> Key -> AssociatedFile -> Maybe StallDetection -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v +alwaysUpload u key f sd d a _witness = guardHaveUUID u $ + alwaysRunTransfer (Transfer Upload u (fromKey id key)) Nothing f sd d a + +-- Download, supporting canceling detected stalls. 
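+-- (The content is retrieved via a tmp file, with Remote.RemoteVerify
+-- used as the VerifyConfig; see getViaTmp below.)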
+download :: Remote -> Key -> AssociatedFile -> RetryDecider -> NotifyWitness -> Annex Bool +download r key f d witness = + case getStallDetection Download r of + Nothing -> go (Just ProbeStallDetection) + Just StallDetectionDisabled -> go Nothing + Just sd -> runTransferrer sd r key f d Download witness + where + go sd = getViaTmp (Remote.retrievalSecurityPolicy r) vc key f Nothing $ \dest -> + download' (Remote.uuid r) key f sd d (go' dest) witness + go' dest p = verifiedAction $ + Remote.retrieveKeyFile r key f (fromRawFilePath dest) p vc + vc = Remote.RemoteVerify r + +-- Download, not supporting canceling detected stalls. +download' :: Observable v => UUID -> Key -> AssociatedFile -> Maybe StallDetection -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v +download' u key f sd d a _witness = guardHaveUUID u $ + runTransfer (Transfer Download u (fromKey id key)) Nothing f sd d a + +guardHaveUUID :: Observable v => UUID -> Annex v -> Annex v +guardHaveUUID u a + | u == NoUUID = return observeFailure + | otherwise = a + +{- Runs a transfer action. Creates and locks the lock file while the + - action is running, and stores info in the transfer information + - file. + - + - If the transfer action returns False, the transfer info is + - left in the failedTransferDir. + - + - If the transfer is already in progress, returns False. + - + - An upload can be run from a read-only filesystem, and in this case + - no transfer information or lock file is used. + - + - Cannot cancel stalls, but when a likely stall is detected, + - suggests to the user that they enable stall detection handling. + -} +runTransfer :: Observable v => Transfer -> Maybe Backend -> AssociatedFile -> Maybe StallDetection -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v +runTransfer = runTransfer' False + +{- Like runTransfer, but ignores any existing transfer lock file for the + - transfer, allowing re-running a transfer that is already in progress. 
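+ -
+ - (For example, a child transfer process may use this when its parent
+ - already holds the transfer lock.)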
+ -} +alwaysRunTransfer :: Observable v => Transfer -> Maybe Backend -> AssociatedFile -> Maybe StallDetection -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v +alwaysRunTransfer = runTransfer' True + +runTransfer' :: Observable v => Bool -> Transfer -> Maybe Backend -> AssociatedFile -> Maybe StallDetection -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v +runTransfer' ignorelock t eventualbackend afile stalldetection retrydecider transferaction = + enteringStage (TransferStage (transferDirection t)) $ + debugLocks $ + preCheckSecureHashes (transferKey t) eventualbackend go + where + go = do + info <- liftIO $ startTransferInfo afile + (tfile, lckfile, moldlckfile) <- fromRepo $ transferFileAndLockFile t + (meter, createtfile, metervar) <- mkProgressUpdater t info tfile + mode <- annexFileMode + (lck, inprogress) <- prep lckfile moldlckfile createtfile mode + if inprogress && not ignorelock + then do + warning "transfer already in progress, or unable to take transfer lock" + return observeFailure + else do + v <- retry 0 info metervar $ + detectStallsAndSuggestConfig stalldetection metervar $ + transferaction meter + liftIO $ cleanup tfile lckfile moldlckfile lck + if observeBool v + then removeFailedTransfer t + else recordFailedTransfer t info + return v + + prep :: RawFilePath -> Maybe RawFilePath -> Annex () -> ModeSetter -> Annex (Maybe (LockHandle, Maybe LockHandle), Bool) +#ifndef mingw32_HOST_OS + prep lckfile moldlckfile createtfile mode = catchPermissionDenied (const prepfailed) $ do + createAnnexDirectory $ P.takeDirectory lckfile + tryLockExclusive (Just mode) lckfile >>= \case + Nothing -> return (Nothing, True) + -- Since the lock file is removed in cleanup, + -- there's a race where different processes + -- may have a deleted and a new version of the same + -- lock file open. checkSaneLock guards against + -- that. + Just lockhandle -> ifM (checkSaneLock lckfile lockhandle) + ( case moldlckfile of + Nothing -> do + createtfile + return (Just (lockhandle, Nothing), False) + Just oldlckfile -> do + createAnnexDirectory $ P.takeDirectory oldlckfile + tryLockExclusive (Just mode) oldlckfile >>= \case + Nothing -> do + liftIO $ dropLock lockhandle + return (Nothing, True) + Just oldlockhandle -> ifM (checkSaneLock oldlckfile oldlockhandle) + ( do + createtfile + return (Just (lockhandle, Just oldlockhandle), False) + , do + liftIO $ dropLock oldlockhandle + liftIO $ dropLock lockhandle + return (Nothing, True) + ) + , do + liftIO $ dropLock lockhandle + return (Nothing, True) + ) +#else + prep lckfile moldlckfile createtfile _mode = catchPermissionDenied (const prepfailed) $ do + createAnnexDirectory $ P.takeDirectory lckfile + catchMaybeIO (liftIO $ lockExclusive lckfile) >>= \case + Just (Just lockhandle) -> case moldlckfile of + Nothing -> do + createtfile + return (Just (lockhandle, Nothing), False) + Just oldlckfile -> do + createAnnexDirectory $ P.takeDirectory oldlckfile + catchMaybeIO (liftIO $ lockExclusive oldlckfile) >>= \case + Just (Just oldlockhandle) -> do + createtfile + return (Just (lockhandle, Just oldlockhandle), False) + _ -> do + liftIO $ dropLock lockhandle + return (Nothing, False) + _ -> return (Nothing, False) +#endif + prepfailed = return (Nothing, False) + + cleanup _ _ _ Nothing = noop + cleanup tfile lckfile moldlckfile (Just (lockhandle, moldlockhandle)) = do + void $ tryIO $ R.removeLink tfile +#ifndef mingw32_HOST_OS + void $ tryIO $ R.removeLink lckfile + maybe noop (void . tryIO . 
R.removeLink) moldlckfile + maybe noop dropLock moldlockhandle + dropLock lockhandle +#else + {- Windows cannot delete the lockfile until the lock + - is closed. So it's possible to race with another + - process that takes the lock before it's removed, + - so ignore failure to remove. + -} + maybe noop dropLock moldlockhandle + dropLock lockhandle + void $ tryIO $ R.removeLink lckfile + maybe noop (void . tryIO . R.removeLink) moldlckfile +#endif + + retry numretries oldinfo metervar run = + tryNonAsync run >>= \case + Right v + | observeBool v -> return v + | otherwise -> checkretry + Left e -> do + warning (UnquotedString (show e)) + checkretry + where + checkretry = do + b <- getbytescomplete metervar + let newinfo = oldinfo { bytesComplete = Just b } + let !numretries' = succ numretries + ifM (retrydecider numretries' oldinfo newinfo) + ( retry numretries' newinfo metervar run + , return observeFailure + ) + + getbytescomplete metervar = liftIO $ + maybe 0 fromBytesProcessed <$> readTVarIO metervar + +detectStallsAndSuggestConfig :: Maybe StallDetection -> TVar (Maybe BytesProcessed) -> Annex a -> Annex a +detectStallsAndSuggestConfig Nothing _ a = a +detectStallsAndSuggestConfig sd@(Just _) metervar a = + bracket setup cleanup (const a) + where + setup = do + v <- liftIO newEmptyTMVarIO + sdt <- liftIO $ async $ detectStalls sd metervar $ + void $ atomically $ tryPutTMVar v True + wt <- liftIO . async =<< forkState (warnonstall v) + return (v, sdt, wt) + cleanup (v, sdt, wt) = do + liftIO $ uninterruptibleCancel sdt + void $ liftIO $ atomically $ tryPutTMVar v False + join (liftIO (wait wt)) + warnonstall v = whenM (liftIO (atomically (takeTMVar v))) $ + warning "Transfer seems to have stalled. To restart stalled transfers, configure annex.stalldetection" + +{- Runs a transfer using a separate process, which lets detected stalls be + - canceled. -} +runTransferrer + :: StallDetection + -> Remote + -> Key + -> AssociatedFile + -> RetryDecider + -> Direction + -> NotifyWitness + -> Annex Bool +runTransferrer sd r k afile retrydecider direction _witness = + enteringStage (TransferStage direction) $ preCheckSecureHashes k Nothing $ do + info <- liftIO $ startTransferInfo afile + go 0 info + where + go numretries info = + withTransferrer (performTransfer (Just sd) AnnexLevel id (Just r) t info) >>= \case + Right () -> return True + Left newinfo -> do + let !numretries' = succ numretries + ifM (retrydecider numretries' info newinfo) + ( go numretries' newinfo + , return False + ) + t = Transfer direction (Remote.uuid r) (fromKey id k) + +{- Avoid download and upload of keys with insecure content when + - annex.securehashesonly is configured. + - + - This is not a security check. Even if this let the content be + - downloaded, the actual security checks would prevent the content from + - being added to the repository. The only reason this is done here is to + - avoid transferring content that's going to be rejected anyway. + - + - We assume that, if annex.securehashesonly is set and the local repo + - still contains content using an insecure hash, remotes will likewise + - tend to be configured to reject it, so Upload is also prevented. 
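+ -
+ - (Example, not part of the original patch: with annex.securehashesonly
+ - set, a transfer of a key using an insecure backend such as MD5 or SHA1
+ - is refused here, while a SHA256 key passes through to the transfer.)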
+ -} +preCheckSecureHashes :: Observable v => Key -> Maybe Backend -> Annex v -> Annex v +preCheckSecureHashes k meventualbackend a = case meventualbackend of + Just eventualbackend -> go + (pure (Types.Backend.isCryptographicallySecure eventualbackend)) + (Types.Backend.backendVariety eventualbackend) + Nothing -> go + (isCryptographicallySecureKey k) + (fromKey keyVariety k) + where + go checksecure variety = ifM checksecure + ( a + , ifM (annexSecureHashesOnly <$> Annex.getGitConfig) + ( blocked variety + , a + ) + ) + blocked variety = do + warning $ UnquotedString $ "annex.securehashesonly blocked transfer of " ++ decodeBS (formatKeyVariety variety) ++ " key" + return observeFailure + +type NumRetries = Integer + +type RetryDecider = NumRetries -> TransferInfo -> TransferInfo -> Annex Bool + +{- Both retry deciders are checked together, so if one chooses to delay, + - it will always take effect. -} +combineRetryDeciders :: RetryDecider -> RetryDecider -> RetryDecider +combineRetryDeciders a b = \n old new -> do + ar <- a n old new + br <- b n old new + return (ar || br) + +noRetry :: RetryDecider +noRetry _ _ _ = pure False + +stdRetry :: RetryDecider +stdRetry = combineRetryDeciders forwardRetry configuredRetry + +{- Keep retrying failed transfers, as long as forward progress is being + - made. + - + - Up to a point -- while some remotes can resume where the previous + - transfer left off, and so it would make sense to keep retrying forever, + - other remotes restart each transfer from the beginning, and so even if + - forward progress is being made, it's not real progress. So, retry a + - maximum of 5 times by default. + -} +forwardRetry :: RetryDecider +forwardRetry numretries old new + | fromMaybe 0 (bytesComplete old) < fromMaybe 0 (bytesComplete new) = + (numretries <=) <$> maybe globalretrycfg pure remoteretrycfg + | otherwise = return False + where + globalretrycfg = fromMaybe 5 . annexForwardRetry + <$> Annex.getGitConfig + remoteretrycfg = remoteAnnexRetry =<< + (Remote.gitconfig <$> transferRemote new) + +{- Retries a number of times with growing delays in between when enabled + - by git configuration. -} +configuredRetry :: RetryDecider +configuredRetry numretries _old new = do + (maxretries, Seconds initretrydelay) <- getcfg $ + Remote.gitconfig <$> transferRemote new + if numretries < maxretries + then do + let retrydelay = Seconds (initretrydelay * 2^(numretries-1)) + showSideAction $ UnquotedString $ "Delaying " ++ show (fromSeconds retrydelay) ++ "s before retrying." + liftIO $ threadDelaySeconds retrydelay + return True + else return False + where + globalretrycfg = fromMaybe 0 . annexRetry + <$> Annex.getGitConfig + globalretrydelaycfg = fromMaybe (Seconds 1) . annexRetryDelay + <$> Annex.getGitConfig + getcfg Nothing = (,) <$> globalretrycfg <*> globalretrydelaycfg + getcfg (Just gc) = (,) + <$> maybe globalretrycfg return (remoteAnnexRetry gc) + <*> maybe globalretrydelaycfg return (remoteAnnexRetryDelay gc) + +{- Picks a remote from the list and tries a transfer to it. If the transfer + - does not succeed, goes on to try other remotes from the list. + - + - The list should already be ordered by remote cost, and is normally + - tried in order. However, when concurrent jobs are running, they will + - be assigned different remotes of the same cost when possible. This can + - increase total transfer speed. 
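+ -
+ - (Worked example, not part of the original patch: with remotes A and B
+ - both at cost 100 and C at cost 200, two concurrent jobs will typically
+ - be assigned A and B by lessActiveFirst; C is only tried after a
+ - cheaper remote's transfer fails.)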
+ -} +pickRemote :: Observable v => [Remote] -> (Remote -> Annex v) -> Annex v +pickRemote l a = debugLocks $ go l =<< getConcurrency + where + go [] _ = return observeFailure + go (r:[]) _ = a r + go rs NonConcurrent = gononconcurrent rs + go rs (Concurrent n) + | n <= 1 = gononconcurrent rs + | otherwise = goconcurrent rs + go rs ConcurrentPerCpu = goconcurrent rs + + gononconcurrent [] = return observeFailure + gononconcurrent (r:rs) = do + ok <- a r + if observeBool ok + then return ok + else gononconcurrent rs + + goconcurrent rs = do + mv <- Annex.getRead Annex.activeremotes + active <- liftIO $ takeMVar mv + let rs' = sortBy (lessActiveFirst active) rs + goconcurrent' mv active rs' + + goconcurrent' mv active [] = do + liftIO $ putMVar mv active + return observeFailure + goconcurrent' mv active (r:rs) = do + let !active' = M.insertWith (+) r 1 active + liftIO $ putMVar mv active' + let getnewactive = do + active'' <- liftIO $ takeMVar mv + let !active''' = M.update (\n -> if n > 1 then Just (n-1) else Nothing) r active'' + return active''' + let removeactive = liftIO . putMVar mv =<< getnewactive + ok <- a r `onException` removeactive + if observeBool ok + then do + removeactive + return ok + else do + active'' <- getnewactive + -- Re-sort the remaining rs + -- because other threads could have + -- been assigned them in the meantime. + let rs' = sortBy (lessActiveFirst active'') rs + goconcurrent' mv active'' rs' + +lessActiveFirst :: M.Map Remote Integer -> Remote -> Remote -> Ordering +lessActiveFirst active a b + | Remote.cost a == Remote.cost b = comparing (`M.lookup` active) a b + | otherwise = comparing Remote.cost a b diff --git a/Annex/TransferrerPool.hs b/Annex/TransferrerPool.hs new file mode 100644 index 0000000000..481e08e9f7 --- /dev/null +++ b/Annex/TransferrerPool.hs @@ -0,0 +1,300 @@ +{- A pool of "git-annex transferrer" processes + - + - Copyright 2013-2022 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE CPP #-} + +module Annex.TransferrerPool where + +import Annex.Common +import qualified Annex +import Types.TransferrerPool +import Types.Transferrer +import Types.Transfer +import qualified Types.Remote as Remote +import Types.Messages +import Types.CleanupActions +import Messages.Serialized +import Annex.Path +import Annex.StallDetection +import Annex.Link +import Utility.Batch +import Utility.Metered +import qualified Utility.SimpleProtocol as Proto + +import Control.Concurrent +import Control.Concurrent.Async +import Control.Concurrent.STM hiding (check) +import Control.Monad.IO.Class (MonadIO) +import qualified Data.Map as M +#ifndef mingw32_HOST_OS +import System.Posix.Signals +import System.Posix.Process (getProcessGroupIDOf) +#endif + +type SignalActionsVar = TVar (M.Map SignalAction (Int -> IO ())) + +data RunTransferrer = RunTransferrer String [CommandParam] BatchCommandMaker + +mkRunTransferrer :: BatchCommandMaker -> Annex RunTransferrer +mkRunTransferrer batchmaker = RunTransferrer + <$> liftIO programPath + <*> gitAnnexChildProcessParams "transferrer" [] + <*> pure batchmaker + +{- Runs an action with a Transferrer from the pool. 
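+ -
+ - (Usage note, not part of the original patch: Annex.Transfer's
+ - runTransferrer drives this as
+ -
+ -   withTransferrer (performTransfer (Just sd) AnnexLevel id (Just r) t info)
+ -
+ - with the partially applied performTransfer consuming the pooled
+ - Transferrer.)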
-} +withTransferrer :: (Transferrer -> Annex a) -> Annex a +withTransferrer a = do + rt <- mkRunTransferrer nonBatchCommandMaker + pool <- Annex.getRead Annex.transferrerpool + let nocheck = pure (pure True) + signalactonsvar <- Annex.getRead Annex.signalactions + withTransferrer' False signalactonsvar nocheck rt pool a + +withTransferrer' + :: (MonadIO m, MonadMask m) + => Bool + -- ^ When minimizeprocesses is True, only one Transferrer is left + -- running in the pool at a time. So if this needed to start a + -- new Transferrer, it's stopped when done. Otherwise, idle + -- processes are left in the pool for use later. + -> SignalActionsVar + -> MkCheckTransferrer + -> RunTransferrer + -> TransferrerPool + -> (Transferrer -> m a) + -> m a +withTransferrer' minimizeprocesses signalactonsvar mkcheck rt pool a = do + (mi, leftinpool) <- liftIO $ atomically (popTransferrerPool pool) + (i@(TransferrerPoolItem _ check), t) <- liftIO $ case mi of + Nothing -> do + t <- mkTransferrer signalactonsvar rt + i <- mkTransferrerPoolItem mkcheck t + return (i, t) + Just i -> checkTransferrerPoolItem signalactonsvar rt i + a t `finally` returntopool leftinpool check t i + where + returntopool leftinpool check t i + | not minimizeprocesses || leftinpool == 0 = + -- If the transferrer got killed, the handles will + -- be closed, so it should not be returned to the + -- pool. + liftIO $ whenM (hIsOpen (transferrerWrite t)) $ + liftIO $ atomically $ pushTransferrerPool pool i + | otherwise = liftIO $ do + void $ forkIO $ transferrerShutdown t + atomically $ pushTransferrerPool pool $ TransferrerPoolItem Nothing check + +{- Check if a Transferrer from the pool is still ok to be used. + - If not, stop it and start a new one. -} +checkTransferrerPoolItem :: SignalActionsVar -> RunTransferrer -> TransferrerPoolItem -> IO (TransferrerPoolItem, Transferrer) +checkTransferrerPoolItem signalactonsvar rt i = case i of + TransferrerPoolItem (Just t) check -> ifM check + ( return (i, t) + , do + transferrerShutdown t + new check + ) + TransferrerPoolItem Nothing check -> new check + where + new check = do + t <- mkTransferrer signalactonsvar rt + return (TransferrerPoolItem (Just t) check, t) + +data TransferRequestLevel = AnnexLevel | AssistantLevel + deriving (Show) + +{- Requests that a Transferrer perform a Transfer, and waits for it to + - finish. + - + - When a stall is detected, kills the Transferrer. + - + - If the transfer failed or stalled, returns TransferInfo with an + - updated bytesComplete reflecting how much data has been transferred. + -} +performTransfer + :: (Monad m, MonadIO m, MonadMask m) + => Maybe StallDetection + -> TransferRequestLevel + -> (forall a. Annex a -> m a) + -- ^ Run an annex action in the monad. Will not be used with + -- actions that block for a long time. 
+ -> Maybe Remote + -> Transfer + -> TransferInfo + -> Transferrer + -> m (Either TransferInfo ()) +performTransfer stalldetection level runannex r t info transferrer = do + bpv <- liftIO $ newTVarIO zeroBytesProcessed + ifM (catchBoolIO $ bracket setup cleanup (go bpv)) + ( return (Right ()) + , do + n <- liftIO $ atomically $ + fromBytesProcessed <$> readTVar bpv + return $ Left $ info { bytesComplete = Just n } + ) + where + setup = do + liftIO $ sendRequest level t r + (associatedFile info) + (transferrerWrite transferrer) + metervar <- liftIO $ newTVarIO Nothing + stalledvar <- liftIO $ newTVarIO False + tid <- liftIO $ async $ + detectStalls stalldetection metervar $ do + atomically $ writeTVar stalledvar True + killTransferrer transferrer + return (metervar, tid, stalledvar) + + cleanup (_, tid, stalledvar) = do + liftIO $ uninterruptibleCancel tid + whenM (liftIO $ atomically $ readTVar stalledvar) $ do + runannex $ showLongNote "Transfer stalled" + -- Close handles, to prevent the transferrer being + -- reused since the process was killed. + liftIO $ hClose $ transferrerRead transferrer + liftIO $ hClose $ transferrerWrite transferrer + + go bpv (metervar, _, _) = relaySerializedOutput + (liftIO $ readResponse (transferrerRead transferrer)) + (liftIO . sendSerializedOutputResponse (transferrerWrite transferrer)) + (updatemeter bpv metervar) + runannex + + updatemeter bpv metervar (Just n) = liftIO $ do + atomically $ writeTVar metervar (Just n) + atomically $ writeTVar bpv n + updatemeter _bpv metervar Nothing = liftIO $ + atomically $ writeTVar metervar Nothing + +{- Starts a new git-annex transfer process, setting up handles + - that will be used to communicate with it. -} +mkTransferrer :: SignalActionsVar -> RunTransferrer -> IO Transferrer +#ifndef mingw32_HOST_OS +mkTransferrer signalactonsvar (RunTransferrer program params batchmaker) = do +#else +mkTransferrer _ (RunTransferrer program params batchmaker) = do +#endif + {- It runs as a batch job. -} + let (program', params') = batchmaker (program, params) + {- It's put into its own group so that the whole group can be + - killed to stop a transfer. -} + (Just writeh, Just readh, _, ph) <- createProcess + (proc program' $ toCommand params') + { create_group = True + , std_in = CreatePipe + , std_out = CreatePipe + } + + {- Set up signal propagation, so eg ctrl-c will also interrupt + - the processes in the transferrer's process group. + - + - There is a race between the process being created and this point. + - If a signal is received before this can run, it is not sent to + - the transferrer. This leaves the transferrer waiting for the + - first message on stdin to tell what to do. If the signal kills + - this parent process, the transferrer will then get a sigpipe + - and die too. If the signal suspends this parent process, + - it's ok to leave the transferrer running, as it's waiting on + - the pipe until this process wakes back up. 
+ -} +#ifndef mingw32_HOST_OS + pid <- getPid ph + unregistersignalprop <- case pid of + Just p -> getProcessGroupIDOf p >>= \pgrp -> do + atomically $ modifyTVar' signalactonsvar $ + M.insert (PropagateSignalProcessGroup p) $ \sig -> + signalProcessGroup (fromIntegral sig) pgrp + return $ atomically $ modifyTVar' signalactonsvar $ + M.delete (PropagateSignalProcessGroup p) + Nothing -> return noop +#else + let unregistersignalprop = noop +#endif + + return $ Transferrer + { transferrerRead = readh + , transferrerWrite = writeh + , transferrerHandle = ph + , transferrerShutdown = do + -- The transferrer may write to stdout + -- as it's shutting down, so don't close + -- the readh right away. Instead, drain + -- anything sent to it. + drainer <- async $ void $ hGetContents readh + hClose writeh + void $ waitForProcess ph + wait drainer + hClose readh + unregistersignalprop + } + +-- | Send a request to perform a transfer. +sendRequest :: TransferRequestLevel -> Transfer -> Maybe Remote -> AssociatedFile -> Handle -> IO () +sendRequest level t mremote afile h = do + let tr = maybe + (TransferRemoteUUID (transferUUID t)) + (TransferRemoteName . Remote.name) + mremote + let f = case (level, transferDirection t) of + (AnnexLevel, Upload) -> UploadRequest + (AnnexLevel, Download) -> DownloadRequest + (AssistantLevel, Upload) -> AssistantUploadRequest + (AssistantLevel, Download) -> AssistantDownloadRequest + let r = f tr (transferKey t) (TransferAssociatedFile afile) + let l = unwords $ Proto.formatMessage r + debug "Annex.TransferrerPool" ("> " ++ l) + hPutStrLn h l + hFlush h + +sendSerializedOutputResponse :: Handle -> SerializedOutputResponse -> IO () +sendSerializedOutputResponse h sor = do + let l = unwords $ Proto.formatMessage $ + TransferSerializedOutputResponse sor + debug "Annex.TransferrerPool" ("> " ++ show l) + hPutStrLn h l + hFlush h + +-- | Read a response to a transfer request. +-- +-- Before the final response, this will return whatever SerializedOutput +-- should be displayed as the transfer is performed. +readResponse :: Handle -> IO (Either SerializedOutput Bool) +readResponse h = do + l <- liftIO $ hGetLine h + debug "Annex.TransferrerPool" ("< " ++ l) + case Proto.parseMessage l of + Just (TransferOutput so) -> return (Left so) + Just (TransferResult r) -> return (Right r) + Nothing -> transferrerProtocolError l + +transferrerProtocolError :: String -> a +transferrerProtocolError l = giveup $ "transferrer protocol error: " ++ show l + +{- Kill the transferrer, and all its child processes. -} +killTransferrer :: Transferrer -> IO () +killTransferrer t = do + interruptProcessGroupOf $ transferrerHandle t + threadDelay 50000 -- 0.05 second grace period + terminateProcess $ transferrerHandle t + +{- Stop all transferrers in the pool. -} +emptyTransferrerPool :: Annex () +emptyTransferrerPool = do + poolvar <- Annex.getRead Annex.transferrerpool + pool <- liftIO $ atomically $ swapTVar poolvar [] + liftIO $ forM_ pool $ \case + TransferrerPoolItem (Just t) _ -> transferrerShutdown t + TransferrerPoolItem Nothing _ -> noop + -- Transferrers usually restage pointer files themselves, + -- but when killTransferrer is used, a transferrer may have + -- pointer files it has not gotten around to restaging yet. + -- So, restage pointer files here in clean up from such killed + -- transferrers. 
+ unless (null pool) $ + restagePointerFiles =<< Annex.gitRepo diff --git a/Annex/UUID.hs b/Annex/UUID.hs new file mode 100644 index 0000000000..5d846c7724 --- /dev/null +++ b/Annex/UUID.hs @@ -0,0 +1,127 @@ +{- git-annex uuids + - + - Each git repository used by git-annex has an annex.uuid setting that + - uniquely identifies that repository. + - + - UUIDs of remotes are cached in git config, using keys named + - remote.<name>.annex-uuid + - + - Copyright 2010-2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Annex.UUID ( + configkeyUUID, + configRepoUUID, + getUUID, + getRepoUUID, + getUncachedUUID, + isUUIDConfigured, + prepUUID, + genUUID, + genUUIDInNameSpace, + gCryptNameSpace, + removeRepoUUID, + storeUUID, + storeUUIDIn, + setUUID, + webUUID, + bitTorrentUUID, +) where + +import Annex.Common +import qualified Annex +import qualified Git +import qualified Git.Config +import Git.Types +import Config + +import qualified Data.UUID as U +import qualified Data.UUID.V4 as U4 +import qualified Data.UUID.V5 as U5 +import qualified Data.ByteString as S +import Data.String + +configkeyUUID :: ConfigKey +configkeyUUID = annexConfig "uuid" + +configRepoUUID :: Git.Repo -> ConfigKey +configRepoUUID r = remoteAnnexConfig r "uuid" + +{- Generates a random UUID, that does not include the MAC address. -} +genUUID :: IO UUID +genUUID = toUUID <$> U4.nextRandom + +{- Generates a UUID from a given string, using a namespace. + - Given the same namespace, the same string will always result + - in the same UUID. -} +genUUIDInNameSpace :: U.UUID -> S.ByteString -> UUID +genUUIDInNameSpace namespace = toUUID . U5.generateNamed namespace . S.unpack + +{- Namespace used for UUIDs derived from git-remote-gcrypt ids. -} +gCryptNameSpace :: U.UUID +gCryptNameSpace = U5.generateNamed U5.namespaceURL $ + S.unpack "http://git-annex.branchable.com/design/gcrypt/" + +{- Get current repository's UUID. -} +getUUID :: Annex UUID +getUUID = annexUUID <$> Annex.getGitConfig + +{- Looks up a remote repo's UUID, caching it in .git/config if + - it's not already. -} +getRepoUUID :: Git.Repo -> Annex UUID +getRepoUUID r = do + c <- toUUID <$> getConfig cachekey "" + let u = getUncachedUUID r + + if c /= u && u /= NoUUID + then do + updatecache u + return u + else return c + where + updatecache u = do + g <- gitRepo + when (g /= r) $ storeUUIDIn cachekey u + cachekey = configRepoUUID r + +removeRepoUUID :: Annex () +removeRepoUUID = do + unsetConfig configkeyUUID + storeUUID NoUUID + +getUncachedUUID :: Git.Repo -> UUID +getUncachedUUID = toUUID . Git.Config.get configkeyUUID "" + +-- Does the repo's config have a key for the UUID? +-- True even when the key has no value. +isUUIDConfigured :: Git.Repo -> Bool +isUUIDConfigured = isJust . Git.Config.getMaybe configkeyUUID + +{- Make sure that the repo has an annex.uuid setting. -} +prepUUID :: Annex () +prepUUID = whenM ((==) NoUUID <$> getUUID) $ + storeUUID =<< liftIO genUUID + +storeUUID :: UUID -> Annex () +storeUUID = storeUUIDIn configkeyUUID + +storeUUIDIn :: ConfigKey -> UUID -> Annex () +storeUUIDIn configfield = setConfig configfield . fromUUID + +{- Only sets the configkey in the Repo; does not change .git/config -} +setUUID :: Git.Repo -> UUID -> IO Git.Repo +setUUID r u = do + let s = encodeBS $ show configkeyUUID ++ "=" ++ fromUUID u + Git.Config.store s Git.Config.ConfigList r + +-- Dummy uuid for the whole web. Do not alter.
+webUUID :: UUID +webUUID = UUID (fromString "00000000-0000-0000-0000-000000000001") + +-- Dummy uuid for bittorrent. Do not alter. +bitTorrentUUID :: UUID +bitTorrentUUID = UUID (fromString "00000000-0000-0000-0000-000000000002") diff --git a/Annex/UntrustedFilePath.hs b/Annex/UntrustedFilePath.hs new file mode 100644 index 0000000000..df7ac3bd35 --- /dev/null +++ b/Annex/UntrustedFilePath.hs @@ -0,0 +1,77 @@ +{- handling untrusted filepaths + - + - Copyright 2010-2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.UntrustedFilePath where + +import Data.Char +import System.FilePath + +import Utility.SafeOutput + +{- Given a string that we'd like to use as the basis for FilePath, but that + - was provided by a third party and is not to be trusted, returns the closest + - sane FilePath. + - + - All spaces and punctuation and other wacky stuff are replaced + - with '_', except for '.' and '-' + - + - "../" becomes ".._", which is safe. + - "/foo" becomes "_foo", which is safe. + - "c:foo" becomes "c_foo", which is safe even on windows. + - + - Leading '.' and '-' are also replaced with '_', so + - no dotfiles that might control a program are inadvertently created, + - and to avoid filenames being treated as options to commands the user + - might run. + - + - Also there's an off chance the string might be empty, so to avoid + - needing to handle such an invalid filepath, return a dummy "file" in + - that case. + -} +sanitizeFilePath :: String -> FilePath +sanitizeFilePath = sanitizeLeadingFilePathCharacter . sanitizeFilePathComponent + +{- For when the filepath is being built up out of components that should be + - individually sanitized, this can be used for each component, followed by + - sanitizeLeadingFilePathCharacter for the whole thing. + -} +sanitizeFilePathComponent :: String -> String +sanitizeFilePathComponent = map sanitize + where + sanitize c + | c == '.' || c == '-' = c + | isSpace c || isPunctuation c || isSymbol c || isControl c || c == '/' = '_' + | otherwise = c + +sanitizeLeadingFilePathCharacter :: String -> FilePath +sanitizeLeadingFilePathCharacter [] = "file" +sanitizeLeadingFilePathCharacter ('.':s) = '_':s +sanitizeLeadingFilePathCharacter ('-':s) = '_':s +sanitizeLeadingFilePathCharacter ('/':s) = '_':s +sanitizeLeadingFilePathCharacter s = s + +controlCharacterInFilePath :: FilePath -> Bool +controlCharacterInFilePath = any (not . safechar) + where + safechar c = safeOutputChar c && c /= '\n' + +{- ../ is a path traversal, no matter where it appears. + - + - An absolute path is, of course. + -} +pathTraversalInFilePath :: FilePath -> Bool +pathTraversalInFilePath f + | isAbsolute f = True + | any (== "..") (splitPath f) = True + -- On windows, C:foo with no directory is not considered absolute + | hasDrive f = True + | otherwise = False + +gitDirectoryInFilePath :: FilePath -> Bool +gitDirectoryInFilePath = any (== ".git") + . map dropTrailingPathSeparator + . splitPath diff --git a/Annex/UpdateInstead.hs b/Annex/UpdateInstead.hs new file mode 100644 index 0000000000..3f197cb580 --- /dev/null +++ b/Annex/UpdateInstead.hs @@ -0,0 +1,23 @@ +{- git-annex UpdateInstead emulation + - + - Copyright 2017 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher.
+ -} + +module Annex.UpdateInstead where + +import qualified Annex +import Annex.Common +import Annex.AdjustedBranch +import Git.Branch +import Git.ConfigTypes + +{- receive.denyCurrentBranch=updateInstead does not work + - when an adjusted branch is checked out, so must be emulated. -} +needUpdateInsteadEmulation :: Annex Bool +needUpdateInsteadEmulation = updateinsteadset <&&> isadjusted + where + updateinsteadset = (== UpdateInstead) . receiveDenyCurrentBranch + <$> Annex.getGitConfig + isadjusted = (maybe False (isJust . getAdjustment) <$> inRepo Git.Branch.current) diff --git a/Annex/Url.hs b/Annex/Url.hs new file mode 100644 index 0000000000..2f12a10768 --- /dev/null +++ b/Annex/Url.hs @@ -0,0 +1,190 @@ +{- Url downloading, with git-annex user agent and configured http + - headers, security restrictions, etc. + - + - Copyright 2013-2022 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Annex.Url ( + withUrlOptions, + withUrlOptionsPromptingCreds, + getUrlOptions, + getUserAgent, + ipAddressesUnlimited, + checkBoth, + download, + download', + exists, + getUrlInfo, + U.URLString, + U.UrlOptions(..), + U.UrlInfo(..), + U.sinkResponseFile, + U.matchStatusCodeException, + U.downloadConduit, + U.downloadPartial, + U.parseURIRelaxed, + U.allowedScheme, + U.assumeUrlExists, +) where + +import Annex.Common +import qualified Annex +import qualified Utility.Url as U +import qualified Utility.Url.Parse as U +import Utility.Hash (IncrementalVerifier) +import Utility.IPAddress +import Network.HTTP.Client.Restricted +import Utility.Metered +import Git.Credential +import qualified BuildInfo + +import Network.Socket +import Network.HTTP.Client +import Network.HTTP.Client.TLS +import Text.Read +import qualified Data.Set as S + +defaultUserAgent :: U.UserAgent +defaultUserAgent = "git-annex/" ++ BuildInfo.packageversion + +getUserAgent :: Annex U.UserAgent +getUserAgent = Annex.getRead $ + fromMaybe defaultUserAgent . Annex.useragent + +getUrlOptions :: Annex U.UrlOptions +getUrlOptions = Annex.getState Annex.urloptions >>= \case + Just uo -> return uo + Nothing -> do + uo <- mk + Annex.changeState $ \s -> s + { Annex.urloptions = Just uo } + return uo + where + mk = do + (urldownloader, manager) <- checkallowedaddr + U.mkUrlOptions + <$> (Just <$> getUserAgent) + <*> headers + <*> pure urldownloader + <*> pure manager + <*> (annexAllowedUrlSchemes <$> Annex.getGitConfig) + <*> pure (Just (\u -> "Configuration of annex.security.allowed-url-schemes does not allow accessing " ++ show u)) + <*> pure U.noBasicAuth + + headers = annexHttpHeadersCommand <$> Annex.getGitConfig >>= \case + Just cmd -> lines <$> liftIO (readProcess "sh" ["-c", cmd]) + Nothing -> annexHttpHeaders <$> Annex.getGitConfig + + checkallowedaddr = words . annexAllowedIPAddresses <$> Annex.getGitConfig >>= \case + ["all"] -> do + curlopts <- map Param . annexWebOptions <$> Annex.getGitConfig + allowedurlschemes <- annexAllowedUrlSchemes <$> Annex.getGitConfig + let urldownloader = if null curlopts && not (any (`S.notMember` U.conduitUrlSchemes) allowedurlschemes) + then U.DownloadWithConduit $ + U.DownloadWithCurlRestricted mempty + else U.DownloadWithCurl curlopts + manager <- liftIO $ U.newManager $ + avoidtimeout $ tlsManagerSettings + return (urldownloader, manager) + allowedaddrsports -> do + addrmatcher <- liftIO $ + (\l v -> any (\f -> f v) l) . 
catMaybes + <$> mapM (uncurry makeAddressMatcher) + (mapMaybe splitAddrPort allowedaddrsports) + -- Default to not allowing access to loopback + -- and private IP addresses to avoid data + -- leakage. + let isallowed addr + | addrmatcher addr = True + | isLoopbackAddress addr = False + | isPrivateAddress addr = False + | otherwise = True + let connectionrestricted = connectionRestricted + ("Configuration of annex.security.allowed-ip-addresses does not allow accessing address " ++) + let r = addressRestriction $ \addr -> + if isallowed (addrAddress addr) + then Nothing + else Just (connectionrestricted addr) + (settings, pr) <- liftIO $ + mkRestrictedManagerSettings r Nothing Nothing + case pr of + Nothing -> return () + Just ProxyRestricted -> toplevelWarning True + "http proxy settings not used due to annex.security.allowed-ip-addresses configuration" + manager <- liftIO $ U.newManager $ + avoidtimeout settings + -- Curl is not used, as its interface does not allow + -- preventing it from accessing specific IP addresses. + let urldownloader = U.DownloadWithConduit $ + U.DownloadWithCurlRestricted r + return (urldownloader, manager) + + -- http-client defaults to timing out a request after 30 seconds + -- or so, but some web servers are slower and git-annex has its own + -- separate timeout controls, so disable that. + avoidtimeout s = s { managerResponseTimeout = responseTimeoutNone } + +splitAddrPort :: String -> Maybe (String, Maybe PortNumber) +splitAddrPort s + -- "[addr]:port" (also allow "[addr]") + | "[" `isPrefixOf` s = case splitc ']' (drop 1 s) of + [a,cp] -> case splitc ':' cp of + ["",p] -> do + pn <- readMaybe p + return (a, Just pn) + [""] -> Just (a, Nothing) + _ -> Nothing + _ -> Nothing + | otherwise = Just (s, Nothing) + +ipAddressesUnlimited :: Annex Bool +ipAddressesUnlimited = + ("all" == ) . annexAllowedIPAddresses <$> Annex.getGitConfig + +withUrlOptions :: (U.UrlOptions -> Annex a) -> Annex a +withUrlOptions a = a =<< getUrlOptions + +-- When downloading a url, if authentication is needed, uses +-- git-credential to prompt for username and password. +-- +-- Note that, when the downloader is curl, it will not use git-credential. +-- If the user wants to, they can configure curl to use a netrc file that +-- handles authentication.
+withUrlOptionsPromptingCreds :: (U.UrlOptions -> Annex a) -> Annex a +withUrlOptionsPromptingCreds a = do + g <- Annex.gitRepo + uo <- getUrlOptions + prompter <- mkPrompter + cc <- Annex.getRead Annex.gitcredentialcache + a $ uo + { U.getBasicAuth = \u -> prompter $ + getBasicAuthFromCredential g cc u + } + +checkBoth :: U.URLString -> Maybe Integer -> U.UrlOptions -> Annex Bool +checkBoth url expected_size uo = + liftIO (U.checkBoth url expected_size uo) >>= \case + Right r -> return r + Left err -> warning (UnquotedString err) >> return False + +download :: MeterUpdate -> Maybe IncrementalVerifier -> U.URLString -> FilePath -> U.UrlOptions -> Annex Bool +download meterupdate iv url file uo = + liftIO (U.download meterupdate iv url file uo) >>= \case + Right () -> return True + Left err -> warning (UnquotedString err) >> return False + +download' :: MeterUpdate -> Maybe IncrementalVerifier -> U.URLString -> FilePath -> U.UrlOptions -> Annex (Either String ()) +download' meterupdate iv url file uo = + liftIO (U.download meterupdate iv url file uo) + +exists :: U.URLString -> U.UrlOptions -> Annex Bool +exists url uo = liftIO (U.exists url uo) >>= \case + Right b -> return b + Left err -> warning (UnquotedString err) >> return False + +getUrlInfo :: U.URLString -> U.UrlOptions -> Annex (Either String U.UrlInfo) +getUrlInfo url uo = liftIO (U.getUrlInfo url uo) diff --git a/Annex/VariantFile.hs b/Annex/VariantFile.hs new file mode 100644 index 0000000000..781732368d --- /dev/null +++ b/Annex/VariantFile.hs @@ -0,0 +1,45 @@ +{- git-annex .variant files for automatic merge conflict resolution + - + - Copyright 2014 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.VariantFile where + +import Annex.Common +import Utility.Hash + +import qualified Data.ByteString as S + +variantMarker :: String +variantMarker = ".variant-" + +mkVariant :: FilePath -> String -> FilePath +mkVariant file variant = takeDirectory file + </> dropExtension (takeFileName file) + ++ variantMarker ++ variant + ++ takeExtension file + +{- The filename to use when resolving a conflicted merge of a file, + - that points to a key. + - + - Something derived from the key needs to be included in the filename, + - but rather than exposing the whole key to the user, a very weak hash + - is used. There is a very real, although still unlikely, chance of + - conflicts using this hash. + - + - In the event that there is a conflict with the filename generated + - for some other key, that conflict will itself be handled by the + - conflicted merge resolution code. That case is detected, and the full + - key is used in the filename. + -} +variantFile :: FilePath -> Key -> FilePath +variantFile file key + | doubleconflict = mkVariant file (fromRawFilePath (keyFile key)) + | otherwise = mkVariant file (shortHash $ serializeKey' key) + where + doubleconflict = variantMarker `isInfixOf` file + +shortHash :: S.ByteString -> String +shortHash = take 4 . show . md5s diff --git a/Annex/VectorClock.hs b/Annex/VectorClock.hs new file mode 100644 index 0000000000..db2c63c0bd --- /dev/null +++ b/Annex/VectorClock.hs @@ -0,0 +1,83 @@ +{- git-annex vector clocks + - + - These are basically timestamps. However, when logging a new + - value, if the old value has a vector clock that is the same or greater + - than the current vector clock, the old vector clock is incremented. + - This way, clock skew does not cause confusion. + - + - Copyright 2017-2021 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher.
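+ -
+ - (Worked example, not part of the original patch: if the newest logged
+ - value carries vector clock 1100 but a skewed writer's own clock says
+ - 1000, advanceVectorClock below writes the new entry with 1101, so it
+ - still sorts as most recent.)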
+ -} + +module Annex.VectorClock ( + module Annex.VectorClock, + module Types.VectorClock, +) where + +import Types.VectorClock +import Annex.Common +import qualified Annex +import Utility.TimeStamp + +import Data.ByteString.Builder +import qualified Data.Attoparsec.ByteString.Lazy as A + +currentVectorClock :: Annex CandidateVectorClock +currentVectorClock = liftIO =<< Annex.getState Annex.getvectorclock + +-- Runs the action and uses the same vector clock throughout, +-- except when it's necessary to use a newer one due to a past value having +-- a newer vector clock. +-- +-- When the action modifies several files in the git-annex branch, +-- this can cause less space to be used, since the same vector clock +-- value is used, which can compress better. +-- +-- However, this should not be used when running a long-duration action, +-- because the vector clock is based on the start of the action, and not on +-- the later points where it writes changes. For example, if this were +-- used across downloads of several files, the location log information +-- would have an earlier vector clock than necessary, which might cause it +-- to be disregarded in favor of other information that was collected +-- at an earlier point in time than when the transfers completed and the +-- log was written. +reuseVectorClockWhile :: Annex a -> Annex a +reuseVectorClockWhile = bracket setup cleanup . const + where + setup = do + origget <- Annex.getState Annex.getvectorclock + vc <- liftIO origget + use (pure vc) + return origget + + cleanup origget = use origget + + use vc = Annex.changeState $ \s -> + s { Annex.getvectorclock = vc } + +-- Convert a candidate vector clock into the final one to use, +-- advancing it when necessary to get ahead of a previously +-- used vector clock. +advanceVectorClock :: CandidateVectorClock -> [VectorClock] -> VectorClock +advanceVectorClock (CandidateVectorClock c) [] = VectorClock c +advanceVectorClock (CandidateVectorClock c) prevs + | prev >= VectorClock c = case prev of + VectorClock v -> VectorClock (v + 1) + Unknown -> VectorClock c + | otherwise = VectorClock c + where + prev = maximum prevs + +formatVectorClock :: VectorClock -> String +formatVectorClock Unknown = "0" +formatVectorClock (VectorClock t) = show t + +buildVectorClock :: VectorClock -> Builder +buildVectorClock = string7 . formatVectorClock + +parseVectorClock :: String -> Maybe VectorClock +parseVectorClock t = VectorClock <$> parsePOSIXTime t + +vectorClockParser :: A.Parser VectorClock +vectorClockParser = VectorClock <$> parserPOSIXTime diff --git a/Annex/VectorClock/Utility.hs b/Annex/VectorClock/Utility.hs new file mode 100644 index 0000000000..76b74d9cd5 --- /dev/null +++ b/Annex/VectorClock/Utility.hs @@ -0,0 +1,33 @@ +{- git-annex vector clock utilities + - + - Copyright 2017-2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.VectorClock.Utility where + +import Data.Time.Clock.POSIX + +import Types.VectorClock +import Utility.Env +import Utility.TimeStamp + +startVectorClock :: IO (IO CandidateVectorClock) +startVectorClock = go =<< getEnv "GIT_ANNEX_VECTOR_CLOCK" + where + go Nothing = timebased + go (Just s) = case parsePOSIXTime s of + Just t -> return (pure (CandidateVectorClock t)) + Nothing -> timebased + -- Avoid using fractional seconds in the CandidateVectorClock. + -- This reduces the size of the packed git-annex branch by up + -- to 8%.
+ -- + -- Due to the use of vector clocks, high resolution timestamps are + -- not necessary to make clear which information is most recent when + -- eg, a value is changed repeatedly in the same second. In such a + -- case, the vector clock will be advanced to the next second after + -- the last modification. + timebased = return $ + CandidateVectorClock . truncateResolution 0 <$> getPOSIXTime diff --git a/Annex/Verify.hs b/Annex/Verify.hs new file mode 100644 index 0000000000..697ffeadc0 --- /dev/null +++ b/Annex/Verify.hs @@ -0,0 +1,398 @@ +{- verification + - + - Copyright 2010-2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} + +module Annex.Verify ( + shouldVerify, + verifyKeyContentPostRetrieval, + verifyKeyContent, + verifyKeyContent', + Verification(..), + unVerified, + warnUnverifiableInsecure, + isVerifiable, + startVerifyKeyContentIncrementally, + finishVerifyKeyContentIncrementally, + finishVerifyKeyContentIncrementally', + verifyKeyContentIncrementally, + IncrementalVerifier(..), + writeVerifyChunk, + resumeVerifyFromOffset, + tailVerify, +) where + +import Annex.Common +import qualified Annex +import qualified Types.Remote +import Types.Remote (VerifyConfigA(..)) +import qualified Types.Backend +import qualified Backend +import Types.Remote (unVerified, Verification(..), RetrievalSecurityPolicy(..)) +import Utility.Hash (IncrementalVerifier(..)) +import Utility.Metered +import Annex.WorkerPool +import Types.WorkerPool +import Types.Key + +import Control.Concurrent.STM +import Control.Concurrent.Async +import qualified Data.ByteString as S +#if WITH_INOTIFY +import qualified System.INotify as INotify +import qualified System.FilePath.ByteString as P +#endif + +shouldVerify :: VerifyConfig -> Annex Bool +shouldVerify AlwaysVerify = return True +shouldVerify NoVerify = return False +shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig +shouldVerify (RemoteVerify r) = + (shouldVerify DefaultVerify + <&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r))) + -- Export remotes are not key/value stores, so always verify + -- content from them even when verification is disabled. + <||> Types.Remote.isExportSupported r + +{- Verifies that a file is the expected content of a key. + - + - Configuration can prevent verification, for either a + - particular remote or always, unless the RetrievalSecurityPolicy + - requires verification. + - + - Most keys have a known size, and if so, the file size is checked. + - + - When the key's backend allows verifying the content (via checksum), + - it is checked. + - + - If the RetrievalSecurityPolicy requires verification and the key's + - backend doesn't support it, the verification will fail. 
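+ -
+ - (Example, not part of the original patch: with annex-verify=false on
+ - a remote whose policy is RetrievalVerifiableKeysSecure, a SHA256 key
+ - is still checksummed after retrieval, while a URL key, which has no
+ - checksum, fails unless annex.security.allow-unverified-downloads is
+ - set.)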
+ -} +verifyKeyContentPostRetrieval :: RetrievalSecurityPolicy -> VerifyConfig -> Verification -> Key -> RawFilePath -> Annex Bool +verifyKeyContentPostRetrieval rsp v verification k f = case (rsp, verification) of + (_, Verified) -> return True + (RetrievalVerifiableKeysSecure, _) -> ifM (isVerifiable k) + ( verify + , ifM (annexAllowUnverifiedDownloads <$> Annex.getGitConfig) + ( verify + , warnUnverifiableInsecure k >> return False + ) + ) + (_, UnVerified) -> ifM (shouldVerify v) + ( verify + , return True + ) + (_, IncompleteVerify _) -> ifM (shouldVerify v) + ( verify + , return True + ) + (_, MustVerify) -> verify + (_, MustFinishIncompleteVerify _) -> verify + where + verify = enteringStage VerifyStage $ + case verification of + IncompleteVerify iv -> + resumeVerifyKeyContent k f iv + MustFinishIncompleteVerify iv -> + resumeVerifyKeyContent k f iv + _ -> verifyKeyContent k f + +-- When possible, does an incremental verification, because that can be +-- faster. Eg, the VURL backend can need to try multiple checksums and only +-- with an incremental verification does it avoid reading files twice. +verifyKeyContent :: Key -> RawFilePath -> Annex Bool +verifyKeyContent k f = verifyKeySize k f <&&> verifyKeyContent' k f + +-- Does not verify size. +verifyKeyContent' :: Key -> RawFilePath -> Annex Bool +verifyKeyContent' k f = + Backend.maybeLookupBackendVariety (fromKey keyVariety k) >>= \case + Nothing -> return True + Just b -> case (Types.Backend.verifyKeyContentIncrementally b, Types.Backend.verifyKeyContent b) of + (Nothing, Nothing) -> return True + (Just mkiv, mverifier) -> do + iv <- mkiv k + showAction (UnquotedString (descIncrementalVerifier iv)) + res <- liftIO $ catchDefaultIO Nothing $ + withBinaryFile (fromRawFilePath f) ReadMode $ \h -> do + feedIncrementalVerifier h iv + finalizeIncrementalVerifier iv + case res of + Just res' -> return res' + Nothing -> case mverifier of + Nothing -> return True + Just verifier -> verifier k f + (Nothing, Just verifier) -> verifier k f + +resumeVerifyKeyContent :: Key -> RawFilePath -> IncrementalVerifier -> Annex Bool +resumeVerifyKeyContent k f iv = liftIO (positionIncrementalVerifier iv) >>= \case + Nothing -> fallback + Just endpos -> do + fsz <- liftIO $ catchDefaultIO 0 $ getFileSize f + if fsz < endpos + then fallback + else case fromKey keySize k of + Just size | fsz /= size -> return False + _ -> go fsz endpos >>= \case + Just v -> return v + Nothing -> fallback + where + fallback = verifyKeyContent k f + + go fsz endpos + | fsz == endpos = + liftIO $ catchDefaultIO (Just False) $ + finalizeIncrementalVerifier iv + | otherwise = do + showAction (UnquotedString (descIncrementalVerifier iv)) + liftIO $ catchDefaultIO (Just False) $ + withBinaryFile (fromRawFilePath f) ReadMode $ \h -> do + hSeek h AbsoluteSeek endpos + feedIncrementalVerifier h iv + finalizeIncrementalVerifier iv + +feedIncrementalVerifier :: Handle -> IncrementalVerifier -> IO () +feedIncrementalVerifier h iv = do + b <- S.hGetSome h chunk + if S.null b + then return () + else do + updateIncrementalVerifier iv b + feedIncrementalVerifier h iv + where + chunk = 65536 + +verifyKeySize :: Key -> RawFilePath -> Annex Bool +verifyKeySize k f = case fromKey keySize k of + Just size -> do + size' <- liftIO $ catchDefaultIO 0 $ getFileSize f + return (size' == size) + Nothing -> return True + +warnUnverifiableInsecure :: Key -> Annex () +warnUnverifiableInsecure k = warning $ UnquotedString $ unwords + [ "Getting " ++ kv ++ " keys with this remote is not secure;" + , 
"the content cannot be verified to be correct." + , "(Use annex.security.allow-unverified-downloads to bypass" + , "this safety check.)" + ] + where + kv = decodeBS (formatKeyVariety (fromKey keyVariety k)) + +isVerifiable :: Key -> Annex Bool +isVerifiable k = maybe False (isJust . Types.Backend.verifyKeyContent) + <$> Backend.maybeLookupBackendVariety (fromKey keyVariety k) + +startVerifyKeyContentIncrementally :: VerifyConfig -> Key -> Annex (Maybe IncrementalVerifier) +startVerifyKeyContentIncrementally verifyconfig k = + ifM (shouldVerify verifyconfig) + ( Backend.maybeLookupBackendVariety (fromKey keyVariety k) >>= \case + Just b -> case Types.Backend.verifyKeyContentIncrementally b of + Just v -> Just <$> v k + Nothing -> return Nothing + Nothing -> return Nothing + , return Nothing + ) + +finishVerifyKeyContentIncrementally :: Maybe IncrementalVerifier -> Annex (Bool, Verification) +finishVerifyKeyContentIncrementally = finishVerifyKeyContentIncrementally' False + +finishVerifyKeyContentIncrementally' :: Bool -> Maybe IncrementalVerifier -> Annex (Bool, Verification) +finishVerifyKeyContentIncrementally' _ Nothing = + return (True, UnVerified) +finishVerifyKeyContentIncrementally' quiet (Just iv) = + liftIO (finalizeIncrementalVerifier iv) >>= \case + Just True -> return (True, Verified) + Just False -> do + unless quiet $ + warning "verification of content failed" + return (False, UnVerified) + -- Incremental verification was not able to be done. + Nothing -> return (True, UnVerified) + +verifyKeyContentIncrementally :: VerifyConfig -> Key -> (Maybe IncrementalVerifier -> Annex ()) -> Annex Verification +verifyKeyContentIncrementally verifyconfig k a = do + miv <- startVerifyKeyContentIncrementally verifyconfig k + a miv + snd <$> finishVerifyKeyContentIncrementally miv + +writeVerifyChunk :: Maybe IncrementalVerifier -> Handle -> S.ByteString -> IO () +writeVerifyChunk (Just iv) h c = do + S.hPut h c + updateIncrementalVerifier iv c +writeVerifyChunk Nothing h c = S.hPut h c + +{- Given a file handle that is open for reading (and likely also for writing), + - and an offset, feeds the current content of the file up to the offset to + - the IncrementalVerifier. Leaves the file seeked to the offset. + - Returns the meter with the offset applied. -} +resumeVerifyFromOffset + :: Integer + -> Maybe IncrementalVerifier + -> MeterUpdate + -> Handle + -> IO MeterUpdate +resumeVerifyFromOffset o incrementalverifier meterupdate h + | o /= 0 = do + maybe noop (`go` o) incrementalverifier + -- Make sure the handle is seeked to the offset. + -- (Reading the file probably left it there + -- when that was done, but let's be sure.) + hSeek h AbsoluteSeek o + return offsetmeterupdate + | otherwise = return meterupdate + where + offsetmeterupdate = offsetMeterUpdate meterupdate (toBytesProcessed o) + go iv n + | n == 0 = return () + | otherwise = do + let c = if n > fromIntegral defaultChunkSize + then defaultChunkSize + else fromIntegral n + b <- S.hGet h c + updateIncrementalVerifier iv b + unless (b == S.empty) $ + go iv (n - fromIntegral (S.length b)) + +-- | Runs a writer action that retrieves to a file. In another thread, +-- reads the file as it grows, and feeds it to the incremental verifier. +-- +-- Once the writer finishes, this returns quickly. It may not feed +-- the entire content of the file to the incremental verifier. +-- +-- The file does not need to exist yet when this is called. It will wait +-- for the file to appear before opening it and starting verification. 
+-- +-- This is not supported for all OSs, and on OS's where it is not +-- supported, verification will not happen. +-- +-- The writer probably needs to be another process. If the file is being +-- written directly by git-annex, the haskell RTS will prevent opening it +-- for read at the same time, and verification will not happen. +-- +-- Note that there are situations where the file may fail to verify despite +-- having the correct content. For example, when the file is written out +-- of order, or gets replaced part way through. To deal with such cases, +-- when verification fails, it should not be treated as if the file's +-- content is known to be incorrect, but instead as an indication that the +-- file should be verified again, once it's done being written to. +-- +-- (It is also possible, in theory, for a file to verify despite having +-- incorrect content. For that to happen, the file would need to have +-- the right content when this checks it, but then the content gets +-- changed later by whatever is writing to the file.) +-- +-- This should be fairly efficient, reading from the disk cache, +-- as long as the writer does not get very far ahead of it. However, +-- there are situations where it would be much less expensive to verify +-- chunks as they are being written. For example, when resuming with +-- a lot of content in the file, all that content needs to be read, +-- and if the disk is slow, the reader may never catch up to the writer, +-- and the disk cache may never speed up reads. So this should only be +-- used when there's not a better way to incrementally verify. +tailVerify :: Maybe IncrementalVerifier -> RawFilePath -> Annex a -> Annex a +tailVerify (Just iv) f writer = do + finished <- liftIO newEmptyTMVarIO + t <- liftIO $ async $ tailVerify' iv f finished + let finishtail = do + liftIO $ atomically $ putTMVar finished () + liftIO (wait t) + writer `finally` finishtail +tailVerify Nothing _ writer = writer + +tailVerify' :: IncrementalVerifier -> RawFilePath -> TMVar () -> IO () +#if WITH_INOTIFY +tailVerify' iv f finished = + tryNonAsync go >>= \case + Right r -> return r + Left _ -> unableIncrementalVerifier iv + where + -- Watch the directory containing the file, and wait for + -- the file to be modified. It's possible that the file already + -- exists before the downloader starts, but it replaces it instead + -- of resuming, and waiting for modification deals with such + -- situations. + inotifydirchange i cont = + INotify.addWatch i [INotify.Modify] dir $ \case + -- Ignore changes to other files in the directory. + INotify.Modified { INotify.maybeFilePath = fn } + | fn == Just basef -> cont + _ -> noop + where + (dir, basef) = P.splitFileName f + + inotifyfilechange i = INotify.addWatch i [INotify.Modify] f . const + + go = INotify.withINotify $ \i -> do + modified <- newEmptyTMVarIO + let signalmodified = atomically $ void $ tryPutTMVar modified () + wd <- inotifydirchange i signalmodified + let cleanup = void . tryNonAsync . 
INotify.removeWatch + let stop w = do + cleanup w + unableIncrementalVerifier iv + waitopen modified >>= \case + Nothing -> stop wd + Just h -> do + cleanup wd + wf <- inotifyfilechange i signalmodified + tryNonAsync (follow h modified) >>= \case + Left _ -> stop wf + Right () -> cleanup wf + hClose h + + waitopen modified = do + v <- atomically $ + (Just <$> takeTMVar modified) + `orElse` + ((const Nothing) <$> takeTMVar finished) + case v of + Just () -> do + r <- tryNonAsync $ + tryWhenExists (openBinaryFile (fromRawFilePath f) ReadMode) >>= \case + Just h -> return (Just h) + -- File does not exist, must have been + -- deleted. Wait for next modification + -- and try again. + Nothing -> waitopen modified + case r of + Right r' -> return r' + -- Permission error prevents + -- reading, or this same process + -- is writing to the file, + -- and it cannot be read at the + -- same time. + Left _ -> return Nothing + -- finished without the file being modified + Nothing -> return Nothing + + follow h modified = do + b <- S.hGetNonBlocking h chunk + if S.null b + then do + -- We've caught up to the writer. + -- Wait for the file to get modified again, + -- or until we're told it is done being + -- written. + cont <- atomically $ + (const (follow h modified) + <$> takeTMVar modified) + `orElse` + (const (return ()) + <$> takeTMVar finished) + cont + else do + updateIncrementalVerifier iv b + atomically (tryTakeTMVar finished) >>= \case + Nothing -> follow h modified + Just () -> return () + + chunk = 65536 +#else +tailVerify' iv _ _ = unableIncrementalVerifier iv +#endif diff --git a/Annex/Version.hs b/Annex/Version.hs new file mode 100644 index 0000000000..c3504cfd93 --- /dev/null +++ b/Annex/Version.hs @@ -0,0 +1,68 @@ +{- git-annex repository versioning + - + - Copyright 2010-2022 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} + +module Annex.Version where + +import Annex.Common +import Config +import Git.Types +import Types.RepoVersion +import qualified Annex + +import qualified Data.Map as M + +defaultVersion :: RepoVersion +defaultVersion = RepoVersion 10 + +latestVersion :: RepoVersion +latestVersion = RepoVersion 10 + +supportedVersions :: [RepoVersion] +supportedVersions = map RepoVersion [8, 9, 10] + +upgradeableVersions :: [RepoVersion] +#ifndef mingw32_HOST_OS +upgradeableVersions = map RepoVersion [0..10] +#else +upgradeableVersions = map RepoVersion [2..10] +#endif + +autoUpgradeableVersions :: M.Map RepoVersion RepoVersion +autoUpgradeableVersions = M.fromList + [ (RepoVersion 3, defaultVersion) + , (RepoVersion 4, defaultVersion) + , (RepoVersion 5, defaultVersion) + , (RepoVersion 6, defaultVersion) + , (RepoVersion 7, defaultVersion) + , (RepoVersion 8, defaultVersion) + , (RepoVersion 9, defaultVersion) + ] + +versionField :: ConfigKey +versionField = annexConfig "version" + +getVersion :: Annex (Maybe RepoVersion) +getVersion = annexVersion <$> Annex.getGitConfig + +setVersion :: RepoVersion -> Annex () +setVersion (RepoVersion v) = setConfig versionField (show v) + +removeVersion :: Annex () +removeVersion = unsetConfig versionField + +versionSupportsFilterProcess :: Maybe RepoVersion -> Bool +versionSupportsFilterProcess (Just v) + | v >= RepoVersion 9 = True +versionSupportsFilterProcess _ = False + +versionNeedsWritableContentFiles :: Maybe RepoVersion -> Bool +versionNeedsWritableContentFiles (Just v) + | v >= RepoVersion 10 = False +versionNeedsWritableContentFiles _ = True diff --git a/Annex/View.hs b/Annex/View.hs new file mode 100644 index 0000000000..482ce17c3a --- /dev/null +++ b/Annex/View.hs @@ -0,0 +1,636 @@ +{- metadata based branch views + - + - Copyright 2014-2023 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings, PackageImports #-} + +module Annex.View where + +import Annex.Common +import Annex.View.ViewedFile +import Types.View +import Types.AdjustedBranch +import Types.MetaData +import Annex.MetaData +import qualified Annex +import qualified Annex.Branch +import qualified Git +import qualified Git.DiffTree as DiffTree +import qualified Git.Branch +import qualified Git.LsFiles +import qualified Git.LsTree +import qualified Git.Ref +import Git.CatFile +import Git.UpdateIndex +import Git.Sha +import Git.Types +import Git.FilePath +import Annex.WorkTree +import Annex.GitOverlay +import Annex.Link +import Annex.CatFile +import Annex.Concurrent +import Annex.Content.Presence +import Logs +import Logs.MetaData +import Logs.View +import Utility.Glob +import Types.Command +import CmdLine.Action +import qualified Utility.RawFilePath as R + +import qualified Data.Text as T +import qualified Data.ByteString as B +import qualified Data.Set as S +import qualified Data.Map as M +import qualified System.FilePath.ByteString as P +import Control.Concurrent.Async +import "mtl" Control.Monad.Writer + +{- Each visible ViewFilter in a view results in another level of + - subdirectory nesting. When a file matches multiple ways, it will appear + - in multiple subdirectories. This means there is a bit of an exponential + - blowup with a single file appearing in a crazy number of places! + - + - Capping the view size to 5 is reasonable; who wants to dig + - through 5+ levels of subdirectories to find anything?
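+ -
+ - (Worked example, not part of the original patch: a view on year=* and
+ - tag=* nests two directory levels; a file from 2024 carrying three tags
+ - appears under three different tag directories below 2024/.)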
+ -} +viewTooLarge :: View -> Bool +viewTooLarge view = visibleViewSize view > 5 + +visibleViewSize :: View -> Int +visibleViewSize = length . filter viewVisible . viewComponents + +{- Parses field=value, field!=value, field?=value, tag, !tag, and ?tag + - + - Note that the field may not be a legal metadata field name, + - but it's let through anyway. + - This is useful when matching on directory names with spaces, + - which are not legal MetaFields. + -} +parseViewParam :: ViewUnset -> String -> (MetaField, ViewFilter) +parseViewParam vu s = case separate (== '=') s of + ('!':tag, []) | not (null tag) -> + ( tagMetaField + , mkExcludeValues tag + ) + ('?':tag, []) | not (null tag) -> + ( tagMetaField + , mkFilterOrUnsetValues tag + ) + (tag, []) -> + ( tagMetaField + , mkFilterValues tag + ) + (field, wanted) + | end field == "!" -> + ( mkMetaFieldUnchecked (T.pack (beginning field)) + , mkExcludeValues wanted + ) + | end field == "?" -> + ( mkMetaFieldUnchecked (T.pack (beginning field)) + , mkFilterOrUnsetValues wanted + ) + | otherwise -> + ( mkMetaFieldUnchecked (T.pack field) + , mkFilterValues wanted + ) + where + mkExcludeValues = ExcludeValues . S.singleton . toMetaValue . encodeBS + mkFilterValues v + | any (`elem` v) ['*', '?'] = FilterGlob v + | otherwise = FilterValues $ S.singleton $ toMetaValue $ encodeBS v + mkFilterOrUnsetValues v + | any (`elem` v) ['*', '?'] = FilterGlobOrUnset v vu + | otherwise = FilterValuesOrUnset (S.singleton $ toMetaValue $ encodeBS v) vu + +data ViewChange = Unchanged | Narrowing | Widening + deriving (Ord, Eq, Show) + +{- Updates a view, adding new fields to filter on (Narrowing), + - or allowing new values in an existing field (Widening). -} +refineView :: View -> [(MetaField, ViewFilter)] -> (View, ViewChange) +refineView origview = checksize . calc Unchanged origview + where + calc c v [] = (v, c) + calc c v ((f, vf):rest) = + let (v', c') = refine v f vf + in calc (max c c') v' rest + + refine view field vf + | field `elem` map viewField (viewComponents view) = + let (components', viewchanges) = runWriter $ + mapM (\c -> updateViewComponent c field vf) (viewComponents view) + viewchange = if field `elem` map viewField (viewComponents origview) + then maximum viewchanges + else Narrowing + in (view { viewComponents = components' }, viewchange) + | otherwise = + let component = mkViewComponent field vf + view' = view { viewComponents = component : viewComponents view } + in (view', Narrowing) + + checksize r@(v, _) + | viewTooLarge v = giveup $ "View is too large (" ++ show (visibleViewSize v) ++ " levels of subdirectories)" + | otherwise = r + +updateViewComponent :: ViewComponent -> MetaField -> ViewFilter -> Writer [ViewChange] ViewComponent +updateViewComponent c field vf + | viewField c == field = do + let (newvf, viewchange) = combineViewFilter (viewFilter c) vf + tell [viewchange] + return $ mkViewComponent field newvf + | otherwise = return c + +{- Adds an additional filter to a view. This can only result in narrowing + - the view. Multivalued filters are added in non-visible form. -} +filterView :: View -> [(MetaField, ViewFilter)] -> View +filterView v vs = v { viewComponents = viewComponents f' ++ viewComponents v} + where + f = fst $ refineView (v {viewComponents = []}) vs + f' = f { viewComponents = map toinvisible (viewComponents f) } + toinvisible c = c { viewVisible = False } + +{- Combine old and new ViewFilters, yielding a result that matches + - either old+new, or only new. 
Which depends on the types of things + - being combined. + -} +combineViewFilter :: ViewFilter -> ViewFilter -> (ViewFilter, ViewChange) +combineViewFilter old@(FilterValues olds) (FilterValues news) + | combined == old = (combined, Unchanged) + | otherwise = (combined, Widening) + where + combined = FilterValues (S.union olds news) +combineViewFilter old@(ExcludeValues olds) (ExcludeValues news) + | combined == old = (combined, Unchanged) + | otherwise = (combined, Narrowing) + where + combined = ExcludeValues (S.union olds news) +{- If we have FilterValues and change to a FilterGlob, + - it's always a widening change, because the glob could match other + - values. OTOH, going the other way, it's a Narrowing change if the old + - glob matches all the new FilterValues. -} +combineViewFilter (FilterValues _) newglob@(FilterGlob _) = + (newglob, Widening) +combineViewFilter (FilterGlob oldglob) new@(FilterValues s) + | all (matchGlob (compileGlob oldglob CaseInsensitive (GlobFilePath False)) . decodeBS . fromMetaValue) (S.toList s) = (new, Narrowing) + | otherwise = (new, Widening) +{- With two globs, the old one is discarded, and the new one is used. + - We can tell if that's a narrowing change by checking if the old + - glob matches the new glob. For example, "*" matches "foo*", + - so that's narrowing. While "f?o" does not match "f??", so that's + - widening. -} +combineViewFilter (FilterGlob old) newglob@(FilterGlob new) + | old == new = (newglob, Unchanged) + | matchGlob (compileGlob old CaseInsensitive (GlobFilePath False)) new = (newglob, Narrowing) + | otherwise = (newglob, Widening) +{- Combining FilterValuesOrUnset and FilterGlobOrUnset with FilterValues + - and FilterGlob maintains the OrUnset if the second parameter has it, + - and is otherwise the same as combining without OrUnset, except that + - eliminating the OrUnset can be narrowing, and adding it can be widening. 
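+ -
+ - For example (illustrative): combining FilterValues {a} with
+ - FilterValuesOrUnset {b} vu yields FilterValuesOrUnset {a,b} vu,
+ - a Widening change, since files lacking the field now also match.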
-} +combineViewFilter old@(FilterValuesOrUnset olds _) (FilterValuesOrUnset news newvu) + | combined == old = (combined, Unchanged) + | otherwise = (combined, Widening) + where + combined = FilterValuesOrUnset (S.union olds news) newvu +combineViewFilter (FilterValues olds) (FilterValuesOrUnset news vu) = + (combined, Widening) + where + combined = FilterValuesOrUnset (S.union olds news) vu +combineViewFilter old@(FilterValuesOrUnset olds _) (FilterValues news) + | combined == old = (combined, Narrowing) + | otherwise = (combined, Widening) + where + combined = FilterValues (S.union olds news) +combineViewFilter (FilterValuesOrUnset _ _) newglob@(FilterGlob _) = + (newglob, Widening) +combineViewFilter (FilterGlob _) new@(FilterValuesOrUnset _ _) = + (new, Widening) +combineViewFilter (FilterValues _) newglob@(FilterGlobOrUnset _ _) = + (newglob, Widening) +combineViewFilter (FilterValuesOrUnset _ _) newglob@(FilterGlobOrUnset _ _) = + (newglob, Widening) +combineViewFilter (FilterGlobOrUnset oldglob _) new@(FilterValues _) = + combineViewFilter (FilterGlob oldglob) new +combineViewFilter (FilterGlobOrUnset oldglob _) new@(FilterValuesOrUnset _ _) = + let (_, viewchange) = combineViewFilter (FilterGlob oldglob) new + in (new, viewchange) +combineViewFilter (FilterGlobOrUnset old _) newglob@(FilterGlobOrUnset new _) + | old == new = (newglob, Unchanged) + | matchGlob (compileGlob old CaseInsensitive (GlobFilePath False)) new = (newglob, Narrowing) + | otherwise = (newglob, Widening) +combineViewFilter (FilterGlob _) newglob@(FilterGlobOrUnset _ _) = + (newglob, Widening) +combineViewFilter (FilterGlobOrUnset _ _) newglob@(FilterGlob _) = + (newglob, Narrowing) +{- There is not a way to filter a value and also apply an exclude. So: + - When adding an exclude to a filter, use only the exclude. + - When adding a filter to an exclude, use only the filter. -} +combineViewFilter (FilterGlob _) new@(ExcludeValues _) = (new, Narrowing) +combineViewFilter (ExcludeValues _) new@(FilterGlob _) = (new, Widening) +combineViewFilter (FilterValues _) new@(ExcludeValues _) = (new, Narrowing) +combineViewFilter (ExcludeValues _) new@(FilterValues _) = (new, Widening) +combineViewFilter (FilterValuesOrUnset _ _) new@(ExcludeValues _) = (new, Narrowing) +combineViewFilter (ExcludeValues _) new@(FilterValuesOrUnset _ _) = (new, Widening) +combineViewFilter (FilterGlobOrUnset _ _) new@(ExcludeValues _) = (new, Narrowing) +combineViewFilter (ExcludeValues _) new@(FilterGlobOrUnset _ _) = (new, Widening) + +{- Generates views for a file from a branch, based on its metadata + - and the filename used in the branch. + - + - Note that a file may appear multiple times in a view, when it + - has multiple matching values for a MetaField used in the View. + - + - Of course if its MetaData does not match the View, it won't appear at + - all. + - + - Note that for efficiency, it's useful to partially + - evaluate this function with the view parameter and reuse + - the result. The globs in the view will then be compiled and memoized. + -} +viewedFiles :: View -> MkViewedFile -> FilePath -> MetaData -> [ViewedFile] +viewedFiles view = + let matchers = map viewComponentMatcher (viewComponents view) + in \mkviewedfile file metadata -> + let matches = map (\m -> m metadata) matchers + in if any isNothing matches + then [] + else + let paths = pathProduct $ + map (map toviewpath) (visible matches) + in if null paths + then [mkviewedfile file] + else map ( mkviewedfile file) paths + where + visible = map (fromJust . snd) . 
+ filter (viewVisible . fst) . + zip (viewComponents view) + + toviewpath (MatchingMetaValue v) = toViewPath v + toviewpath (MatchingUnset v) = toViewPath (toMetaValue (encodeBS v)) + +data MatchingValue = MatchingMetaValue MetaValue | MatchingUnset String + +{- Checks if metadata matches a ViewComponent filter, and if so + - returns the value, or values that match. Self-memoizing on ViewComponent. -} +viewComponentMatcher :: ViewComponent -> (MetaData -> Maybe [MatchingValue]) +viewComponentMatcher viewcomponent = \metadata -> + matcher Nothing (viewFilter viewcomponent) + (currentMetaDataValues metafield metadata) + where + metafield = viewField viewcomponent + matcher matchunset (FilterValues s) = + \values -> setmatches matchunset $ S.intersection s values + matcher matchunset (FilterGlob glob) = + let cglob = compileGlob glob CaseInsensitive (GlobFilePath False) + in \values -> setmatches matchunset $ + S.filter (matchGlob cglob . decodeBS . fromMetaValue) values + matcher _ (ExcludeValues excludes) = + \values -> + if S.null (S.intersection values excludes) + then Just [] + else Nothing + matcher _ (FilterValuesOrUnset s (ViewUnset u)) = + matcher (Just [MatchingUnset u]) (FilterValues s) + matcher _ (FilterGlobOrUnset glob (ViewUnset u)) = + matcher (Just [MatchingUnset u]) (FilterGlob glob) + + setmatches matchunset s + | S.null s = matchunset + | otherwise = Just $ + map MatchingMetaValue (S.toList s) + +-- This is '∕', a unicode character that displays the same as '/' but is +-- not it. It is encoded using the filesystem encoding, which allows it +-- to be used even when not in a unicode capable locale. +pseudoSlash :: String +pseudoSlash = "\56546\56456\56469" + +-- And this is '╲' similarly. +pseudoBackslash :: String +pseudoBackslash = "\56546\56469\56498" + +-- And this is '﹕' similarly. +pseudoColon :: String +pseudoColon = "\56559\56505\56469" + +toViewPath :: MetaValue -> FilePath +toViewPath = escapepseudo [] . decodeBS . fromMetaValue + where + escapepseudo s ('/':cs) = escapepseudo (pseudoSlash:s) cs + escapepseudo s ('\\':cs) = escapepseudo (pseudoBackslash:s) cs + escapepseudo s (':':cs) = escapepseudo (pseudoColon:s) cs + escapepseudo s ('%':cs) = escapepseudo ("%%":s) cs + escapepseudo s (c1:c2:c3:cs) + | [c1,c2,c3] == pseudoSlash = escapepseudo ("%":pseudoSlash:s) cs + | [c1,c2,c3] == pseudoBackslash = escapepseudo ("%":pseudoBackslash:s) cs + | [c1,c2,c3] == pseudoColon = escapepseudo ("%":pseudoColon:s) cs + | otherwise = escapepseudo ([c1]:s) (c2:c3:cs) + escapepseudo s (c:cs) = escapepseudo ([c]:s) cs + escapepseudo s [] = concat (reverse s) + +fromViewPath :: FilePath -> MetaValue +fromViewPath = toMetaValue . encodeBS . deescapepseudo [] + where + deescapepseudo s ('%':escapedc:cs) = deescapepseudo ([escapedc]:s) cs + deescapepseudo s (c1:c2:c3:cs) + | [c1,c2,c3] == pseudoSlash = deescapepseudo ("/":s) cs + | [c1,c2,c3] == pseudoBackslash = deescapepseudo ("\\":s) cs + | [c1,c2,c3] == pseudoColon = deescapepseudo (":":s) cs + | otherwise = deescapepseudo ([c1]:s) (c2:c3:cs) + deescapepseudo s cs = concat (reverse (cs:s)) + +prop_viewPath_roundtrips :: MetaValue -> Bool +prop_viewPath_roundtrips v = fromViewPath (toViewPath v) == v + +pathProduct :: [[FilePath]] -> [FilePath] +pathProduct [] = [] +pathProduct (l:ls) = foldl combinel l ls + where + combinel xs ys = [combine x y | x <- xs, y <- ys] + +{- Extracts the metadata from a ViewedFile, based on the view that was used + - to construct it. + - + - Derived metadata is excluded. 
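+ -
+ - (For example, with a view on the single visible field "author",
+ - a ViewedFile at "doe/file.jpg" yields author=doe.)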
+ -} +fromView :: View -> ViewedFile -> MetaData +fromView view f = MetaData $ m `M.difference` derived + where + m = M.fromList $ map convfield $ + filter (not . isviewunset) (zip visible values) + visible = filter viewVisible (viewComponents view) + paths = splitDirectories (dropFileName f) + values = map (S.singleton . fromViewPath) paths + MetaData derived = getViewedFileMetaData f + convfield (vc, v) = (viewField vc, v) + + -- When a directory is the one used to hold files that don't + -- have the metadata set, don't include it in the MetaData. + isviewunset (vc, v) = case viewFilter vc of + FilterValues {} -> False + FilterGlob {} -> False + ExcludeValues {} -> False + FilterValuesOrUnset _ (ViewUnset vu) -> isviewunset' vu v + FilterGlobOrUnset _ (ViewUnset vu) -> isviewunset' vu v + isviewunset' vu v = S.member (fromViewPath vu) v + +{- Constructing a view that will match arbitrary metadata, and applying + - it to a file yields a set of ViewedFile which all contain the same + - MetaFields that were present in the input metadata + - (excluding fields that are not visible). -} +prop_view_roundtrips :: AssociatedFile -> MetaData -> Bool -> Bool +prop_view_roundtrips (AssociatedFile Nothing) _ _ = True +prop_view_roundtrips (AssociatedFile (Just f)) metadata visible = or + [ B.null (P.takeFileName f) && B.null (P.takeDirectory f) + , viewTooLarge view + , all hasfields (viewedFiles view (viewedFileFromReference' Nothing Nothing) (fromRawFilePath f) metadata) + ] + where + view = View (Git.Ref "foo") $ + map (\(mf, mv) -> ViewComponent mf (FilterValues $ S.filter (not . B.null . fromMetaValue) mv) visible) + (fromMetaData metadata) + visiblefields = sort (map viewField $ filter viewVisible (viewComponents view)) + hasfields fv = sort (map fst (fromMetaData (fromView view fv))) == visiblefields + +{- A directory foo/bar/baz/ is turned into metadata fields + - /=foo, foo/=bar, foo/bar/=baz. + - + - Note that this may generate MetaFields that legalField rejects. + - This is necessary to have a 1:1 mapping between directory names and + - fields. So this MetaData cannot safely be serialized. -} +getDirMetaData :: FilePath -> MetaData +getDirMetaData d = MetaData $ M.fromList $ zip fields values + where + dirs = splitDirectories d + fields = map (mkMetaFieldUnchecked . T.pack . addTrailingPathSeparator . joinPath) + (inits dirs) + values = map (S.singleton . toMetaValue . encodeBS . fromMaybe "" . headMaybe) + (tails dirs) + +getWorkTreeMetaData :: FilePath -> MetaData +getWorkTreeMetaData = getDirMetaData . dropFileName + +getViewedFileMetaData :: FilePath -> MetaData +getViewedFileMetaData = getDirMetaData . dirFromViewedFile . takeFileName + +{- Applies a view to the currently checked out branch, generating a new + - branch for the view. + -} +applyView :: View -> Maybe Adjustment -> Annex Git.Branch +applyView v ma = do + gc <- Annex.getGitConfig + applyView' (viewedFileFromReference gc) getWorkTreeMetaData v ma + +{- Generates a new branch for a View, which must be a more narrow + - version of the View originally used to generate the currently + - checked out branch. That is, it must match a subset of the files + - in view, not any others. + -} +narrowView :: View -> Maybe Adjustment -> Annex Git.Branch +narrowView = applyView' viewedFileReuse getViewedFileMetaData + +{- Go through each staged file. + - If the file is not annexed, skip it, unless it's a dotfile in the top, + - or a file in a dotdir in the top. 
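+ - (Such non-annexed dotfiles are staged into the view unchanged.)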
+ - Look up the metadata of annexed files, and generate any ViewedFiles, + - and stage them. + -} +applyView' :: MkViewedFile -> (FilePath -> MetaData) -> View -> Maybe Adjustment -> Annex Git.Branch +applyView' mkviewedfile getfilemetadata view madj = do + top <- fromRepo Git.repoPath + (l, clean) <- inRepo $ Git.LsFiles.inRepoDetails [] [top] + applyView'' mkviewedfile getfilemetadata view madj l clean $ + \(f, sha, mode) -> do + topf <- inRepo (toTopFilePath f) + k <- lookupKey f + return (topf, sha, toTreeItemType mode, k) + genViewBranch view madj + +applyView'' + :: MkViewedFile + -> (FilePath -> MetaData) + -> View + -> Maybe Adjustment + -> [t] + -> IO Bool + -> (t -> Annex (TopFilePath, Sha, Maybe TreeItemType, Maybe Key)) + -> Annex () +applyView'' mkviewedfile getfilemetadata view madj l clean conv = do + viewg <- withNewViewIndex gitRepo + withUpdateIndex viewg $ \uh -> do + g <- Annex.gitRepo + gc <- Annex.getGitConfig + -- Streaming the metadata like this is an optimisation. + catObjectStream g $ \mdfeeder mdcloser mdreader -> do + tid <- liftIO . async =<< forkState + (getmetadata gc mdfeeder mdcloser l) + process uh mdreader + join (liftIO (wait tid)) + liftIO $ void clean + where + genviewedfiles = viewedFiles view mkviewedfile -- enables memoization + + getmetadata _ _ mdcloser [] = liftIO mdcloser + getmetadata gc mdfeeder mdcloser (t:ts) = do + v@(topf, _sha, _treeitemtype, mkey) <- conv t + let feed mdlogf = liftIO $ mdfeeder + (v, Git.Ref.branchFileRef Annex.Branch.fullname mdlogf) + case mkey of + Just key -> feed (metaDataLogFile gc key) + Nothing + -- Handle toplevel dotfiles that are not + -- annexed files by feeding through a query + -- for dummy metadata. Calling + -- Git.UpdateIndex.streamUpdateIndex' + -- here would race with process's calls + -- to it. + | "." `B.isPrefixOf` getTopFilePath topf -> + feed "dummy" + | otherwise -> noop + getmetadata gc mdfeeder mdcloser ts + + process uh mdreader = liftIO mdreader >>= \case + Just ((topf, _, mtreeitemtype, Just k), mdlog) -> do + let metadata = maybe emptyMetaData parseCurrentMetaData mdlog + let f = fromRawFilePath $ getTopFilePath topf + let metadata' = getfilemetadata f `unionMetaData` metadata + forM_ (genviewedfiles f metadata') $ \fv -> do + f' <- fromRepo (fromTopFilePath $ asTopFilePath $ toRawFilePath fv) + stagefile uh f' k mtreeitemtype + process uh mdreader + Just ((topf, sha, Just treeitemtype, Nothing), _) -> do + liftIO $ Git.UpdateIndex.streamUpdateIndex' uh $ + pureStreamer $ updateIndexLine sha treeitemtype topf + process uh mdreader + Just _ -> process uh mdreader + Nothing -> return () + + stagefile uh f k mtreeitemtype = case madj of + Nothing -> stagesymlink uh f k + Just (LinkAdjustment UnlockAdjustment) -> + stagepointerfile uh f k mtreeitemtype + Just (LinkPresentAdjustment UnlockPresentAdjustment) -> + ifM (inAnnex k) + ( stagepointerfile uh f k mtreeitemtype + , stagesymlink uh f k + ) + Just (PresenceAdjustment HideMissingAdjustment (Just UnlockAdjustment)) -> + whenM (inAnnex k) $ + stagepointerfile uh f k mtreeitemtype + Just (PresenceAdjustment HideMissingAdjustment _) -> + whenM (inAnnex k) $ + stagesymlink uh f k + _ -> stagesymlink uh f k + + stagesymlink uh f k = do + linktarget <- calcRepo (gitAnnexLink f k) + sha <- hashSymlink linktarget + liftIO . 
Git.UpdateIndex.streamUpdateIndex' uh + =<< inRepo (Git.UpdateIndex.stageSymlink f sha) + + stagepointerfile uh f k mtreeitemtype = do + let treeitemtype = if mtreeitemtype == Just TreeExecutable + then TreeExecutable + else TreeFile + sha <- hashPointerFile k + liftIO . Git.UpdateIndex.streamUpdateIndex' uh + =<< inRepo (Git.UpdateIndex.stageFile sha treeitemtype f) + +{- Updates the current view with any changes that have been made to its + - parent branch or the metadata since the view was created or last updated. + - + - When there were changes, returns a ref to a commit for the updated view. + - Does not update the view branch with it. + - + - This is not very optimised. An incremental update would be possible to + - implement and would be faster, but more complicated. + -} +updateView :: View -> Maybe Adjustment -> Annex (Maybe Git.Ref) +updateView view madj = do + (l, clean) <- inRepo $ Git.LsTree.lsTree + Git.LsTree.LsTreeRecursive + (Git.LsTree.LsTreeLong True) + (viewParentBranch view) + gc <- Annex.getGitConfig + applyView'' (viewedFileFromReference gc) getWorkTreeMetaData view madj l clean $ + \ti -> do + let ref = Git.Ref.branchFileRef (viewParentBranch view) + (getTopFilePath (Git.LsTree.file ti)) + k <- case Git.LsTree.size ti of + Nothing -> catKey ref + Just sz -> catKey' ref sz + return + ( (Git.LsTree.file ti) + , (Git.LsTree.sha ti) + , (toTreeItemType (Git.LsTree.mode ti)) + , k + ) + oldcommit <- inRepo $ Git.Ref.sha (branchView view madj) + oldtree <- maybe (pure Nothing) (inRepo . Git.Ref.tree) oldcommit + newtree <- withViewIndex $ inRepo Git.Branch.writeTree + if oldtree /= Just newtree + then Just <$> do + cmode <- annexCommitMode <$> Annex.getGitConfig + let msg = "updated " ++ fromRef (branchView view madj) + let parent = catMaybes [oldcommit] + inRepo (Git.Branch.commitTree cmode [msg] parent newtree) + else return Nothing + +{- Diff between currently checked out branch and staged changes, and + - update metadata to reflect the changes that are being committed to the + - view. + - + - Adding a file to a directory adds the metadata represented by + - that directory to the file, and removing a file from a directory + - removes the metadata. + - + - Note that removes must be handled before adds. This is so + - that moving a file from x/foo/ to x/bar/ adds back the metadata for x. + -} +withViewChanges :: (ViewedFile -> Key -> CommandStart) -> (ViewedFile -> Key -> CommandStart) -> Annex () +withViewChanges addmeta removemeta = do + (diffs, cleanup) <- inRepo $ DiffTree.diffIndex Git.Ref.headRef + forM_ diffs handleremovals + forM_ diffs handleadds + void $ liftIO cleanup + where + handleremovals item + | DiffTree.srcsha item `notElem` nullShas = + handlechange item removemeta + =<< catKey (DiffTree.srcsha item) + | otherwise = noop + handleadds item + | DiffTree.dstsha item `notElem` nullShas = + handlechange item addmeta + =<< catKey (DiffTree.dstsha item) + | otherwise = noop + handlechange item a = maybe noop + (void . commandAction . a (fromRawFilePath $ getTopFilePath $ DiffTree.file item)) + +{- Runs an action using the view index file. + - Note that the file does not necessarily exist, or can contain + - info staged for an old view. -} +withViewIndex :: Annex a -> Annex a +withViewIndex = withIndexFile ViewIndexFile . const + +withNewViewIndex :: Annex a -> Annex a +withNewViewIndex a = do + liftIO . 
removeWhenExistsWith R.removeLink =<< fromRepo gitAnnexViewIndex + withViewIndex a + +{- Generates a branch for a view, using the view index file + - to make a commit to the view branch. The view branch is not + - checked out, but entering it will display the view. -} +genViewBranch :: View -> Maybe Adjustment -> Annex Git.Branch +genViewBranch view madj = withViewIndex $ do + let branch = branchView view madj + cmode <- annexCommitMode <$> Annex.getGitConfig + void $ inRepo $ Git.Branch.commit cmode True (fromRef branch) branch [] + return branch + +withCurrentView :: (View -> Maybe Adjustment -> Annex a) -> Annex a +withCurrentView a = maybe (giveup "Not in a view.") (uncurry a) =<< currentView diff --git a/Annex/View/ViewedFile.hs b/Annex/View/ViewedFile.hs new file mode 100644 index 0000000000..84dcbc897a --- /dev/null +++ b/Annex/View/ViewedFile.hs @@ -0,0 +1,112 @@ +{- filenames (not paths) used in views + - + - Copyright 2014-2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Annex.View.ViewedFile ( + ViewedFile, + MkViewedFile, + viewedFileFromReference, + viewedFileFromReference', + viewedFileReuse, + dirFromViewedFile, + prop_viewedFile_roundtrips, +) where + +import Annex.Common +import Utility.QuickCheck +import Backend.Utilities (maxExtensions) + +import qualified Data.ByteString as S + +type FileName = String +type ViewedFile = FileName + +type MkViewedFile = FilePath -> ViewedFile + +{- Converts a filepath used in a reference branch to the + - filename that will be used in the view. + - + - No two filepaths from the same branch should yield the same result, + - so all directory structure needs to be included in the output filename + - in some way. + - + - So, from dir/subdir/file.foo, generate file_%dir%subdir%.foo + -} +viewedFileFromReference :: GitConfig -> MkViewedFile +viewedFileFromReference g = viewedFileFromReference' + (annexMaxExtensionLength g) + (annexMaxExtensions g) + +viewedFileFromReference' :: Maybe Int -> Maybe Int -> MkViewedFile +viewedFileFromReference' maxextlen maxextensions f = concat $ + [ escape (fromRawFilePath base') + , if null dirs then "" else "_%" ++ intercalate "%" (map escape dirs) ++ "%" + , escape $ fromRawFilePath $ S.concat extensions' + ] + where + (path, basefile) = splitFileName f + dirs = filter (/= ".") $ map dropTrailingPathSeparator (splitPath path) + (base, extensions) = case maxextlen of + Nothing -> splitShortExtensions (toRawFilePath basefile') + Just n -> splitShortExtensions' (n+1) (toRawFilePath basefile') + {- Limit number of extensions. -} + maxextensions' = fromMaybe maxExtensions maxextensions + (base', extensions') + | length extensions <= maxextensions' = (base, extensions) + | otherwise = + let (es,more) = splitAt maxextensions' (reverse extensions) + in (base <> mconcat (reverse more), reverse es) + {- On Windows, if the filename looked like "dir/c:foo" then + - basefile would look like it contains a drive letter, which will + - not work. There cannot really be a filename like that, probably, + - but it prevents the test suite failing. -} + (_basedrive, basefile') = splitDrive basefile + + {- To avoid collisions with filenames or directories that contain + - '%', and to allow the original directories to be extracted + - from the ViewedFile, '%' is escaped. ) + -} + escape :: String -> String + escape = replace "%" (escchar:'%':[]) . 
replace [escchar] [escchar, escchar] + +escchar :: Char +#ifndef mingw32_HOST_OS +escchar = '\\' +#else +-- \ is path separator on Windows, so instead use ! +escchar = '!' +#endif + +{- For use when operating already within a view, so whatever filepath + - is present in the work tree is already a ViewedFile. -} +viewedFileReuse :: MkViewedFile +viewedFileReuse = takeFileName + +{- Extracts from a ViewedFile the directory where the file is located on + - in the reference branch. -} +dirFromViewedFile :: ViewedFile -> FilePath +dirFromViewedFile = joinPath . drop 1 . sep [] "" + where + sep l _ [] = reverse l + sep l curr (c:cs) + | c == '%' = sep (reverse curr:l) "" cs + | c == escchar = case cs of + (c':cs') -> sep l (c':curr) cs' + [] -> sep l curr cs + | otherwise = sep l (c:curr) cs + +prop_viewedFile_roundtrips :: TestableFilePath -> Bool +prop_viewedFile_roundtrips tf + -- Relative filenames wanted, not directories. + | any (isPathSeparator) (end f ++ beginning f) = True + | isAbsolute f || isDrive f = True + | otherwise = dir == dirFromViewedFile + (viewedFileFromReference' Nothing Nothing f) + where + f = fromTestableFilePath tf + dir = joinPath $ beginning $ splitDirectories f diff --git a/Annex/Wanted.hs b/Annex/Wanted.hs new file mode 100644 index 0000000000..87fdbdae49 --- /dev/null +++ b/Annex/Wanted.hs @@ -0,0 +1,75 @@ +{- git-annex checking whether content is wanted + - + - Copyright 2012-2021 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.Wanted where + +import Annex.Common +import Logs.PreferredContent +import Annex.UUID +import Annex.CatFile +import Git.FilePath +import qualified Database.Keys +import Types.FileMatcher + +import qualified Data.Set as S + +{- Check if a file is preferred content for the local repository. -} +wantGet :: Bool -> Maybe Key -> AssociatedFile -> Annex Bool +wantGet d key file = isPreferredContent Nothing S.empty key file d + +{- Check if a file is preferred content for a repository. -} +wantGetBy :: Bool -> Maybe Key -> AssociatedFile -> UUID -> Annex Bool +wantGetBy d key file to = isPreferredContent (Just to) S.empty key file d + +{- Check if a file is not preferred or required content, and can be + - dropped. When a UUID is provided, checks for that repository. + - + - The AssociatedFile is the one that the user requested to drop. + - There may be other files that use the same key, and preferred content + - may match some of those and not others. If any are preferred content, + - that will prevent dropping. When the other associated files are known, + - they can be provided, otherwise this looks them up. + -} +wantDrop :: Bool -> Maybe UUID -> Maybe Key -> AssociatedFile -> (Maybe [AssociatedFile]) -> Annex Bool +wantDrop d from key file others = + isNothing <$> checkDrop isPreferredContent d from key file others + +{- Generalization of wantDrop that can also be used with isRequiredContent. + - + - When the content should not be dropped, returns Just the file that + - the checker matches. + -} +checkDrop :: (Maybe UUID -> AssumeNotPresent -> Maybe Key -> AssociatedFile -> Bool -> Annex Bool) -> Bool -> Maybe UUID -> Maybe Key -> AssociatedFile -> (Maybe [AssociatedFile]) -> Annex (Maybe AssociatedFile) +checkDrop checker d from key file others = do + u <- maybe getUUID (pure . 
id) from + let s = S.singleton u + let checker' f = checker (Just u) s key f d + ifM (checker' file) + ( return (Just file) + , do + others' <- case others of + Just afs -> pure (filter (/= file) afs) + Nothing -> case key of + Just k -> + mapM (\f -> AssociatedFile . Just <$> fromRepo (fromTopFilePath f)) + =<< Database.Keys.getAssociatedFiles k + Nothing -> pure [] + l <- filterM checker' others' + if null l + then return Nothing + else checkassociated l + ) + where + -- Some associated files that are in the keys database may no + -- longer correspond to files in the repository, and should + -- not prevent dropping. + checkassociated [] = return Nothing + checkassociated (af@(AssociatedFile (Just f)):fs) = + catKeyFile f >>= \case + Just k | Just k == key -> return (Just af) + _ -> checkassociated fs + checkassociated (AssociatedFile Nothing:fs) = checkassociated fs diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs new file mode 100644 index 0000000000..41abc2471e --- /dev/null +++ b/Annex/WorkTree.hs @@ -0,0 +1,60 @@ +{- git-annex worktree files + - + - Copyright 2013-2022 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.WorkTree where + +import Annex.Common +import Annex.Link +import Annex.CatFile +import Annex.CurrentBranch +import qualified Database.Keys + +{- Looks up the key corresponding to an annexed file in the work tree, + - by examining what the symlink points to. + - + - An unlocked file will not have a link on disk, so fall back to + - looking for a pointer to a key in git. + - + - When in an adjusted branch that may have hidden the file, looks for a + - pointer to a key in the original branch. + -} +lookupKey :: RawFilePath -> Annex (Maybe Key) +lookupKey = lookupKey' catkeyfile + where + catkeyfile file = + ifM (liftIO $ doesFileExist $ fromRawFilePath file) + ( catKeyFile file + , catKeyFileHidden file =<< getCurrentBranch + ) + +{- Like lookupKey, but only looks at files staged in git, not at unstaged + - changes in the work tree. This means it's slower, but it also has + - consistently the same behavior for locked files as for unlocked files. + -} +lookupKeyStaged :: RawFilePath -> Annex (Maybe Key) +lookupKeyStaged file = catKeyFile file >>= \case + Just k -> return (Just k) + Nothing -> catKeyFileHidden file =<< getCurrentBranch + +{- Like lookupKey, but does not find keys for hidden files. -} +lookupKeyNotHidden :: RawFilePath -> Annex (Maybe Key) +lookupKeyNotHidden = lookupKey' catkeyfile + where + catkeyfile file = + ifM (liftIO $ doesFileExist $ fromRawFilePath file) + ( catKeyFile file + , return Nothing + ) + +lookupKey' :: (RawFilePath -> Annex (Maybe Key)) -> RawFilePath -> Annex (Maybe Key) +lookupKey' catkeyfile file = isAnnexLink file >>= \case + Just key -> return (Just key) + Nothing -> catkeyfile file + +{- Find all annexed files and update the keys database for them. -} +scanAnnexedFiles :: Annex () +scanAnnexedFiles = Database.Keys.updateDatabase diff --git a/Annex/WorkerPool.hs b/Annex/WorkerPool.hs new file mode 100644 index 0000000000..ddad985b42 --- /dev/null +++ b/Annex/WorkerPool.hs @@ -0,0 +1,124 @@ +{- git-annex worker thread pool + - + - Copyright 2015-2019 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.WorkerPool where + +import Annex +import Annex.Common +import Types.WorkerPool + +import Control.Concurrent +import Control.Concurrent.STM + +{- Runs an action and makes the current thread have the specified stage + - while doing so. 
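+ - (Implemented with bracket, so the previous stage is restored when
+ - the action completes.)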
If too many other threads are running in the specified + - stage, waits for one of them to become idle. + - + - Noop if the current thread already has the requested stage, or if the + - current thread is not in the worker pool, or if concurrency is not + - enabled. + - + - Also a noop if the stage is not one of the stages that the worker pool + - uses. + -} +enteringStage :: WorkerStage -> Annex a -> Annex a +enteringStage newstage a = Annex.getState Annex.workers >>= \case + Nothing -> a + Just tv -> do + mytid <- liftIO myThreadId + let set = changeStageTo mytid tv (const newstage) + let restore = maybe noop (void . changeStageTo mytid tv . const) + bracket set restore (const a) + +{- Transition the current thread to the initial stage. + - This is done once the thread is ready to begin work. + -} +enteringInitialStage :: Annex () +enteringInitialStage = Annex.getState Annex.workers >>= \case + Nothing -> noop + Just tv -> do + mytid <- liftIO myThreadId + void $ changeStageTo mytid tv initialStage + +{- This needs to leave the WorkerPool with the same number of + - idle and active threads, and with the same number of threads for each + - WorkerStage. So, all it can do is swap the WorkerStage of our thread's + - ActiveWorker with an IdleWorker. + - + - Must avoid a deadlock if all worker threads end up here at the same + - time, or if there are no suitable IdleWorkers left. So if necessary + - we first replace our ActiveWorker with an IdleWorker in the pool, to allow + - some other thread to use it, before waiting for a suitable IdleWorker + - for us to use. + - + - Note that the spareVals in the WorkerPool does not get anything added to + - it when adding the IdleWorker, so there will for a while be more IdleWorkers + - in the pool than spareVals. That does not prevent other threads that call + - this from using them though, so it's fine. + -} +changeStageTo :: ThreadId -> TMVar (WorkerPool t) -> (UsedStages -> WorkerStage) -> Annex (Maybe WorkerStage) +changeStageTo mytid tv getnewstage = liftIO $ + replaceidle >>= maybe + (return Nothing) + (either waitidle (return . Just)) + where + replaceidle = atomically $ do + pool <- takeTMVar tv + let newstage = getnewstage (usedStages pool) + let notchanging = do + putTMVar tv pool + return Nothing + if memberStage newstage (usedStages pool) + then case removeThreadIdWorkerPool mytid pool of + Just ((myaid, oldstage), pool') + | oldstage /= newstage -> case getIdleWorkerSlot newstage pool' of + Nothing -> do + putTMVar tv $ + addWorkerPool (IdleWorker oldstage) pool' + return $ Just $ Left (myaid, newstage, oldstage) + Just pool'' -> do + -- optimisation + putTMVar tv $ + addWorkerPool (IdleWorker oldstage) $ + addWorkerPool (ActiveWorker myaid newstage) pool'' + return $ Just $ Right oldstage + | otherwise -> notchanging + _ -> notchanging + else notchanging + + waitidle (myaid, newstage, oldstage) = atomically $ do + pool <- waitIdleWorkerSlot newstage =<< takeTMVar tv + putTMVar tv $ addWorkerPool (ActiveWorker myaid newstage) pool + return (Just oldstage) + +-- | Waits until there's an idle StartStage worker in the worker pool, +-- removes it from the pool, and returns its state. 
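+--
+-- Blocks (via STM retry) until the pool has both a spare value and an
+-- idle StartStage worker slot.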
+waitStartWorkerSlot :: TMVar (WorkerPool t) -> STM (t, WorkerStage) +waitStartWorkerSlot tv = do + pool <- takeTMVar tv + v <- go pool + return (v, StartStage) + where + go pool = case spareVals pool of + [] -> retry + (v:vs) -> do + let pool' = pool { spareVals = vs } + putTMVar tv =<< waitIdleWorkerSlot StartStage pool' + return v + +waitIdleWorkerSlot :: WorkerStage -> WorkerPool t -> STM (WorkerPool t) +waitIdleWorkerSlot wantstage = maybe retry return . getIdleWorkerSlot wantstage + +getIdleWorkerSlot :: WorkerStage -> WorkerPool t -> Maybe (WorkerPool t) +getIdleWorkerSlot wantstage pool = do + l <- findidle [] (workerList pool) + return $ pool { workerList = l } + where + findidle _ [] = Nothing + findidle c ((IdleWorker stage):rest) + | stage == wantstage = Just (c ++ rest) + findidle c (w:rest) = findidle (w:c) rest diff --git a/Annex/YoutubeDl.hs b/Annex/YoutubeDl.hs new file mode 100644 index 0000000000..3a4dd051bc --- /dev/null +++ b/Annex/YoutubeDl.hs @@ -0,0 +1,410 @@ +{- yt-dlp (and deprecated youtube-dl) integration for git-annex + - + - Copyright 2017-2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE DeriveGeneric #-} + +module Annex.YoutubeDl ( + youtubeDl, + youtubeDlTo, + youtubeDlSupported, + youtubeDlCheck, + youtubeDlFileName, + youtubeDlFileNameHtmlOnly, + youtubeDlCommand, + youtubePlaylist, + YoutubePlaylistItem(..), +) where + +import Annex.Common +import qualified Annex +import Annex.Content +import Annex.Url +import Utility.DiskFree +import Utility.HtmlDetect +import Utility.Process.Transcript +import Utility.Metered +import Utility.Tmp +import Messages.Progress +import Logs.Transfer + +import Network.URI +import Control.Concurrent.Async +import Text.Read +import Data.Either +import qualified Data.Aeson as Aeson +import GHC.Generics +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as B8 + +-- youtube-dl can follow redirects to anywhere, including potentially +-- localhost or a private address. So, it's only allowed to download +-- content if the user has allowed access to all addresses. +youtubeDlAllowed :: Annex Bool +youtubeDlAllowed = ipAddressesUnlimited + +youtubeDlNotAllowedMessage :: String +youtubeDlNotAllowedMessage = unwords + [ "This url is supported by yt-dlp, but" + , "yt-dlp could potentially access any address, and the" + , "configuration of annex.security.allowed-ip-addresses" + , "does not allow that. Not using yt-dlp (or youtube-dl)." + ] + +-- Runs youtube-dl in a work directory, to download a single media file +-- from the url. Returns the path to the media file in the work directory. +-- +-- Displays a progress meter as youtube-dl downloads. +-- +-- If no file is downloaded, or the program is not installed, +-- returns Right Nothing. +-- +-- youtube-dl can write to multiple files, either temporary files, or +-- multiple videos found at the url, and git-annex needs only one file. +-- So we need to find the destination file, and make sure there is not +-- more than one. With yt-dlp use --print-to-file to make it record the +-- file(s) it downloads. With youtube-dl, the best that can be done is +-- to require that the work directory end up with only 1 file in it. 
+-- (This can fail, but youtube-dl is deprecated, and they closed my +-- issue requesting something like --print-to-file; +-- ) +youtubeDl :: URLString -> FilePath -> MeterUpdate -> Annex (Either String (Maybe FilePath)) +youtubeDl url workdir p = ifM ipAddressesUnlimited + ( withUrlOptions $ youtubeDl' url workdir p + , return $ Left youtubeDlNotAllowedMessage + ) + +youtubeDl' :: URLString -> FilePath -> MeterUpdate -> UrlOptions -> Annex (Either String (Maybe FilePath)) +youtubeDl' url workdir p uo + | supportedScheme uo url = do + cmd <- youtubeDlCommand + ifM (liftIO $ inSearchPath cmd) + ( runcmd cmd >>= \case + Right True -> downloadedfiles cmd >>= \case + (f:[]) -> return (Right (Just f)) + [] -> return (nofiles cmd) + fs -> return (toomanyfiles cmd fs) + Right False -> workdirfiles >>= \case + [] -> return (Right Nothing) + _ -> return (Left $ cmd ++ " download is incomplete. Run the command again to resume.") + Left msg -> return (Left msg) + , return (Right Nothing) + ) + | otherwise = return (Right Nothing) + where + nofiles cmd = Left $ cmd ++ " did not put any media in its work directory, perhaps it's been configured to store files somewhere else?" + toomanyfiles cmd fs = Left $ cmd ++ " downloaded multiple media files; git-annex is only able to deal with one per url: " ++ show fs + downloadedfiles cmd + | isytdlp cmd = liftIO $ + (nub . lines <$> readFile filelistfile) + `catchIO` (pure . const []) + | otherwise = workdirfiles + workdirfiles = liftIO $ filter (/= filelistfile) + <$> (filterM (doesFileExist) =<< dirContents workdir) + filelistfile = workdir filelistfilebase + filelistfilebase = "git-annex-file-list-file" + isytdlp cmd = cmd == "yt-dlp" + runcmd cmd = youtubeDlMaxSize workdir >>= \case + Left msg -> return (Left msg) + Right maxsize -> do + opts <- youtubeDlOpts (dlopts cmd ++ maxsize) + oh <- mkOutputHandlerQuiet + -- The size is unknown to start. Once youtube-dl + -- outputs some progress, the meter will be updated + -- with the size, which is why it's important the + -- meter is passed into commandMeter' + let unknownsize = Nothing :: Maybe FileSize + ok <- metered (Just p) unknownsize Nothing $ \meter meterupdate -> + liftIO $ commandMeter' + (if isytdlp cmd then parseYtdlpProgress else parseYoutubeDlProgress) + oh (Just meter) meterupdate cmd opts + (\pr -> pr { cwd = Just workdir }) + return (Right ok) + dlopts cmd = + [ Param url + -- To make it only download one file when given a + -- page with a video and a playlist, download only the video. + , Param "--no-playlist" + -- And when given a page with only a playlist, download only + -- the first video on the playlist. (Assumes the video is + -- somewhat stable, but this is the only way to prevent + -- it from downloading the whole playlist.) + , Param "--playlist-items", Param "0" + ] ++ + if isytdlp cmd + then + -- Avoid warnings, which go to + -- stderr and may mess up + -- git-annex's display. + [ Param "--no-warnings" + , Param "--progress-template" + , Param progressTemplate + , Param "--print-to-file" + , Param "after_move:filepath" + , Param filelistfilebase + ] + else [] + +-- To honor annex.diskreserve, ask youtube-dl to not download too +-- large a media file. Factors in other downloads that are in progress, +-- and any files in the workdir that it may have partially downloaded +-- before. 
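+--
+-- Roughly: the --max-filesize passed to it is free disk space, minus
+-- annex.diskreserve, minus other downloads in progress, plus whatever
+-- was already downloaded into the workdir.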
+youtubeDlMaxSize :: FilePath -> Annex (Either String [CommandParam]) +youtubeDlMaxSize workdir = ifM (Annex.getRead Annex.force) + ( return $ Right [] + , liftIO (getDiskFree workdir) >>= \case + Just have -> do + inprogress <- sizeOfDownloadsInProgress (const True) + partial <- liftIO $ sum + <$> (mapM (getFileSize . toRawFilePath) =<< dirContents workdir) + reserve <- annexDiskReserve <$> Annex.getGitConfig + let maxsize = have - reserve - inprogress + partial + if maxsize > 0 + then return $ Right + [ Param "--max-filesize" + , Param (show maxsize) + ] + else return $ Left $ + needMoreDiskSpace $ + negate maxsize + 1024 + Nothing -> return $ Right [] + ) + +-- Download a media file to a destination, +youtubeDlTo :: Key -> URLString -> FilePath -> MeterUpdate -> Annex Bool +youtubeDlTo key url dest p = do + res <- withTmpWorkDir key $ \workdir -> + youtubeDl url (fromRawFilePath workdir) p >>= \case + Right (Just mediafile) -> do + liftIO $ moveFile (toRawFilePath mediafile) (toRawFilePath dest) + return (Just True) + Right Nothing -> return (Just False) + Left msg -> do + warning (UnquotedString msg) + return Nothing + return (fromMaybe False res) + +-- youtube-dl supports downloading urls that are not html pages, +-- but we don't want to use it for such urls, since they can be downloaded +-- without it. So, this first downloads part of the content and checks +-- if it's a html page; only then is youtube-dl used. +htmlOnly :: URLString -> a -> Annex a -> Annex a +htmlOnly url fallback a = withUrlOptions $ \uo -> + liftIO (downloadPartial url uo htmlPrefixLength) >>= \case + Just bs | isHtmlBs bs -> a + _ -> return fallback + +-- Check if youtube-dl supports downloading content from an url. +youtubeDlSupported :: URLString -> Annex Bool +youtubeDlSupported url = either (const False) id + <$> withUrlOptions (youtubeDlCheck' url) + +-- Check if youtube-dl can find media in an url. +-- +-- While this does not download anything, it checks youtubeDlAllowed +-- for symmetry with youtubeDl; the check should not succeed if the +-- download won't succeed. +youtubeDlCheck :: URLString -> Annex (Either String Bool) +youtubeDlCheck url = ifM youtubeDlAllowed + ( withUrlOptions $ youtubeDlCheck' url + , return $ Left youtubeDlNotAllowedMessage + ) + +youtubeDlCheck' :: URLString -> UrlOptions -> Annex (Either String Bool) +youtubeDlCheck' url uo + | supportedScheme uo url = catchMsgIO $ htmlOnly url False $ do + opts <- youtubeDlOpts [ Param url, Param "--simulate" ] + cmd <- youtubeDlCommand + liftIO $ snd <$> processTranscript cmd (toCommand opts) Nothing + | otherwise = return (Right False) + +-- Ask youtube-dl for the filename of media in an url. +-- +-- (This is not always identical to the filename it uses when downloading.) +youtubeDlFileName :: URLString -> Annex (Either String FilePath) +youtubeDlFileName url = withUrlOptions go + where + go uo + | supportedScheme uo url = flip catchIO (pure . Left . show) $ + htmlOnly url nomedia (youtubeDlFileNameHtmlOnly' url uo) + | otherwise = return nomedia + nomedia = Left "no media in url" + +-- Does not check if the url contains htmlOnly; use when that's already +-- been verified. +youtubeDlFileNameHtmlOnly :: URLString -> Annex (Either String FilePath) +youtubeDlFileNameHtmlOnly = withUrlOptions . youtubeDlFileNameHtmlOnly' + +youtubeDlFileNameHtmlOnly' :: URLString -> UrlOptions -> Annex (Either String FilePath) +youtubeDlFileNameHtmlOnly' url uo + | supportedScheme uo url = flip catchIO (pure . Left . 
show) go + | otherwise = return nomedia + where + go = do + -- Sometimes youtube-dl will fail with an ugly backtrace + -- (eg, http://bugs.debian.org/874321) + -- so catch stderr as well as stdout to avoid the user + -- seeing it. --no-warnings avoids warning messages that + -- are output to stdout. + opts <- youtubeDlOpts + [ Param url + , Param "--get-filename" + , Param "--no-warnings" + , Param "--no-playlist" + ] + cmd <- youtubeDlCommand + let p = (proc cmd (toCommand opts)) + { std_out = CreatePipe + , std_err = CreatePipe + } + liftIO $ withCreateProcess p waitproc + + waitproc Nothing (Just o) (Just e) pid = do + errt <- async $ discardstderr pid e + output <- hGetContentsStrict o + ok <- liftIO $ checkSuccessProcess pid + wait errt + return $ case (ok, lines output) of + (True, (f:_)) | not (null f) -> Right f + _ -> nomedia + waitproc _ _ _ _ = error "internal" + + discardstderr pid e = hGetLineUntilExitOrEOF pid e >>= \case + Nothing -> return () + Just _ -> discardstderr pid e + + nomedia = Left "no media in url" + +youtubeDlOpts :: [CommandParam] -> Annex [CommandParam] +youtubeDlOpts addopts = do + opts <- map Param . annexYoutubeDlOptions <$> Annex.getGitConfig + return (opts ++ addopts) + +youtubeDlCommand :: Annex String +youtubeDlCommand = annexYoutubeDlCommand <$> Annex.getGitConfig >>= \case + Just c -> pure c + Nothing -> ifM (liftIO $ inSearchPath "yt-dlp") + ( return "yt-dlp" + , return "youtube-dl" + ) + +supportedScheme :: UrlOptions -> URLString -> Bool +supportedScheme uo url = case parseURIRelaxed url of + Nothing -> False + Just u -> case uriScheme u of + -- avoid ugly message from youtube-dl about not supporting file: + "file:" -> False + -- ftp indexes may look like html pages, and there's no point + -- involving youtube-dl in a ftp download + "ftp:" -> False + _ -> allowedScheme uo u + +progressTemplate :: String +progressTemplate = "ANNEX %(progress.downloaded_bytes)i %(progress.total_bytes_estimate)i %(progress.total_bytes)i ANNEX" + +{- The progressTemplate makes output look like "ANNEX 10 100 NA ANNEX" or + - "ANNEX 10 NA 100 ANNEX" depending on whether the total bytes are estimated + - or known. That makes parsing much easier (and less fragile) than parsing + - the usual progress output. + -} +parseYtdlpProgress :: ProgressParser +parseYtdlpProgress = go [] . reverse . progresschunks + where + delim = '\r' + + progresschunks = splitc delim + + go remainder [] = (Nothing, Nothing, remainder) + go remainder (x:xs) = case splitc ' ' x of + ("ANNEX":downloaded_bytes_s:total_bytes_estimate_s:total_bytes_s:"ANNEX":[]) -> + case (readMaybe downloaded_bytes_s, readMaybe total_bytes_estimate_s, readMaybe total_bytes_s) of + (Just downloaded_bytes, Nothing, Just total_bytes) -> + ( Just (BytesProcessed downloaded_bytes) + , Just (TotalSize total_bytes) + , remainder + ) + (Just downloaded_bytes, Just total_bytes_estimate, _) -> + ( Just (BytesProcessed downloaded_bytes) + , Just (TotalSize total_bytes_estimate) + , remainder + ) + _ -> go (remainder++x) xs + _ -> go (remainder++x) xs + +{- youtube-dl is deprecated, parsing its progress was attempted before but + - was buggy and is no longer done. -} +parseYoutubeDlProgress :: ProgressParser +parseYoutubeDlProgress _ = (Nothing, Nothing, "") + +{- List the items that yt-dlp can download from an url. + - + - Note that this does not check youtubeDlAllowed because it does not + - download content. 
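+ -
+ - yt-dlp is asked to print one JSON object per playlist item,
+ - containing only the fields in youtubePlaylistItemFields; each
+ - line is then parsed into a YoutubePlaylistItem.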
+ -} +youtubePlaylist :: URLString -> Annex (Either String [YoutubePlaylistItem]) +youtubePlaylist url = do + cmd <- youtubeDlCommand + if cmd == "yt-dlp" + then liftIO $ youtubePlaylist' url cmd + else return $ Left $ "Scraping needs yt-dlp, but git-annex has been configured to use " ++ cmd + +youtubePlaylist' :: URLString -> String -> IO (Either String [YoutubePlaylistItem]) +youtubePlaylist' url cmd = withTmpFile "yt-dlp" $ \tmpfile h -> do + hClose h + (outerr, ok) <- processTranscript cmd + [ "--simulate" + , "--flat-playlist" + -- Skip live videos in progress + , "--match-filter", "!is_live" + , "--print-to-file" + -- Write json with selected fields. + , "%(.{" ++ intercalate "," youtubePlaylistItemFields ++ "})j" + , tmpfile + , url + ] + Nothing + if ok + then flip catchIO (pure . Left . show) $ do + v <- map Aeson.eitherDecodeStrict . B8.lines + <$> B.readFile tmpfile + return $ case partitionEithers v of + ((parserr:_), _) -> + Left $ "yt-dlp json parse error: " ++ parserr + ([], r) -> Right r + else return $ Left $ if null outerr + then "yt-dlp failed" + else "yt-dlp failed: " ++ outerr + +-- There are other fields that yt-dlp can extract, but these are similar to +-- the information from an RSS feed. +youtubePlaylistItemFields :: [String] +youtubePlaylistItemFields = + [ "playlist_title" + , "playlist_uploader" + , "title" + , "description" + , "license" + , "url" + , "timestamp" + ] + +-- Parse JSON generated by yt-dlp for playlist. Note that any field +-- may be omitted when that information is not supported for a given website. +data YoutubePlaylistItem = YoutubePlaylistItem + { youtube_playlist_title :: Maybe String + , youtube_playlist_uploader :: Maybe String + , youtube_title :: Maybe String + , youtube_description :: Maybe String + , youtube_license :: Maybe String + , youtube_url :: Maybe String + , youtube_timestamp :: Maybe Integer -- ^ unix timestamp + } deriving (Generic, Show) + +instance Aeson.FromJSON YoutubePlaylistItem + where + parseJSON = Aeson.genericParseJSON Aeson.defaultOptions + { Aeson.fieldLabelModifier = drop (length "youtube_") } + diff --git a/Assistant.hs b/Assistant.hs new file mode 100644 index 0000000000..2e50a79ff1 --- /dev/null +++ b/Assistant.hs @@ -0,0 +1,194 @@ +{- git-annex assistant daemon + - + - Copyright 2012-2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} + +module Assistant where + +import qualified Annex +import Assistant.Common +import Assistant.DaemonStatus +import Assistant.NamedThread +import Assistant.Types.ThreadedMonad +import Assistant.Threads.DaemonStatus +import Assistant.Threads.Watcher +import Assistant.Threads.Committer +import Assistant.Threads.Pusher +import Assistant.Threads.Exporter +import Assistant.Threads.Merger +import Assistant.Threads.TransferWatcher +import Assistant.Threads.Transferrer +import Assistant.Threads.RemoteControl +import Assistant.Threads.SanityChecker +import Assistant.Threads.Cronner +import Assistant.Threads.ProblemFixer +#ifndef mingw32_HOST_OS +import Assistant.Threads.MountWatcher +#endif +import Assistant.Threads.NetWatcher +import Assistant.Threads.Upgrader +import Assistant.Threads.UpgradeWatcher +import Assistant.Threads.TransferScanner +import Assistant.Threads.TransferPoller +import Assistant.Threads.ConfigMonitor +import Assistant.Threads.Glacier +#ifdef WITH_WEBAPP +import Assistant.WebApp +import Assistant.Threads.WebApp +#ifdef WITH_PAIRING +import Assistant.Threads.PairListener +#endif +#else +import Assistant.Types.UrlRenderer +#endif +import qualified Utility.Daemon +import Utility.ThreadScheduler +import Utility.HumanTime +import Annex.Perms +import Annex.BranchState +import Utility.LogFile +import Annex.Path +#ifdef mingw32_HOST_OS +import Utility.Env +import System.Environment (getArgs) +#endif +import qualified Utility.Debug as Debug + +import Network.Socket (HostName, PortNumber) + +stopDaemon :: Annex () +stopDaemon = liftIO . Utility.Daemon.stopDaemon . fromRawFilePath + =<< fromRepo gitAnnexPidFile + +{- Starts the daemon. If the daemon is run in the foreground, once it's + - running, can start the browser. + - + - startbrowser is passed the url and html shim file, as well as the original + - stdout and stderr descriptors. -} +startDaemon :: Bool -> Bool -> Maybe Duration -> Maybe String -> Maybe HostName -> Maybe PortNumber -> Maybe (Maybe Handle -> Maybe Handle -> String -> FilePath -> IO ()) -> Annex () +startDaemon assistant foreground startdelay cannotrun listenhost listenport startbrowser = do + Annex.changeState $ \s -> s { Annex.daemon = True } + enableInteractiveBranchAccess + pidfile <- fromRepo gitAnnexPidFile + logfile <- fromRepo gitAnnexDaemonLogFile + liftIO $ Debug.debug "Assistant" $ "logging to " ++ fromRawFilePath logfile + createAnnexDirectory (parentDir pidfile) +#ifndef mingw32_HOST_OS + createAnnexDirectory (parentDir logfile) + let logfd = handleToFd =<< openLog (fromRawFilePath logfile) + if foreground + then do + origout <- liftIO $ catchMaybeIO $ + fdToHandle =<< dup stdOutput + origerr <- liftIO $ catchMaybeIO $ + fdToHandle =<< dup stdError + let undaemonize = Utility.Daemon.foreground logfd (Just (fromRawFilePath pidfile)) + start undaemonize $ + case startbrowser of + Nothing -> Nothing + Just a -> Just $ a origout origerr + else do + git_annex <- liftIO programPath + ps <- gitAnnexDaemonizeParams + start (Utility.Daemon.daemonize git_annex ps logfd (Just (fromRawFilePath pidfile)) False) Nothing +#else + -- Windows doesn't daemonize, but does redirect output to the + -- log file. The only way to do so is to restart the program. 
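+ -- The GIT_ANNEX_OUTPUT_REDIR environment variable marks the
+ -- restarted copy, so that it does not restart itself again.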
+ when (foreground || not foreground) $ do + let flag = "GIT_ANNEX_OUTPUT_REDIR" + createAnnexDirectory (parentDir logfile) + ifM (liftIO $ isNothing <$> getEnv flag) + ( liftIO $ withNullHandle $ \nullh -> do + loghandle <- openLog (fromRawFilePath logfile) + e <- getEnvironment + cmd <- programPath + ps <- getArgs + let p = (proc cmd ps) + { env = Just (addEntry flag "1" e) + , std_in = UseHandle nullh + , std_out = UseHandle loghandle + , std_err = UseHandle loghandle + } + exitcode <- withCreateProcess p $ \_ _ _ pid -> + waitForProcess pid + exitWith exitcode + , start (Utility.Daemon.foreground (Just (fromRawFilePath pidfile))) $ + case startbrowser of + Nothing -> Nothing + Just a -> Just $ a Nothing Nothing + ) +#endif + where + start daemonize webappwaiter = withThreadState $ \st -> do + checkCanWatch + dstatus <- startDaemonStatus + logfile <- fromRepo gitAnnexDaemonLogFile + liftIO $ Debug.debug "Assistant" $ "logging to " ++ fromRawFilePath logfile + liftIO $ daemonize $ + flip runAssistant (go webappwaiter) + =<< newAssistantData st dstatus + +#ifdef WITH_WEBAPP + go webappwaiter = do + d <- getAssistant id +#else + go _webappwaiter = do +#endif + urlrenderer <- liftIO newUrlRenderer +#ifdef WITH_WEBAPP + let webappthread = [ assist $ webAppThread d urlrenderer False cannotrun Nothing listenhost listenport webappwaiter ] +#else + let webappthread = [] +#endif + let threads = if isJust cannotrun + then webappthread + else webappthread ++ + [ watch commitThread +#ifdef WITH_WEBAPP +#ifdef WITH_PAIRING + , assist $ pairListenerThread urlrenderer +#endif +#endif + , assist pushThread + , assist pushRetryThread + , assist exportThread + , assist exportRetryThread + , assist mergeThread + , assist transferWatcherThread + , assist transferPollerThread + , assist transfererThread + , assist remoteControlThread + , assist daemonStatusThread + , assist $ sanityCheckerDailyThread urlrenderer + , assist sanityCheckerHourlyThread + , assist $ problemFixerThread urlrenderer +#ifndef mingw32_HOST_OS + , assist $ mountWatcherThread urlrenderer +#endif + , assist netWatcherThread + , assist $ upgraderThread urlrenderer + , assist $ upgradeWatcherThread urlrenderer + , assist netWatcherFallbackThread + , assist $ transferScannerThread urlrenderer + , assist $ cronnerThread urlrenderer + , assist configMonitorThread + , assist glacierThread + , watch watchThread + -- must come last so that all threads that wait + -- on it have already started waiting + , watch $ sanityCheckerStartupThread startdelay + ] + + mapM_ (startthread urlrenderer) threads + liftIO waitForTermination + + watch a = (True, a) + assist a = (False, a) + startthread urlrenderer (watcher, t) + | watcher || assistant = startNamedThread urlrenderer t + | otherwise = noop diff --git a/Assistant/Alert.hs b/Assistant/Alert.hs new file mode 100644 index 0000000000..ead791dcc9 --- /dev/null +++ b/Assistant/Alert.hs @@ -0,0 +1,460 @@ +{- git-annex assistant alerts + - + - Copyright 2012-2014 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE OverloadedStrings, CPP, BangPatterns #-} + +module Assistant.Alert where + +import Annex.Common +import Assistant.Types.Alert +import Assistant.Alert.Utility +import qualified Remote +import Utility.Tense +import Types.Transfer +import Types.Distribution +import Git.Types (RemoteName) + +import Data.String +import qualified Data.Text as T +import qualified Control.Exception as E + +#ifdef WITH_WEBAPP +import Assistant.DaemonStatus +import Assistant.WebApp.Types +import Assistant.WebApp (renderUrl) +#endif +import Assistant.Monad +import Assistant.Types.UrlRenderer + +{- Makes a button for an alert that opens a Route. + - + - If autoclose is set, the button will close the alert it's + - attached to when clicked. -} +#ifdef WITH_WEBAPP +mkAlertButton :: Bool -> T.Text -> UrlRenderer -> Route WebApp -> Assistant AlertButton +mkAlertButton autoclose label urlrenderer route = do + close <- asIO1 removeAlert + url <- liftIO $ renderUrl urlrenderer route [] + return $ AlertButton + { buttonLabel = label + , buttonUrl = url + , buttonAction = if autoclose then Just close else Nothing + , buttonPrimary = True + } +#endif + +renderData :: Alert -> TenseText +renderData = tenseWords . alertData + +baseActivityAlert :: Alert +baseActivityAlert = Alert + { alertClass = Activity + , alertHeader = Nothing + , alertMessageRender = renderData + , alertData = [] + , alertCounter = 0 + , alertBlockDisplay = False + , alertClosable = False + , alertPriority = Medium + , alertIcon = Just ActivityIcon + , alertCombiner = Nothing + , alertName = Nothing + , alertButtons = [] + } + +warningAlert :: String -> String -> Alert +warningAlert name msg = Alert + { alertClass = Warning + , alertHeader = Just $ tenseWords ["warning"] + , alertMessageRender = renderData + , alertData = [UnTensed $ T.pack msg] + , alertCounter = 0 + , alertBlockDisplay = True + , alertClosable = True + , alertPriority = High + , alertIcon = Just ErrorIcon + , alertCombiner = Just $ dataCombiner $ \_old new -> new + , alertName = Just $ WarningAlert name + , alertButtons = [] + } + +errorAlert :: String -> [AlertButton] -> Alert +errorAlert msg buttons = Alert + { alertClass = Error + , alertHeader = Nothing + , alertMessageRender = renderData + , alertData = [UnTensed $ T.pack msg] + , alertCounter = 0 + , alertBlockDisplay = True + , alertClosable = True + , alertPriority = Pinned + , alertIcon = Just ErrorIcon + , alertCombiner = Nothing + , alertName = Nothing + , alertButtons = buttons + } + +activityAlert :: Maybe TenseText -> [TenseChunk] -> Alert +activityAlert header dat = baseActivityAlert + { alertHeader = header + , alertData = dat + } + +startupScanAlert :: Alert +startupScanAlert = activityAlert Nothing + [Tensed "Performing" "Performed", "startup scan"] + +{- Displayed when a shutdown is occurring, so will be seen after shutdown + - has happened. -} +shutdownAlert :: Alert +shutdownAlert = warningAlert "shutdown" "git-annex has been shut down" + +commitAlert :: Alert +commitAlert = activityAlert Nothing + [Tensed "Committing" "Committed", "changes to git"] + +showRemotes :: [RemoteName] -> TenseChunk +showRemotes = UnTensed . T.intercalate ", " . map T.pack + +syncAlert :: [Remote] -> Alert +syncAlert = syncAlert' . 
map Remote.name + +syncAlert' :: [RemoteName] -> Alert +syncAlert' rs = baseActivityAlert + { alertName = Just SyncAlert + , alertHeader = Just $ tenseWords + [Tensed "Syncing" "Synced", "with", showRemotes rs] + , alertPriority = Low + , alertIcon = Just SyncIcon + } + +syncResultAlert :: [Remote] -> [Remote] -> Alert +syncResultAlert succeeded failed = syncResultAlert' + (map Remote.name succeeded) + (map Remote.name failed) + +syncResultAlert' :: [RemoteName] -> [RemoteName] -> Alert +syncResultAlert' succeeded failed = makeAlertFiller (not $ null succeeded) $ + baseActivityAlert + { alertName = Just SyncAlert + , alertHeader = Just $ tenseWords msg + } + where + msg + | null succeeded = ["Failed to sync with", showRemotes failed] + | null failed = ["Synced with", showRemotes succeeded] + | otherwise = + [ "Synced with", showRemotes succeeded + , "but not with", showRemotes failed + ] + +sanityCheckAlert :: Alert +sanityCheckAlert = activityAlert + (Just $ tenseWords [Tensed "Running" "Ran", "daily sanity check"]) + ["to make sure everything is ok."] + +sanityCheckFixAlert :: String -> Alert +sanityCheckFixAlert msg = Alert + { alertClass = Warning + , alertHeader = Just $ tenseWords ["Fixed a problem"] + , alertMessageRender = render + , alertData = [UnTensed $ T.pack msg] + , alertCounter = 0 + , alertBlockDisplay = True + , alertPriority = High + , alertClosable = True + , alertIcon = Just ErrorIcon + , alertName = Just SanityCheckFixAlert + , alertCombiner = Just $ dataCombiner (++) + , alertButtons = [] + } + where + render alert = tenseWords $ alerthead : alertData alert ++ [alertfoot] + alerthead = "The daily sanity check found and fixed a problem:" + alertfoot = "If these problems persist, consider filing a bug report." + +fsckingAlert :: AlertButton -> Maybe Remote -> Alert +fsckingAlert button mr = baseActivityAlert + { alertData = case mr of + Nothing -> [ UnTensed $ T.pack $ "Consistency check in progress" ] + Just r -> [ UnTensed $ T.pack $ "Consistency check of " ++ Remote.name r ++ " in progress"] + , alertButtons = [button] + } + +showFscking :: UrlRenderer -> Maybe Remote -> IO (Either E.SomeException a) -> Assistant a +showFscking urlrenderer mr a = do +#ifdef WITH_WEBAPP + button <- mkAlertButton False (T.pack "Configure") urlrenderer ConfigFsckR + r <- alertDuring (fsckingAlert button mr) $ + liftIO a +#else + r <- liftIO a +#endif + either (liftIO . E.throwIO) return r + +notFsckedNudge :: UrlRenderer -> Maybe Remote -> Assistant () +#ifdef WITH_WEBAPP +notFsckedNudge urlrenderer mr = do + button <- mkAlertButton True (T.pack "Configure") urlrenderer ConfigFsckR + void $ addAlert (notFsckedAlert mr button) +#else +notFsckedNudge _ _ = noop +#endif + +notFsckedAlert :: Maybe Remote -> AlertButton -> Alert +notFsckedAlert mr button = Alert + { alertHeader = Just $ fromString $ concat + [ "You should enable consistency checking to protect your data" + , maybe "" (\r -> " in " ++ Remote.name r) mr + , "." 
+ ] + , alertIcon = Just InfoIcon + , alertPriority = High + , alertButtons = [button] + , alertClosable = True + , alertClass = Message + , alertMessageRender = renderData + , alertCounter = 0 + , alertBlockDisplay = True + , alertName = Just NotFsckedAlert + , alertCombiner = Just $ dataCombiner $ \_old new -> new + , alertData = [] + } + +baseUpgradeAlert :: [AlertButton] -> TenseText -> Alert +baseUpgradeAlert buttons message = Alert + { alertHeader = Just message + , alertIcon = Just UpgradeIcon + , alertPriority = High + , alertButtons = buttons + , alertClosable = True + , alertClass = Message + , alertMessageRender = renderData + , alertCounter = 0 + , alertBlockDisplay = True + , alertName = Just UpgradeAlert + , alertCombiner = Just $ fullCombiner $ \new _old -> new + , alertData = [] + } + +canUpgradeAlert :: AlertPriority -> GitAnnexVersion -> AlertButton -> Alert +canUpgradeAlert priority version button = + (baseUpgradeAlert [button] $ fromString msg) + { alertPriority = priority + , alertData = [fromString $ " (version " ++ version ++ ")"] + } + where + msg = if priority >= High + then "An important upgrade of git-annex is available!" + else "An upgrade of git-annex is available." + +upgradeReadyAlert :: AlertButton -> Alert +upgradeReadyAlert button = baseUpgradeAlert [button] $ + fromString "A new version of git-annex has been installed." + +upgradingAlert :: Alert +upgradingAlert = activityAlert Nothing [ fromString "Upgrading git-annex" ] + +upgradeFinishedAlert :: Maybe AlertButton -> GitAnnexVersion -> Alert +upgradeFinishedAlert button version = + baseUpgradeAlert (maybeToList button) $ fromString $ + "Finished upgrading git-annex to version " ++ version + +upgradeFailedAlert :: String -> Alert +upgradeFailedAlert msg = (errorAlert msg []) + { alertHeader = Just $ fromString "Upgrade failed." } + +unusedFilesAlert :: [AlertButton] -> String -> Alert +unusedFilesAlert buttons message = Alert + { alertHeader = Just $ fromString $ unwords + [ "Old and deleted files are piling up --" + , message + ] + , alertIcon = Just InfoIcon + , alertPriority = High + , alertButtons = buttons + , alertClosable = True + , alertClass = Message + , alertMessageRender = renderData + , alertCounter = 0 + , alertBlockDisplay = True + , alertName = Just UnusedFilesAlert + , alertCombiner = Just $ fullCombiner $ \new _old -> new + , alertData = [] + } + +brokenRepositoryAlert :: [AlertButton] -> Alert +brokenRepositoryAlert = errorAlert "Serious problems have been detected with your repository. This needs your immediate attention!" 
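+
+{- A minimal sketch, not used elsewhere in this file: the constructors
+ - above compose via record-update syntax, so a new kind of alert can
+ - be built on e.g. errorAlert and override individual fields. The
+ - name and wording here are hypothetical. -}
+diskFullAlert :: [AlertButton] -> Alert
+diskFullAlert buttons = (errorAlert "This repository's disk is full." buttons)
+	{ alertHeader = Just $ fromString "Disk full" }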
+ +repairingAlert :: String -> Alert +repairingAlert repodesc = activityAlert Nothing + [ Tensed "Attempting to repair" "Repaired" + , UnTensed $ T.pack repodesc + ] + +pairingAlert :: AlertButton -> Alert +pairingAlert button = baseActivityAlert + { alertData = [ UnTensed "Pairing in progress" ] + , alertPriority = High + , alertButtons = [button] + } + +pairRequestReceivedAlert :: String -> AlertButton -> Alert +pairRequestReceivedAlert who button = Alert + { alertClass = Message + , alertHeader = Nothing + , alertMessageRender = renderData + , alertData = [UnTensed $ T.pack $ who ++ " is sending a pair request."] + , alertCounter = 0 + , alertBlockDisplay = False + , alertPriority = High + , alertClosable = True + , alertIcon = Just InfoIcon + , alertName = Just $ PairAlert who + , alertCombiner = Just $ dataCombiner $ \_old new -> new + , alertButtons = [button] + } + +pairRequestAcknowledgedAlert :: String -> Maybe AlertButton -> Alert +pairRequestAcknowledgedAlert who button = baseActivityAlert + { alertData = ["Pairing with", UnTensed (T.pack who), Tensed "in progress" "complete"] + , alertPriority = High + , alertName = Just $ PairAlert who + , alertCombiner = Just $ dataCombiner $ \_old new -> new + , alertButtons = maybeToList button + } + +connectionNeededAlert :: AlertButton -> Alert +connectionNeededAlert button = Alert + { alertHeader = Just "Share with friends, and keep your devices in sync across the cloud." + , alertIcon = Just ConnectionIcon + , alertPriority = High + , alertButtons = [button] + , alertClosable = True + , alertClass = Message + , alertMessageRender = renderData + , alertCounter = 0 + , alertBlockDisplay = True + , alertName = Just ConnectionNeededAlert + , alertCombiner = Just $ dataCombiner $ \_old new -> new + , alertData = [] + } + +cloudRepoNeededAlert :: Maybe String -> AlertButton -> Alert +cloudRepoNeededAlert friendname button = Alert + { alertHeader = Just $ fromString $ unwords + [ "Unable to download files from" + , (fromMaybe "your other devices" friendname) ++ "." + ] + , alertIcon = Just ErrorIcon + , alertPriority = High + , alertButtons = [button] + , alertClosable = True + , alertClass = Message + , alertMessageRender = renderData + , alertCounter = 0 + , alertBlockDisplay = True + , alertName = Just $ CloudRepoNeededAlert + , alertCombiner = Just $ dataCombiner $ \_old new -> new + , alertData = [] + } + +remoteRemovalAlert :: String -> AlertButton -> Alert +remoteRemovalAlert desc button = Alert + { alertHeader = Just $ fromString $ + "The repository \"" ++ desc ++ + "\" has been emptied, and can now be removed." + , alertIcon = Just InfoIcon + , alertPriority = High + , alertButtons = [button] + , alertClosable = True + , alertClass = Message + , alertMessageRender = renderData + , alertCounter = 0 + , alertBlockDisplay = True + , alertName = Just $ RemoteRemovalAlert desc + , alertCombiner = Just $ dataCombiner $ \_old new -> new + , alertData = [] + } + +{- Show a message that relates to a list of files. + - + - The most recent several files are shown, and a count of any others. -} +fileAlert :: TenseChunk -> [FilePath] -> Alert +fileAlert msg files = (activityAlert Nothing shortfiles) + { alertName = Just $ FileAlert msg + , alertMessageRender = renderer + , alertCounter = counter + , alertCombiner = Just $ fullCombiner combiner + } + where + maxfilesshown = 10 + + (!somefiles, !counter) = splitcounter (dedupadjacent files) + !shortfiles = map (fromString . shortFile . 
takeFileName) somefiles + + renderer alert = tenseWords $ msg : alertData alert ++ showcounter + where + showcounter = case alertCounter alert of + 0 -> [] + _ -> [fromString $ "and " ++ show (alertCounter alert) ++ " other files"] + + dedupadjacent (x:y:rest) + | x == y = dedupadjacent (y:rest) + | otherwise = x : dedupadjacent (y:rest) + dedupadjacent (x:[]) = [x] + dedupadjacent [] = [] + + {- Note that this ensures the counter is never 1; no need to say + - "1 file" when the filename could be shown. -} + splitcounter l + | length l <= maxfilesshown = (l, 0) + | otherwise = + let (keep, rest) = splitAt (maxfilesshown - 1) l + in (keep, length rest) + + combiner new old = + let (!fs, n) = splitcounter $ + dedupadjacent $ alertData new ++ alertData old + !cnt = n + alertCounter new + alertCounter old + in old + { alertData = fs + , alertCounter = cnt + } + +addFileAlert :: [FilePath] -> Alert +addFileAlert = fileAlert (Tensed "Adding" "Added") + +{- This is only used as a success alert after a transfer, not during it. -} +transferFileAlert :: Direction -> Bool -> FilePath -> Alert +transferFileAlert direction True file + | direction == Upload = fileAlert "Uploaded" [file] + | otherwise = fileAlert "Downloaded" [file] +transferFileAlert direction False file + | direction == Upload = fileAlert "Upload failed" [file] + | otherwise = fileAlert "Download failed" [file] + +dataCombiner :: ([TenseChunk] -> [TenseChunk] -> [TenseChunk]) -> AlertCombiner +dataCombiner combiner = fullCombiner $ + \new old -> old { alertData = alertData new `combiner` alertData old } + +fullCombiner :: (Alert -> Alert -> Alert) -> AlertCombiner +fullCombiner combiner new old + | alertClass new /= alertClass old = Nothing + | alertName new == alertName old = + Just $! new `combiner` old + | otherwise = Nothing + +shortFile :: FilePath -> String +shortFile f + | len < maxlen = f + | otherwise = take half f ++ ".." ++ drop (len - half) f + where + len = length f + maxlen = 20 + half = (maxlen - 2) `div` 2 + diff --git a/Assistant/Alert/Utility.hs b/Assistant/Alert/Utility.hs new file mode 100644 index 0000000000..8016d620a4 --- /dev/null +++ b/Assistant/Alert/Utility.hs @@ -0,0 +1,129 @@ +{- git-annex assistant alert utilities + - + - Copyright 2012, 2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Alert.Utility where + +import Annex.Common +import Assistant.Types.Alert +import Utility.Tense + +import qualified Data.Text as T +import Data.Text (Text) +import qualified Data.Map.Strict as M + +{- This is as many alerts as it makes sense to display at a time. + - A display might be smaller, or larger, the point is to not overwhelm the + - user with a ton of alerts. -} +displayAlerts :: Int +displayAlerts = 6 + +{- This is not a hard maximum, but there's no point in keeping a great + - many filler alerts in an AlertMap, so when there's more than this many, + - they start being pruned, down toward displayAlerts. -} +maxAlerts :: Int +maxAlerts = displayAlerts * 2 + +type AlertPair = (AlertId, Alert) + +{- The desired order is the reverse of: + - + - - Pinned alerts + - - High priority alerts, newest first + - - Medium priority Activity, newest first (mostly used for Activity) + - - Low priority alerts, newest first + - - Filler priority alerts, newest first + - - Ties are broken by the AlertClass, with Errors etc coming first. 
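+ -
+ - A concrete case (sketch): two High priority alerts with AlertIds
+ - i1 < i2 sort here as (i1, a) before (i2, a); reversing that for
+ - display puts the newer i2 first, i.e. newest first.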
+ -} +compareAlertPairs :: AlertPair -> AlertPair -> Ordering +compareAlertPairs + (aid, Alert { alertClass = aclass, alertPriority = aprio }) + (bid, Alert { alertClass = bclass, alertPriority = bprio }) + = compare aprio bprio + `mappend` compare aid bid + `mappend` compare aclass bclass + +sortAlertPairs :: [AlertPair] -> [AlertPair] +sortAlertPairs = sortBy compareAlertPairs + +{- Renders an alert's header for display, if it has one. -} +renderAlertHeader :: Alert -> Maybe Text +renderAlertHeader alert = renderTense (alertTense alert) <$> alertHeader alert + +{- Renders an alert's message for display. -} +renderAlertMessage :: Alert -> Text +renderAlertMessage alert = renderTense (alertTense alert) $ + (alertMessageRender alert) alert + +showAlert :: Alert -> String +showAlert alert = T.unpack $ T.unwords $ catMaybes + [ renderAlertHeader alert + , Just $ renderAlertMessage alert + ] + +alertTense :: Alert -> Tense +alertTense alert + | alertClass alert == Activity = Present + | otherwise = Past + +{- Checks if two alerts display the same. -} +effectivelySameAlert :: Alert -> Alert -> Bool +effectivelySameAlert x y = all id + [ alertClass x == alertClass y + , alertHeader x == alertHeader y + , alertData x == alertData y + , alertBlockDisplay x == alertBlockDisplay y + , alertClosable x == alertClosable y + , alertPriority x == alertPriority y + ] + +makeAlertFiller :: Bool -> Alert -> Alert +makeAlertFiller success alert + | isFiller alert = alert + | otherwise = alert + { alertClass = if c == Activity then c' else c + , alertPriority = Filler + , alertClosable = True + , alertButtons = [] + , alertIcon = Just $ if success then SuccessIcon else ErrorIcon + } + where + c = alertClass alert + c' + | success = Success + | otherwise = Error + +isFiller :: Alert -> Bool +isFiller alert = alertPriority alert == Filler + +{- Updates the Alertmap, adding or updating an alert. + - + - Any old filler that looks the same as the alert is removed. + - + - Or, if the alert has an alertCombiner that combines it with + - an old alert, the old alert is replaced with the result, and the + - alert is removed. + - + - Old filler alerts are pruned once maxAlerts is reached. + -} +mergeAlert :: AlertId -> Alert -> AlertMap -> AlertMap +mergeAlert i al m = maybe updatePrune updateCombine (alertCombiner al) + where + pruneSame k al' = k == i || not (effectivelySameAlert al al') + pruneBloat m' + | bloat > 0 = M.fromList $ pruneold $ M.toList m' + | otherwise = m' + where + bloat = M.size m' - maxAlerts + pruneold l = + let (f, rest) = partition (\(_, a) -> isFiller a) l + in drop bloat f ++ rest + updatePrune = pruneBloat $ M.filterWithKey pruneSame $ M.insert i al m + updateCombine combiner = + let combined = M.mapMaybe (combiner al) m + in if M.null combined + then updatePrune + else M.delete i $ M.union combined m diff --git a/Assistant/BranchChange.hs b/Assistant/BranchChange.hs new file mode 100644 index 0000000000..b78e13a015 --- /dev/null +++ b/Assistant/BranchChange.hs @@ -0,0 +1,19 @@ +{- git-annex assistant git-annex branch change tracking + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.BranchChange where + +import Assistant.Common +import Assistant.Types.BranchChange + +import Control.Concurrent.MSampleVar + +branchChanged :: Assistant () +branchChanged = flip writeSV () <<~ (fromBranchChangeHandle . branchChangeHandle) + +waitBranchChange :: Assistant () +waitBranchChange = readSV <<~ (fromBranchChangeHandle . 
branchChangeHandle) diff --git a/Assistant/Changes.hs b/Assistant/Changes.hs new file mode 100644 index 0000000000..4a20850fa0 --- /dev/null +++ b/Assistant/Changes.hs @@ -0,0 +1,47 @@ +{- git-annex assistant change tracking + - + - Copyright 2012-2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Changes where + +import Assistant.Common +import Assistant.Types.Changes +import Utility.TList + +import Data.Time.Clock +import Control.Concurrent.STM + +{- Handlers call this when they made a change that needs to get committed. -} +madeChange :: FilePath -> ChangeInfo -> Assistant (Maybe Change) +madeChange f t = Just <$> (Change <$> liftIO getCurrentTime <*> pure f <*> pure t) + +noChange :: Assistant (Maybe Change) +noChange = return Nothing + +{- Indicates an add needs to be done, but has not started yet. -} +pendingAddChange :: FilePath -> Assistant (Maybe Change) +pendingAddChange f = Just <$> (PendingAddChange <$> liftIO getCurrentTime <*> pure f) + +{- Gets all unhandled changes. + - Blocks until at least one change is made. -} +getChanges :: Assistant [Change] +getChanges = (atomically . getTList) <<~ changePool + +{- Gets all unhandled changes, without blocking. -} +getAnyChanges :: Assistant [Change] +getAnyChanges = (atomically . takeTList) <<~ changePool + +{- Puts unhandled changes back into the pool. + - Note: Original order is not preserved. -} +refillChanges :: [Change] -> Assistant () +refillChanges cs = (atomically . flip appendTList cs) <<~ changePool + +{- Records a change to the pool. -} +recordChange :: Change -> Assistant () +recordChange c = (atomically . flip snocTList c) <<~ changePool + +recordChanges :: [Change] -> Assistant () +recordChanges = refillChanges diff --git a/Assistant/Commits.hs b/Assistant/Commits.hs new file mode 100644 index 0000000000..09c3172ad4 --- /dev/null +++ b/Assistant/Commits.hs @@ -0,0 +1,32 @@ +{- git-annex assistant commit tracking + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Commits where + +import Assistant.Common +import Assistant.Types.Commits +import Utility.TList + +import Control.Concurrent.STM + +{- Gets all unhandled commits. + - Blocks until at least one commit is made. -} +getCommits :: Assistant [Commit] +getCommits = (atomically . getTList) <<~ commitChan + +{- Records a commit in the channel. -} +recordCommit :: Assistant () +recordCommit = (atomically . flip consTList Commit) <<~ commitChan + +{- Gets all unhandled export commits. + - Blocks until at least one export commit is made. -} +getExportCommits :: Assistant [Commit] +getExportCommits = (atomically . getTList) <<~ exportCommitChan + +{- Records an export commit in the channel. -} +recordExportCommit :: Assistant () +recordExportCommit = (atomically . flip consTList Commit) <<~ exportCommitChan diff --git a/Assistant/Common.hs b/Assistant/Common.hs new file mode 100644 index 0000000000..33532ec216 --- /dev/null +++ b/Assistant/Common.hs @@ -0,0 +1,14 @@ +{- Common infrastructure for the git-annex assistant. + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +module Assistant.Common (module X) where + +import Annex.Common as X hiding (debug) +import Assistant.Monad as X +import Assistant.Types.DaemonStatus as X +import Assistant.Types.NamedThread as X +import Assistant.Types.Alert as X diff --git a/Assistant/CredPairCache.hs b/Assistant/CredPairCache.hs new file mode 100644 index 0000000000..eba3d2b779 --- /dev/null +++ b/Assistant/CredPairCache.hs @@ -0,0 +1,53 @@ +{- git-annex assistant CredPair cache. + - + - Copyright 2014 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE BangPatterns #-} + +module Assistant.CredPairCache ( + cacheCred, + getCachedCred, + expireCachedCred, +) where + +import Assistant.Types.CredPairCache +import Types.Creds +import Assistant.Common +import Utility.ThreadScheduler + +import qualified Data.Map as M +import Control.Concurrent + +{- Caches a CredPair, but only for a limited time, after which it + - will expire. + - + - Note that repeatedly caching the same CredPair + - does not reset its expiry time. + -} +cacheCred :: CredPair -> Seconds -> Assistant () +cacheCred (login, password) expireafter = do + cache <- getAssistant credPairCache + liftIO $ do + changeStrict cache $ M.insert login password + void $ forkIO $ do + threadDelaySeconds expireafter + changeStrict cache $ M.delete login + +getCachedCred :: Login -> Assistant (Maybe Password) +getCachedCred login = do + cache <- getAssistant credPairCache + liftIO $ M.lookup login <$> readMVar cache + +expireCachedCred :: Login -> Assistant () +expireCachedCred login = do + cache <- getAssistant credPairCache + liftIO $ changeStrict cache $ M.delete login + +{- Update map strictly to avoid keeping references to old creds in memory. -} +changeStrict :: CredPairCache -> (M.Map Login Password -> M.Map Login Password) -> IO () +changeStrict cache a = modifyMVar_ cache $ \m -> do + let !m' = a m + return m' diff --git a/Assistant/DaemonStatus.hs b/Assistant/DaemonStatus.hs new file mode 100644 index 0000000000..68edd95c47 --- /dev/null +++ b/Assistant/DaemonStatus.hs @@ -0,0 +1,273 @@ +{- git-annex assistant daemon status + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE BangPatterns #-} + +module Assistant.DaemonStatus where + +import Assistant.Common +import Assistant.Alert.Utility +import Utility.Tmp +import Utility.NotificationBroadcaster +import Types.Availability +import Types.Transfer +import Logs.Transfer +import Logs.Trust +import Utility.TimeStamp +import qualified Remote +import qualified Types.Remote as Remote +import Config.DynamicConfig +import Annex.SpecialRemote.Config + +import Control.Concurrent.STM +import System.Posix.Types +import Data.Time.Clock.POSIX +import qualified Data.Map.Strict as M +import qualified Data.Set as S + +getDaemonStatus :: Assistant DaemonStatus +getDaemonStatus = (atomically . readTVar) <<~ daemonStatusHandle + +modifyDaemonStatus_ :: (DaemonStatus -> DaemonStatus) -> Assistant () +modifyDaemonStatus_ a = modifyDaemonStatus $ \s -> (a s, ()) + +modifyDaemonStatus :: (DaemonStatus -> (DaemonStatus, b)) -> Assistant b +modifyDaemonStatus a = do + dstatus <- getAssistant daemonStatusHandle + liftIO $ do + (s, b) <- atomically $ do + r@(!s, _) <- a <$> readTVar dstatus + writeTVar dstatus s + return r + sendNotification $ changeNotifier s + return b + +{- Returns a function that updates the lists of syncable remotes + - and other associated information. 
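+ - A typical use, as in updateSyncRemotes below, is:
+ -
+ -	modifyDaemonStatus_ =<< liftAnnex calcSyncRemotes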
-} +calcSyncRemotes :: Annex (DaemonStatus -> DaemonStatus) +calcSyncRemotes = do + rs <- filterM (liftIO . getDynamicConfig . remoteAnnexSync . Remote.gitconfig) + =<< (concat . Remote.byCost <$> Remote.remoteList) + alive <- trustExclude DeadTrusted (map Remote.uuid rs) + let good r = Remote.uuid r `elem` alive + let syncable = filter good rs + contentremotes <- filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . Remote.gitconfig) $ + filter (\r -> Remote.uuid r /= NoUUID) syncable + let (exportremotes, nonexportremotes) = partition (exportTree . Remote.config) contentremotes + let isimport r = importTree (Remote.config r) || Remote.thirdPartyPopulated (Remote.remotetype r) + let dataremotes = filter (not . isimport) nonexportremotes + tocloud <- anyM iscloud contentremotes + + return $ \dstatus -> dstatus + { syncRemotes = syncable + , syncGitRemotes = filter Remote.gitSyncableRemote syncable + , syncDataRemotes = dataremotes + , exportRemotes = exportremotes + , downloadRemotes = contentremotes + , syncingToCloudRemote = tocloud + } + where + iscloud r + | Remote.readonly r = pure False + | otherwise = tryNonAsync (Remote.availability r) >>= return . \case + Right GloballyAvailable -> True + _ -> False + +{- Updates the syncRemotes list from the list of all remotes in Annex state. -} +updateSyncRemotes :: Assistant () +updateSyncRemotes = do + modifyDaemonStatus_ =<< liftAnnex calcSyncRemotes + status <- getDaemonStatus + liftIO $ sendNotification $ syncRemotesNotifier status + + when (syncingToCloudRemote status) $ + updateAlertMap $ + M.filter $ \alert -> + alertName alert /= Just CloudRepoNeededAlert + +changeCurrentlyConnected :: (S.Set UUID -> S.Set UUID) -> Assistant () +changeCurrentlyConnected sm = do + modifyDaemonStatus_ $ \ds -> ds + { currentlyConnectedRemotes = sm (currentlyConnectedRemotes ds) + } + v <- currentlyConnectedRemotes <$> getDaemonStatus + debug [show v] + liftIO . sendNotification =<< syncRemotesNotifier <$> getDaemonStatus + +updateScheduleLog :: Assistant () +updateScheduleLog = + liftIO . sendNotification =<< scheduleLogNotifier <$> getDaemonStatus + +{- Load any previous daemon status file, and store it in a MVar for this + - process to use as its DaemonStatus. Also gets current transfer status. -} +startDaemonStatus :: Annex DaemonStatusHandle +startDaemonStatus = do + file <- fromRepo gitAnnexDaemonStatusFile + status <- liftIO $ + flip catchDefaultIO (readDaemonStatusFile file) =<< newDaemonStatus + transfers <- M.fromList <$> getTransfers + addsync <- calcSyncRemotes + liftIO $ atomically $ newTVar $ addsync $ status + { scanComplete = False + , sanityCheckRunning = False + , currentTransfers = transfers + } + +{- Don't just dump out the structure, because it will change over time, + - and parts of it are not relevant. -} +writeDaemonStatusFile :: FilePath -> DaemonStatus -> IO () +writeDaemonStatusFile file status = + viaTmp writeFile file =<< serialized <$> getPOSIXTime + where + serialized now = unlines + [ "lastRunning:" ++ show now + , "scanComplete:" ++ show (scanComplete status) + , "sanityCheckRunning:" ++ show (sanityCheckRunning status) + , "lastSanityCheck:" ++ maybe "" show (lastSanityCheck status) + ] + +readDaemonStatusFile :: FilePath -> IO DaemonStatus +readDaemonStatusFile file = parse <$> newDaemonStatus <*> readFile file + where + parse status = foldr parseline status . 
lines + parseline line status + | key == "lastRunning" = parseval parsePOSIXTime $ \v -> + status { lastRunning = Just v } + | key == "scanComplete" = parseval readish $ \v -> + status { scanComplete = v } + | key == "sanityCheckRunning" = parseval readish $ \v -> + status { sanityCheckRunning = v } + | key == "lastSanityCheck" = parseval parsePOSIXTime $ \v -> + status { lastSanityCheck = Just v } + | otherwise = status -- unparsable line + where + (key, value) = separate (== ':') line + parseval parser a = maybe status a (parser value) + +{- Checks if a time stamp was made after the daemon was lastRunning. + - + - Some slop is built in; this really checks if the time stamp was made + - at least ten minutes after the daemon was lastRunning. This is to + - ensure the daemon shut down cleanly, and deal with minor clock skew. + - + - If the daemon has never ran before, this always returns False. + -} +afterLastDaemonRun :: EpochTime -> DaemonStatus -> Bool +afterLastDaemonRun timestamp status = maybe False (< t) (lastRunning status) + where + t = realToFrac (timestamp + slop) :: POSIXTime + slop = fromIntegral tenMinutes + +tenMinutes :: Int +tenMinutes = 10 * 60 + +{- Mutates the transfer map. Runs in STM so that the transfer map can + - be modified in the same transaction that modifies the transfer queue. + - Note that this does not send a notification of the change; that's left + - to the caller. -} +adjustTransfersSTM :: DaemonStatusHandle -> (TransferMap -> TransferMap) -> STM () +adjustTransfersSTM dstatus a = do + s <- readTVar dstatus + let !v = a (currentTransfers s) + writeTVar dstatus $ s { currentTransfers = v } + +{- Checks if a transfer is currently running. -} +checkRunningTransferSTM :: DaemonStatusHandle -> Transfer -> STM Bool +checkRunningTransferSTM dstatus t = M.member t . currentTransfers + <$> readTVar dstatus + +{- Alters a transfer's info, if the transfer is in the map. -} +alterTransferInfo :: Transfer -> (TransferInfo -> TransferInfo) -> Assistant () +alterTransferInfo t a = updateTransferInfo' $ M.adjust a t + +{- Updates a transfer's info. Adds the transfer to the map if necessary, + - or if already present, updates it while preserving the old transferTid, + - transferPaused, and bytesComplete values, which are not written to disk. -} +updateTransferInfo :: Transfer -> TransferInfo -> Assistant () +updateTransferInfo t info = updateTransferInfo' $ M.insertWith merge t info + where + merge new old = new + { transferTid = maybe (transferTid new) Just (transferTid old) + , transferPaused = transferPaused new || transferPaused old + , bytesComplete = maybe (bytesComplete new) Just (bytesComplete old) + } + +updateTransferInfo' :: (TransferMap -> TransferMap) -> Assistant () +updateTransferInfo' a = notifyTransfer `after` modifyDaemonStatus_ update + where + update s = s { currentTransfers = a (currentTransfers s) } + +{- Removes a transfer from the map, and returns its info. -} +removeTransfer :: Transfer -> Assistant (Maybe TransferInfo) +removeTransfer t = notifyTransfer `after` modifyDaemonStatus remove + where + remove s = + let (info, ts) = M.updateLookupWithKey + (\_k _v -> Nothing) + t (currentTransfers s) + in (s { currentTransfers = ts }, info) + +{- Send a notification when a transfer is changed. -} +notifyTransfer :: Assistant () +notifyTransfer = do + dstatus <- getAssistant daemonStatusHandle + liftIO $ sendNotification + =<< transferNotifier <$> atomically (readTVar dstatus) + +{- Send a notification when alerts are changed. 
-} +notifyAlert :: Assistant () +notifyAlert = do + dstatus <- getAssistant daemonStatusHandle + liftIO $ sendNotification + =<< alertNotifier <$> atomically (readTVar dstatus) + +{- Returns the alert's identifier, which can be used to remove it. -} +addAlert :: Alert -> Assistant AlertId +addAlert alert = notifyAlert `after` modifyDaemonStatus add + where + add s = (s { lastAlertId = i, alertMap = m }, i) + where + !i = nextAlertId $ lastAlertId s + !m = mergeAlert i alert (alertMap s) + +removeAlert :: AlertId -> Assistant () +removeAlert i = updateAlert i (const Nothing) + +updateAlert :: AlertId -> (Alert -> Maybe Alert) -> Assistant () +updateAlert i a = updateAlertMap $ \m -> M.update a i m + +updateAlertMap :: (AlertMap -> AlertMap) -> Assistant () +updateAlertMap a = notifyAlert `after` modifyDaemonStatus_ update + where + update s = + let !m = a (alertMap s) + in s { alertMap = m } + +{- Displays an alert while performing an activity that returns True on + - success. + - + - The alert is left visible afterwards, as filler. + - Old filler is pruned, to prevent the map growing too large. -} +alertWhile :: Alert -> Assistant Bool -> Assistant Bool +alertWhile alert a = alertWhile' alert $ do + r <- a + return (r, r) + +{- Like alertWhile, but allows the activity to return a value too. -} +alertWhile' :: Alert -> Assistant (Bool, a) -> Assistant a +alertWhile' alert a = do + let alert' = alert { alertClass = Activity } + i <- addAlert alert' + (ok, r) <- a + updateAlertMap $ mergeAlert i $ makeAlertFiller ok alert' + return r + +{- Displays an alert while performing an activity, then removes it. -} +alertDuring :: Alert -> Assistant a -> Assistant a +alertDuring alert a = do + i <- addAlert $ alert { alertClass = Activity } + removeAlert i `after` a diff --git a/Assistant/DeleteRemote.hs b/Assistant/DeleteRemote.hs new file mode 100644 index 0000000000..fa788f086f --- /dev/null +++ b/Assistant/DeleteRemote.hs @@ -0,0 +1,89 @@ +{- git-annex assistant remote deletion utilities + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Assistant.DeleteRemote where + +import Assistant.Common +import Assistant.Types.UrlRenderer +import Assistant.TransferQueue +import Types.Transfer +import Logs.Location +import Assistant.DaemonStatus +import qualified Remote +import Remote.List.Util +import qualified Git.Remote.Remove +import Logs.Trust +import qualified Annex + +#ifdef WITH_WEBAPP +import Assistant.WebApp.Types +import Assistant.Alert +import qualified Data.Text as T +#endif + +{- Removes a remote (but leave the repository as-is), and returns the old + - Remote data. -} +disableRemote :: UUID -> Assistant Remote +disableRemote uuid = do + remote <- fromMaybe (giveup "unknown remote") + <$> liftAnnex (Remote.remoteFromUUID uuid) + liftAnnex $ do + inRepo $ Git.Remote.Remove.remove (Remote.name remote) + remotesChanged + updateSyncRemotes + return remote + +{- Removes a remote, marking it dead .-} +removeRemote :: UUID -> Assistant Remote +removeRemote uuid = do + liftAnnex $ trustSet uuid DeadTrusted + disableRemote uuid + +{- Called when a Remote is probably empty, to remove it. + - + - This does one last check for any objects remaining in the Remote, + - and if there are any, queues Downloads of them, and defers removing + - the remote for later. This is to catch any objects not referred to + - in keys in the current branch. 
+ -} +removableRemote :: UrlRenderer -> UUID -> Assistant () +removableRemote urlrenderer uuid = getkeys >>= \case + Just keys + | null keys -> finishRemovingRemote urlrenderer uuid + | otherwise -> do + r <- fromMaybe (giveup "unknown remote") + <$> liftAnnex (Remote.remoteFromUUID uuid) + mapM_ (queueremaining r) keys + Nothing -> noop + where + queueremaining r k = + queueTransferWhenSmall "remaining object in unwanted remote" + (AssociatedFile Nothing) (Transfer Download uuid (fromKey id k)) r + {- Scanning for keys can take a long time; do not tie up + - the Annex monad while doing it, so other threads continue to + - run. -} + getkeys = do + a <- liftAnnex $ Annex.withCurrentState $ loggedKeysFor uuid + liftIO a + +{- With the webapp, this asks the user to click on a button to finish + - removing the remote. + - + - Without the webapp, just do the removal now. + -} +finishRemovingRemote :: UrlRenderer -> UUID -> Assistant () +#ifdef WITH_WEBAPP +finishRemovingRemote urlrenderer uuid = do + desc <- liftAnnex $ Remote.prettyUUID uuid + button <- mkAlertButton True (T.pack "Finish deletion process") urlrenderer $ + FinishDeleteRepositoryR uuid + void $ addAlert $ remoteRemovalAlert desc button +#else +finishRemovingRemote _ uuid = void $ removeRemote uuid +#endif diff --git a/Assistant/Drop.hs b/Assistant/Drop.hs new file mode 100644 index 0000000000..06b09951ac --- /dev/null +++ b/Assistant/Drop.hs @@ -0,0 +1,30 @@ +{- git-annex assistant dropping of unwanted content + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Drop ( + handleDrops, + handleDropsFrom, +) where + +import Assistant.Common +import Assistant.DaemonStatus +import Annex.Drop (handleDropsFrom, Reason) +import Logs.Location +import CmdLine.Action +import Types.NumCopies +import Types.Command + +{- Drop from local and/or remote when allowed by the preferred content and + - numcopies settings. -} +handleDrops :: Reason -> Bool -> Key -> AssociatedFile -> [VerifiedCopy] -> Assistant () +handleDrops reason fromhere key f preverified = do + syncrs <- syncDataRemotes <$> getDaemonStatus + locs <- liftAnnex $ loggedLocations key + liftAnnex $ handleDropsFrom + locs syncrs reason fromhere key f + (SeekInput []) + preverified callCommandAction diff --git a/Assistant/Fsck.hs b/Assistant/Fsck.hs new file mode 100644 index 0000000000..067c7d1a10 --- /dev/null +++ b/Assistant/Fsck.hs @@ -0,0 +1,50 @@ +{- git-annex assistant fscking + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Fsck where + +import Assistant.Common +import Types.ScheduledActivity +import qualified Types.Remote as Remote +import Annex.UUID +import Assistant.Alert +import Assistant.Types.UrlRenderer +import Logs.Schedule +import qualified Annex + +import qualified Data.Set as S + +{- Displays a nudge in the webapp if a fsck is not configured for + - the specified remote, or for the local repository. -} +fsckNudge :: UrlRenderer -> Maybe Remote -> Assistant () +fsckNudge urlrenderer mr + | maybe True fsckableRemote mr = + whenM (liftAnnex $ annexFsckNudge <$> Annex.getGitConfig) $ + unlessM (liftAnnex $ checkFscked mr) $ + notFsckedNudge urlrenderer mr + | otherwise = noop + +fsckableRemote :: Remote -> Bool +fsckableRemote = isJust . Remote.remoteFsck + +{- Checks if the remote, or the local repository, has a fsck scheduled. + - Only looks at fscks configured to run via the local repository, not + - other repositories. 
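+ - For example (sketch): with a ScheduledSelfFsck sched duration entry
+ - in the schedule, checkFscked Nothing returns True, while
+ - checkFscked (Just r) returns True only given a
+ - ScheduledRemoteFsck (Remote.uuid r) sched duration entry.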
-} +checkFscked :: Maybe Remote -> Annex Bool +checkFscked mr = any wanted . S.toList <$> (scheduleGet =<< getUUID) + where + wanted = case mr of + Nothing -> isSelfFsck + Just r -> flip isFsckOf (Remote.uuid r) + +isSelfFsck :: ScheduledActivity -> Bool +isSelfFsck (ScheduledSelfFsck _ _) = True +isSelfFsck _ = False + +isFsckOf :: ScheduledActivity -> UUID -> Bool +isFsckOf (ScheduledRemoteFsck u _ _) u' = u == u' +isFsckOf _ _ = False diff --git a/Assistant/Gpg.hs b/Assistant/Gpg.hs new file mode 100644 index 0000000000..01226e0640 --- /dev/null +++ b/Assistant/Gpg.hs @@ -0,0 +1,38 @@ +{- git-annex assistant gpg stuff + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Gpg where + +import Utility.Gpg +import Utility.UserInfo +import Types.Remote (RemoteConfigField) +import Annex.SpecialRemote.Config +import Types.ProposedAccepted + +import qualified Data.Map as M +import Control.Applicative +import Prelude + +{- Generates a gpg user id that is not used by any existing secret key -} +newUserId :: GpgCmd -> IO UserId +newUserId cmd = do + oldkeys <- secretKeys cmd + username <- either (const "unknown") id <$> myUserName + let basekeyname = username ++ "'s git-annex encryption key" + return $ Prelude.head $ filter (\n -> M.null $ M.filter (== n) oldkeys) + ( basekeyname + : map (\n -> basekeyname ++ show n) ([2..] :: [Int]) + ) + +data EnableEncryption = HybridEncryption | SharedEncryption | NoEncryption + deriving (Eq) + +{- Generates Remote configuration for encryption. -} +configureEncryption :: EnableEncryption -> (RemoteConfigField, ProposedAccepted String) +configureEncryption SharedEncryption = (encryptionField, Proposed "shared") +configureEncryption NoEncryption = (encryptionField, Proposed "none") +configureEncryption HybridEncryption = (encryptionField, Proposed "hybrid") diff --git a/Assistant/Install.hs b/Assistant/Install.hs new file mode 100644 index 0000000000..c11b6d5585 --- /dev/null +++ b/Assistant/Install.hs @@ -0,0 +1,175 @@ +{- Assistant installation + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Assistant.Install where + +import Assistant.Common +import Assistant.Install.AutoStart +import Config.Files +import Utility.FileMode +import Utility.Shell +import Utility.Tmp +import Utility.Env +import Utility.SshConfig + +#ifdef darwin_HOST_OS +import Utility.OSX +#else +import Utility.FreeDesktop +import Assistant.Install.Menu +import Utility.UserInfo +import Utility.Android +#endif + +import System.PosixCompat.Files (ownerExecuteMode) + +standaloneAppBase :: IO (Maybe FilePath) +standaloneAppBase = getEnv "GIT_ANNEX_APP_BASE" + +{- The standalone app does not have an installation process. + - So when it's run, it needs to set up autostarting of the assistant + - daemon, as well as writing the programFile, and putting the + - git-annex-shell and git-annex-wrapper wrapper scripts into ~/.ssh + - + - Note that this is done every time it's started, so if the user moves + - it around, the paths this sets up won't break. + - + - File manager hook script installation is done even for + - packaged apps, since it has to go into the user's home directory. 
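+ -
+ - Packaged installs signal this by setting GIT_ANNEX_PACKAGE_INSTALL,
+ - in which case only the file manager hooks are (re)installed.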
+ -} +ensureInstalled :: IO () +ensureInstalled = ifM (isJust <$> getEnv "GIT_ANNEX_PACKAGE_INSTALL") + ( go Nothing + , go =<< standaloneAppBase + ) + where + go Nothing = installFileManagerHooks "git-annex" + go (Just base) = do + let program = base "git-annex" + programfile <- programFile + createDirectoryIfMissing True $ + fromRawFilePath (parentDir (toRawFilePath programfile)) + writeFile programfile program + +#ifdef darwin_HOST_OS + autostartfile <- userAutoStart osxAutoStartLabel + installAutoStart program autostartfile +#else + ifM osAndroid + ( do + -- Integration with the Termux:Boot app. + home <- myHomeDir + let bootfile = home ".termux" "boot" "git-annex" + unlessM (doesFileExist bootfile) $ do + createDirectoryIfMissing True (takeDirectory bootfile) + writeFile bootfile "git-annex assistant --autostart" + , do + menufile <- desktopMenuFilePath "git-annex" <$> userDataDir + icondir <- iconDir <$> userDataDir + installMenu program menufile base icondir + autostartfile <- autoStartPath "git-annex" <$> userConfigDir + installAutoStart program autostartfile + ) +#endif + + sshdir <- sshDir + let runshell var = "exec " ++ base "runshell " ++ var + let rungitannexshell var = runshell $ "git-annex-shell -c \"" ++ var ++ "\"" + + installWrapper (sshdir "git-annex-shell") $ unlines + [ shebang + , "set -e" + , "if [ \"x$SSH_ORIGINAL_COMMAND\" != \"x\" ]; then" + , rungitannexshell "$SSH_ORIGINAL_COMMAND" + , "else" + , rungitannexshell "$@" + , "fi" + ] + installWrapper (sshdir "git-annex-wrapper") $ unlines + [ shebang + , "set -e" + , runshell "\"$@\"" + ] + + installFileManagerHooks program + +installWrapper :: FilePath -> String -> IO () +installWrapper file content = do + curr <- catchDefaultIO "" $ readFileStrict file + when (curr /= content) $ do + createDirectoryIfMissing True (fromRawFilePath (parentDir (toRawFilePath file))) + viaTmp writeFile file content + modifyFileMode (toRawFilePath file) $ + addModes [ownerExecuteMode] + +installFileManagerHooks :: FilePath -> IO () +#ifdef linux_HOST_OS +installFileManagerHooks program = unlessM osAndroid $ do + let actions = ["get", "drop", "undo"] + + -- Gnome + nautilusScriptdir <- (\d -> d "nautilus" "scripts") <$> userDataDir + createDirectoryIfMissing True nautilusScriptdir + forM_ actions $ + genNautilusScript nautilusScriptdir + + -- KDE + userdata <- userDataDir + let kdeServiceMenusdir = userdata "kservices5" "ServiceMenus" + createDirectoryIfMissing True kdeServiceMenusdir + writeFile (kdeServiceMenusdir "git-annex.desktop") + (kdeDesktopFile actions) + where + genNautilusScript scriptdir action = + installscript (scriptdir scriptname action) $ unlines + [ shebang + , autoaddedcomment + , "exec " ++ program ++ " " ++ action ++ " --notify-start --notify-finish -- \"$@\"" + ] + scriptname action = "git-annex " ++ action + installscript f c = whenM (safetoinstallscript f) $ do + writeFile f c + modifyFileMode (toRawFilePath f) $ addModes [ownerExecuteMode] + safetoinstallscript f = catchDefaultIO True $ + elem autoaddedcomment . lines <$> readFileStrict f + autoaddedcomment = "# " ++ autoaddedmsg ++ " (To disable, chmod 600 this file.)" + autoaddedmsg = "Automatically added by git-annex, do not edit." 
+ + kdeDesktopFile actions = unlines $ concat $ + kdeDesktopHeader actions : map kdeDesktopAction actions + kdeDesktopHeader actions = + [ "# " ++ autoaddedmsg + , "[Desktop Entry]" + , "Type=Service" + , "ServiceTypes=all/allfiles" + , "MimeType=all/all;" + , "Actions=" ++ intercalate ";" (map kdeDesktopSection actions) + , "X-KDE-Priority=TopLevel" + , "X-KDE-Submenu=Git-Annex" + , "X-KDE-Icon=git-annex" + , "X-KDE-ServiceTypes=KonqPopupMenu/Plugin" + ] + kdeDesktopSection command = "GitAnnex" ++ command + kdeDesktopAction command = + [ "" + , "[Desktop Action " ++ kdeDesktopSection command ++ "]" + , "Name=" ++ command + , "Icon=git-annex" + , unwords + [ "Exec=sh -c 'cd \"$(dirname \"$1\")\" &&" + , program + , command + , "--notify-start --notify-finish -- \"$1\"'" + , "false" -- this becomes $0 in sh, so unused + , "%f" + ] + ] +#else +installFileManagerHooks _ = noop +#endif diff --git a/Assistant/Install/AutoStart.hs b/Assistant/Install/AutoStart.hs new file mode 100644 index 0000000000..59fb7b674d --- /dev/null +++ b/Assistant/Install/AutoStart.hs @@ -0,0 +1,41 @@ +{- Assistant autostart file installation + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} +{-# OPTIONS_GHC -fno-warn-tabs #-} + +module Assistant.Install.AutoStart where + +import Utility.FreeDesktop +#ifdef darwin_HOST_OS +import Utility.OSX +import Utility.Path +import Utility.SystemDirectory +import Utility.FileSystemEncoding +#endif + +installAutoStart :: FilePath -> FilePath -> IO () +installAutoStart command file = do +#ifdef darwin_HOST_OS + createDirectoryIfMissing True (fromRawFilePath (parentDir (toRawFilePath file))) + writeFile file $ genOSXAutoStartFile osxAutoStartLabel command + ["assistant", "--autostart"] +#else + writeDesktopMenuFile (fdoAutostart command) file +#endif + +osxAutoStartLabel :: String +osxAutoStartLabel = "com.branchable.git-annex.assistant" + +fdoAutostart :: FilePath -> DesktopEntry +fdoAutostart command = genDesktopEntry + "Git Annex Assistant" + "Autostart" + False + (command ++ " assistant --autostart") + Nothing + [] diff --git a/Assistant/Install/Menu.hs b/Assistant/Install/Menu.hs new file mode 100644 index 0000000000..91fcd3baf5 --- /dev/null +++ b/Assistant/Install/Menu.hs @@ -0,0 +1,54 @@ +{- Assistant menu installation. + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} +{-# OPTIONS_GHC -fno-warn-tabs #-} + +module Assistant.Install.Menu where + +import Utility.FreeDesktop +import Utility.FileSystemEncoding +import Utility.Path + +import System.IO +import Utility.SystemDirectory +#ifndef darwin_HOST_OS +import System.FilePath +#endif + +installMenu :: FilePath -> FilePath -> FilePath -> FilePath -> IO () +#ifdef darwin_HOST_OS +installMenu _command _menufile _iconsrcdir _icondir = return () +#else +installMenu command menufile iconsrcdir icondir = do + writeDesktopMenuFile (fdoDesktopMenu command) menufile + installIcon (iconsrcdir "logo.svg") $ + iconFilePath (iconBaseName ++ ".svg") "scalable" icondir + installIcon (iconsrcdir "logo_16x16.png") $ + iconFilePath (iconBaseName ++ ".png") "16x16" icondir +#endif + +{- The command can be either just "git-annex", or the full path to use + - to run it. 
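+ - For example (sketch): fdoDesktopMenu "/usr/bin/git-annex" generates
+ - a menu entry that runs "/usr/bin/git-annex webapp".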
-} +fdoDesktopMenu :: FilePath -> DesktopEntry +fdoDesktopMenu command = genDesktopEntry + "Git Annex" + "Track and sync the files in your Git Annex" + False + (command ++ " webapp") + (Just iconBaseName) + ["Network", "FileTransfer"] + +installIcon :: FilePath -> FilePath -> IO () +installIcon src dest = do + createDirectoryIfMissing True (fromRawFilePath (parentDir (toRawFilePath dest))) + withBinaryFile src ReadMode $ \hin -> + withBinaryFile dest WriteMode $ \hout -> + hGetContents hin >>= hPutStr hout + +iconBaseName :: String +iconBaseName = "git-annex" diff --git a/Assistant/MakeRemote.hs b/Assistant/MakeRemote.hs new file mode 100644 index 0000000000..3d3c178f65 --- /dev/null +++ b/Assistant/MakeRemote.hs @@ -0,0 +1,184 @@ +{- git-annex assistant remote creation utilities + - + - Copyright 2012, 2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Assistant.MakeRemote where + +import Assistant.Common +import Assistant.Ssh +import qualified Types.Remote as R +import qualified Remote +import Remote.List.Util +import qualified Remote.Rsync as Rsync +import qualified Remote.GCrypt as GCrypt +import qualified Git +import qualified Git.Command +import qualified Annex +import qualified Annex.SpecialRemote +import Annex.SpecialRemote.Config +import Logs.UUID +import Logs.Remote +import Git.Remote +import Git.Types (RemoteName) +import Creds +import Assistant.Gpg +import Utility.Gpg (KeyId) +import Types.GitConfig +import Config +import Types.ProposedAccepted + +import qualified Data.Map as M + +{- Sets up a new git or rsync remote, accessed over ssh. -} +makeSshRemote :: SshData -> Annex RemoteName +makeSshRemote sshdata = maker (sshRepoName sshdata) (genSshUrl sshdata) + where + maker + | onlyCapability sshdata RsyncCapable = makeRsyncRemote + | otherwise = makeGitRemote + +{- Runs an action that returns a name of the remote, and finishes adding it. -} +addRemote :: Annex RemoteName -> Annex Remote +addRemote a = do + name <- a + remotesChanged + maybe (giveup "failed to add remote") return + =<< Remote.byName (Just name) + +{- Inits a rsync special remote, and returns its name. -} +makeRsyncRemote :: RemoteName -> String -> Annex String +makeRsyncRemote name location = makeRemote name location $ const $ void $ + go =<< Annex.SpecialRemote.findExisting name + where + go [] = setupSpecialRemote name Rsync.remote config Nothing + (Nothing, R.Init, Annex.SpecialRemote.newConfig name Nothing mempty mempty) Nothing + go ((u, c, mcu):_) = setupSpecialRemote name Rsync.remote config Nothing + (Just u, R.Enable c, c) mcu + config = M.fromList + [ (encryptionField, Proposed "shared") + , (Proposed "rsyncurl", Proposed location) + , (typeField, Proposed "rsync") + ] + +{- Inits a gcrypt special remote, and returns its name. -} +makeGCryptRemote :: RemoteName -> String -> KeyId -> Annex RemoteName +makeGCryptRemote remotename location keyid = + initSpecialRemote remotename GCrypt.remote Nothing $ M.fromList + [ (typeField, Proposed "gcrypt") + , (Proposed "gitrepo", Proposed location) + , configureEncryption HybridEncryption + , (Proposed "keyid", Proposed keyid) + ] + +type SpecialRemoteMaker = RemoteName -> RemoteType -> Maybe CredPair -> R.RemoteConfig -> Annex RemoteName + +{- Inits a new special remote. The name is used as a suggestion, but + - will be changed if there is already a special remote with that name. 
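+ - For example, if a special remote named "backup" already exists,
+ - "backup1", "backup2", ... are tried until an unused name is found.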
-} +initSpecialRemote :: SpecialRemoteMaker +initSpecialRemote name remotetype mcreds config = go 0 + where + go :: Int -> Annex RemoteName + go n = do + let fullname = if n == 0 then name else name ++ show n + Annex.SpecialRemote.findExisting fullname >>= \case + [] -> setupSpecialRemote fullname remotetype config mcreds + (Nothing, R.Init, Annex.SpecialRemote.newConfig fullname Nothing mempty mempty) Nothing + _ -> go (n + 1) + +{- Enables an existing special remote. -} +enableSpecialRemote :: SpecialRemoteMaker +enableSpecialRemote name remotetype mcreds config = + Annex.SpecialRemote.findExisting name >>= \case + [] -> giveup $ "Cannot find a special remote named " ++ name + ((u, c, mcu):_) -> setupSpecialRemote' False name remotetype config mcreds (Just u, R.Enable c, c) mcu + +setupSpecialRemote :: RemoteName -> RemoteType -> R.RemoteConfig -> Maybe CredPair -> (Maybe UUID, R.SetupStage, R.RemoteConfig) -> Maybe (Annex.SpecialRemote.ConfigFrom UUID) -> Annex RemoteName +setupSpecialRemote = setupSpecialRemote' True + +setupSpecialRemote' :: Bool -> RemoteName -> RemoteType -> R.RemoteConfig -> Maybe CredPair -> (Maybe UUID, R.SetupStage, R.RemoteConfig) -> Maybe (Annex.SpecialRemote.ConfigFrom UUID) -> Annex RemoteName +setupSpecialRemote' setdesc name remotetype config mcreds (mu, ss, c) mcu = do + {- Currently, only 'weak' ciphers can be generated from the + - assistant, because otherwise GnuPG may block once the entropy + - pool is drained, and as of now there's no way to tell the user + - to perform IO actions to refill the pool. -} + let weakc = M.insert (Proposed "highRandomQuality") (Proposed "false") (M.union config c) + dummycfg <- liftIO dummyRemoteGitConfig + (c', u) <- R.setup remotetype ss mu mcreds weakc dummycfg + case mcu of + Nothing -> + configSet u c' + Just (Annex.SpecialRemote.ConfigFrom cu) -> do + setConfig (remoteAnnexConfig c' "config-uuid") (fromUUID cu) + configSet cu c' + when setdesc $ + whenM (isNothing . M.lookup u <$> uuidDescMap) $ + describeUUID u (toUUIDDesc name) + return name + +{- Returns the name of the git remote it created. If there's already a + - remote at the location, returns its name. -} +makeGitRemote :: String -> String -> Annex RemoteName +makeGitRemote basename location = makeRemote basename location $ \name -> + void $ inRepo $ Git.Command.runBool + [Param "remote", Param "add", Param name, Param location] + +{- If there's not already a remote at the location, adds it using the + - action, which is passed the name of the remote to make. + - + - Returns the name of the remote. -} +makeRemote :: String -> String -> (RemoteName -> Annex ()) -> Annex RemoteName +makeRemote basename location a = do + rs <- Annex.getGitRemotes + if not (any samelocation rs) + then do + let name = uniqueRemoteName basename 0 rs + a name + return name + else return basename + where + samelocation x = Git.repoLocation x == location + +{- Given a list of all remotes, generate an unused name for a new + - remote, adding a number if necessary. + - + - Ensures that the returned name is a legal git remote name. 
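+ - For example (sketch): with existing remotes "server" and "server1",
+ - uniqueRemoteName "server" 0 rs yields "server2".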
-} +uniqueRemoteName :: String -> Int -> [Git.Repo] -> RemoteName +uniqueRemoteName basename n rs + | null namecollision = name + | otherwise = uniqueRemoteName legalbasename (succ n) rs + where + namecollision = filter samename rs + samename x = Git.remoteName x == Just name + name + | n == 0 = legalbasename + | otherwise = legalbasename ++ show n + legalbasename = makeLegalName basename + +{- Finds a CredPair belonging to any Remote that is of a given type + - and matches some other criteria. + - + - This can be used as a default when another repository is being set up + - using the same service. + - + - A function must be provided that returns the CredPairStorage + - to use for a particular Remote's uuid. + -} +previouslyUsedCredPair + :: (UUID -> CredPairStorage) + -> RemoteType + -> (Remote -> Bool) + -> Annex (Maybe CredPair) +previouslyUsedCredPair getstorage remotetype criteria = + getM fromstorage + =<< filter criteria . filter sametype + <$> Remote.remoteList + where + sametype r = R.typename (R.remotetype r) == R.typename remotetype + fromstorage r = do + let storage = getstorage (R.uuid r) + getRemoteCredPair (R.config r) (R.gitconfig r) storage diff --git a/Assistant/MakeRepo.hs b/Assistant/MakeRepo.hs new file mode 100644 index 0000000000..47bf5488a6 --- /dev/null +++ b/Assistant/MakeRepo.hs @@ -0,0 +1,99 @@ +{- making local repositories + - + - Copyright 2012-2014 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Assistant.MakeRepo where + +import Assistant.WebApp.Common +import Annex.Init +import qualified Git.Construct +import qualified Git.Config +import qualified Git.Command +import qualified Git.Branch +import qualified Annex +import Annex.UUID +import Annex.AdjustedBranch +import Annex.Action +import Annex.Startup +import Types.StandardGroups +import Logs.PreferredContent +import qualified Annex.Branch +import Utility.Process.Transcript +import Config + +{- Makes a new git repository. Or, if a git repository already + - exists, returns False. -} +makeRepo :: FilePath -> Bool -> IO Bool +makeRepo path bare = ifM (probeRepoExists path) + ( return False + , do + (transcript, ok) <- + processTranscript "git" (toCommand params) Nothing + unless ok $ + giveup $ "git init failed!\nOutput:\n" ++ transcript + return True + ) + where + baseparams = [Param "init", Param "--quiet"] + params + | bare = baseparams ++ [Param "--bare", File path] + | otherwise = baseparams ++ [File path] + +{- Runs an action in the git repository in the specified directory. -} +inDir :: FilePath -> Annex a -> IO a +inDir dir a = do + state <- Annex.new + =<< Git.Config.read + =<< Git.Construct.fromPath (toRawFilePath dir) + Annex.eval state $ a `finally` quiesce True + +{- Creates a new repository, and returns its UUID. -} +initRepo :: Bool -> Bool -> FilePath -> Maybe String -> Maybe StandardGroup -> IO UUID +initRepo True primary_assistant_repo dir desc mgroup = inDir dir $ do + initRepo' desc mgroup + {- Initialize the master branch, so things that expect + - to have it will work, before any files are added. -} + unlessM (fromMaybe False . Git.Config.isBare <$> gitRepo) $ do + cmode <- annexCommitMode <$> Annex.getGitConfig + void $ inRepo $ Git.Branch.commitCommand cmode + (Git.Branch.CommitQuiet True) + [ Param "--allow-empty" + , Param "-m" + , Param "created repository" + ] + {- Repositories directly managed by the assistant use + - an adjusted unlocked branch with annex.thin set. 
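+ - (Unlocked files can be edited directly, and annex.thin mostly
+ - avoids storing each file's content twice.)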
+ - + - Automatic gc is disabled, as it can be slow. Instead, gc is done + - once a day. + -} + when primary_assistant_repo $ do + void $ enterAdjustedBranch (LinkAdjustment UnlockAdjustment) + setConfig (annexConfig "thin") (Git.Config.boolConfig True) + inRepo $ Git.Command.run + [Param "config", Param "gc.auto", Param "0"] + getUUID +{- Repo already exists, could be a non-git-annex repo though so + - still initialize it. -} +initRepo False _ dir desc mgroup = inDir dir $ do + initRepo' desc mgroup + getUUID + +initRepo' :: Maybe String -> Maybe StandardGroup -> Annex () +initRepo' desc mgroup = unlessM isInitialized $ do + initialize startupAnnex desc Nothing + u <- getUUID + maybe noop (defaultStandardGroup u) mgroup + {- Ensure branch gets committed right away so it is + - available for merging immediately. -} + Annex.Branch.commit =<< Annex.Branch.commitMessage + +{- Checks if a git repo exists at a location. -} +probeRepoExists :: FilePath -> IO Bool +probeRepoExists dir = isJust <$> + catchDefaultIO Nothing (Git.Construct.checkForRepo (encodeBS dir)) diff --git a/Assistant/Monad.hs b/Assistant/Monad.hs new file mode 100644 index 0000000000..dcdbeaf4dd --- /dev/null +++ b/Assistant/Monad.hs @@ -0,0 +1,144 @@ +{- git-annex assistant monad + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE GeneralizedNewtypeDeriving, MultiParamTypeClasses #-} +{-# LANGUAGE PackageImports #-} + +module Assistant.Monad ( + Assistant, + AssistantData(..), + newAssistantData, + runAssistant, + getAssistant, + LiftAnnex, + liftAnnex, + (<~>), + (<<~), + asIO, + asIO1, + asIO2, + ThreadName, + debug, +) where + +import "mtl" Control.Monad.Reader +import qualified Control.Monad.Fail as Fail + +import Annex.Common hiding (debug) +import Assistant.Types.ThreadedMonad +import Assistant.Types.DaemonStatus +import Assistant.Types.ScanRemotes +import Assistant.Types.TransferQueue +import Assistant.Types.TransferSlots +import Assistant.Types.Pushes +import Assistant.Types.BranchChange +import Assistant.Types.Commits +import Assistant.Types.Changes +import Assistant.Types.RepoProblem +import Assistant.Types.ThreadName +import Assistant.Types.RemoteControl +import Assistant.Types.CredPairCache +import qualified Utility.Debug as Debug + +newtype Assistant a = Assistant { mkAssistant :: ReaderT AssistantData IO a } + deriving ( + Monad, + MonadIO, + MonadReader AssistantData, + MonadCatch, + MonadThrow, + MonadMask, + Fail.MonadFail, + Functor, + Applicative + ) + +data AssistantData = AssistantData + { threadName :: ThreadName + , threadState :: ThreadState + , daemonStatusHandle :: DaemonStatusHandle + , scanRemoteMap :: ScanRemoteMap + , transferQueue :: TransferQueue + , transferSlots :: TransferSlots + , failedPushMap :: FailedPushMap + , failedExportMap :: FailedPushMap + , commitChan :: CommitChan + , exportCommitChan :: CommitChan + , changePool :: ChangePool + , repoProblemChan :: RepoProblemChan + , branchChangeHandle :: BranchChangeHandle + , remoteControl :: RemoteControl + , credPairCache :: CredPairCache + } + +newAssistantData :: ThreadState -> DaemonStatusHandle -> IO AssistantData +newAssistantData st dstatus = AssistantData + <$> pure (ThreadName "main") + <*> pure st + <*> pure dstatus + <*> newScanRemoteMap + <*> newTransferQueue + <*> newTransferSlots + <*> newFailedPushMap + <*> newFailedPushMap + <*> newCommitChan + <*> newCommitChan + <*> newChangePool + <*> newRepoProblemChan + <*> newBranchChangeHandle + <*> newRemoteControl + <*> 
newCredPairCache + +runAssistant :: AssistantData -> Assistant a -> IO a +runAssistant d a = runReaderT (mkAssistant a) d + +getAssistant :: (AssistantData -> a) -> Assistant a +getAssistant = reader + +{- Using a type class for lifting into the annex monad allows + - easily lifting to it from multiple different monads. -} +class LiftAnnex m where + liftAnnex :: Annex a -> m a + +{- Runs an action in the git-annex monad. Note that the same monad state + - is shared among all assistant threads, so only one of these can run at + - a time. Therefore, long-duration actions should be avoided. -} +instance LiftAnnex Assistant where + liftAnnex a = do + st <- reader threadState + liftIO $ runThreadState st a + +{- Runs an IO action, passing it an IO action that runs an Assistant action. -} +(<~>) :: (IO a -> IO b) -> Assistant a -> Assistant b +io <~> a = do + d <- reader id + liftIO $ io $ runAssistant d a + +{- Creates an IO action that will run an Assistant action when run. -} +asIO :: Assistant a -> Assistant (IO a) +asIO a = do + d <- reader id + return $ runAssistant d a + +asIO1 :: (a -> Assistant b) -> Assistant (a -> IO b) +asIO1 a = do + d <- reader id + return $ \v -> runAssistant d $ a v + +asIO2 :: (a -> b -> Assistant c) -> Assistant (a -> b -> IO c) +asIO2 a = do + d <- reader id + return $ \v1 v2 -> runAssistant d (a v1 v2) + +{- Runs an IO action on a selected field of the AssistantData. -} +(<<~) :: (a -> IO b) -> (AssistantData -> a) -> Assistant b +io <<~ v = reader v >>= liftIO . io + +debug :: [String] -> Assistant () +debug ws = do + ThreadName name <- getAssistant threadName + liftIO $ Debug.debug (Debug.DebugSource (encodeBS name)) (unwords ws) diff --git a/Assistant/NamedThread.hs b/Assistant/NamedThread.hs new file mode 100644 index 0000000000..7abe274d58 --- /dev/null +++ b/Assistant/NamedThread.hs @@ -0,0 +1,99 @@ +{- git-annex assistant named threads. + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Assistant.NamedThread where + +import Annex.Common +import Assistant.Types.NamedThread +import Assistant.Types.ThreadName +import Assistant.Types.DaemonStatus +import Assistant.Types.UrlRenderer +import Assistant.DaemonStatus +import Assistant.Monad +import Utility.NotificationBroadcaster + +import Control.Concurrent +import Control.Concurrent.Async +import qualified Data.Map.Strict as M +import qualified Control.Exception as E + +#ifdef WITH_WEBAPP +import Assistant.WebApp.Types +import Assistant.Types.Alert +import Assistant.Alert +import qualified Data.Text as T +#endif + +{- Starts a named thread, if it's not already running. + - + - Named threads are run by a management thread, so if they crash + - an alert is displayed, allowing the thread to be restarted. -} +startNamedThread :: UrlRenderer -> NamedThread -> Assistant () +startNamedThread urlrenderer (NamedThread afterstartupsanitycheck name a) = + M.lookup name . 
startedThreads <$> getDaemonStatus >>= \case + Nothing -> start + Just (aid, _) -> do + r <- liftIO (E.try (poll aid) :: IO (Either E.SomeException (Maybe (Either E.SomeException ())))) + case r of + Right Nothing -> noop + _ -> start + where + start + | afterstartupsanitycheck = do + status <- getDaemonStatus + h <- liftIO $ newNotificationHandle False $ + startupSanityCheckNotifier status + startwith $ runmanaged $ + liftIO $ waitNotification h + | otherwise = startwith $ runmanaged noop + startwith runner = do + d <- getAssistant id + aid <- liftIO $ runner $ d { threadName = name } + restart <- asIO $ startNamedThread urlrenderer (NamedThread False name a) + modifyDaemonStatus_ $ \s -> s + { startedThreads = M.insert name (aid, restart) (startedThreads s) } + runmanaged first d = do + aid <- async $ runAssistant d $ do + void first + a + void $ forkIO $ manager d aid + return aid + manager d aid = (E.try (wait aid) :: IO (Either E.SomeException ())) >>= \case + Right _ -> noop + Left e -> do + let msg = unwords + [ fromThreadName $ threadName d + , "crashed:", show e + ] + hPutStrLn stderr msg +#ifdef WITH_WEBAPP + button <- runAssistant d $ mkAlertButton True + (T.pack "Restart Thread") + urlrenderer + (RestartThreadR name) + runAssistant d $ void $ addAlert $ + (warningAlert (fromThreadName name) msg) + { alertButtons = [button] } +#endif + +namedThreadId :: NamedThread -> Assistant (Maybe ThreadId) +namedThreadId (NamedThread _ name _) = do + m <- startedThreads <$> getDaemonStatus + return $ asyncThreadId . fst <$> M.lookup name m + +{- Waits for all named threads that have been started to finish. + - + - Note that if a named thread crashes, it will probably + - cause this to crash as well. Also, named threads that are started + - after this is called will not be waited on. -} +waitNamedThreads :: Assistant () +waitNamedThreads = do + m <- startedThreads <$> getDaemonStatus + liftIO $ mapM_ (wait . fst) $ M.elems m + diff --git a/Assistant/Pairing.hs b/Assistant/Pairing.hs new file mode 100644 index 0000000000..aa700f5998 --- /dev/null +++ b/Assistant/Pairing.hs @@ -0,0 +1,97 @@ +{- git-annex assistant repo pairing, core data types + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Pairing where + +import Annex.Common +import Utility.Verifiable +import Assistant.Ssh + +import Control.Concurrent +import Network.Socket +import Data.Char +import qualified Data.Text as T + +data PairStage + {- "I'll pair with anybody who shares the secret that can be used + - to verify this request." -} + = PairReq + {- "I've verified your request, and you can verify this to see + - that I know the secret. I set up your ssh key already. + - Here's mine for you to set up." -} + | PairAck + {- "I saw your PairAck; you can stop sending them." 
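+ -
+ - So a complete exchange between hosts A and B runs, in outline:
+ -
+ - A -> B: PairReq
+ - B -> A: PairAck
+ - A -> B: PairDone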
-} + | PairDone + deriving (Eq, Read, Show, Ord, Enum) + +newtype PairMsg = PairMsg (Verifiable (PairStage, PairData, SomeAddr)) + deriving (Eq, Read, Show) + +verifiedPairMsg :: PairMsg -> PairingInProgress -> Bool +verifiedPairMsg (PairMsg m) pip = verify m $ inProgressSecret pip + +fromPairMsg :: PairMsg -> Verifiable (PairStage, PairData, SomeAddr) +fromPairMsg (PairMsg m) = m + +pairMsgStage :: PairMsg -> PairStage +pairMsgStage (PairMsg (Verifiable (s, _, _) _)) = s + +pairMsgData :: PairMsg -> PairData +pairMsgData (PairMsg (Verifiable (_, d, _) _)) = d + +pairMsgAddr :: PairMsg -> SomeAddr +pairMsgAddr (PairMsg (Verifiable (_, _, a) _)) = a + +data PairData = PairData + -- uname -n output, not a full domain name + { remoteHostName :: Maybe HostName + , remoteUserName :: UserName + , remoteDirectory :: FilePath + , remoteSshPubKey :: SshPubKey + , pairUUID :: UUID + } + deriving (Eq, Read, Show) + +checkSane :: PairData -> Bool +checkSane p = all (not . any isControl) + [ fromMaybe "" (remoteHostName p) + , remoteUserName p + , remoteDirectory p + , remoteSshPubKey p + , fromUUID (pairUUID p) + ] + +type UserName = String + +{- A pairing that is in progress has a secret, a thread that is + - broadcasting pairing messages, and a SshKeyPair that has not yet been + - set up on disk. -} +data PairingInProgress = PairingInProgress + { inProgressSecret :: Secret + , inProgressThreadId :: Maybe ThreadId + , inProgressSshKeyPair :: SshKeyPair + , inProgressPairData :: PairData + , inProgressPairStage :: PairStage + } + deriving (Show) + +data AddrClass = IPv4AddrClass | IPv6AddrClass + +data SomeAddr = IPv4Addr HostAddress + | IPv6Addr HostAddress6 + deriving (Ord, Eq, Read, Show) + +{- This contains the whole secret, just lightly obfuscated to make it not + - too obvious. It's only displayed in the user's web browser. -} +newtype SecretReminder = SecretReminder [Int] + deriving (Show, Eq, Ord, Read) + +toSecretReminder :: T.Text -> SecretReminder +toSecretReminder = SecretReminder . map ord . T.unpack + +fromSecretReminder :: SecretReminder -> T.Text +fromSecretReminder (SecretReminder s) = T.pack $ map chr s diff --git a/Assistant/Pairing/MakeRemote.hs b/Assistant/Pairing/MakeRemote.hs new file mode 100644 index 0000000000..69402e2e3d --- /dev/null +++ b/Assistant/Pairing/MakeRemote.hs @@ -0,0 +1,98 @@ +{- git-annex assistant pairing remote creation + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Pairing.MakeRemote where + +import Assistant.Common +import Assistant.Ssh +import Assistant.Pairing +import Assistant.Pairing.Network +import Assistant.MakeRemote +import Assistant.Sync +import Config.Cost +import Config +import qualified Types.Remote as Remote + +import Network.Socket +import qualified Data.Text as T + +{- Authorized keys are set up before pairing is complete, so that the other + - side can immediately begin syncing. -} +setupAuthorizedKeys :: PairMsg -> FilePath -> IO () +setupAuthorizedKeys msg repodir = case validateSshPubKey $ remoteSshPubKey $ pairMsgData msg of + Left err -> giveup err + Right pubkey -> do + absdir <- fromRawFilePath <$> absPath (toRawFilePath repodir) + unlessM (liftIO $ addAuthorizedKeys True absdir pubkey) $ + giveup "failed setting up ssh authorized keys" + +{- When local pairing is complete, this is used to set up the remote for + - the host we paired with. 
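+ -
+ - In outline: install the generated ssh key pair, learn the host's
+ - ssh host key if it is not already known, add a git remote for the
+ - host, mark it as semi-expensive, and start a sync with it.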
-} +finishedLocalPairing :: PairMsg -> SshKeyPair -> Assistant () +finishedLocalPairing msg keypair = do + sshdata <- liftIO $ installSshKeyPair keypair =<< pairMsgToSshData msg + {- Ensure that we know the ssh host key for the host we paired with. + - If we don't, ssh over to get it. -} + liftIO $ unlessM (knownHost $ sshHostName sshdata) $ + void $ sshTranscript + [ sshOpt "StrictHostKeyChecking" "no" + , sshOpt "NumberOfPasswordPrompts" "0" + , "-n" + ] + (genSshHost (sshHostName sshdata) (sshUserName sshdata)) + ("git-annex-shell -c configlist " ++ T.unpack (sshDirectory sshdata)) + Nothing + r <- liftAnnex $ addRemote $ makeSshRemote sshdata + repo <- liftAnnex $ Remote.getRepo r + liftAnnex $ setRemoteCost repo semiExpensiveRemoteCost + syncRemote r + +{- Mostly a straightforward conversion. Except: + - * Determine the best hostname to use to contact the host. + - * Strip leading ~/ from the directory name. + -} +pairMsgToSshData :: PairMsg -> IO SshData +pairMsgToSshData msg = do + let d = pairMsgData msg + hostname <- liftIO $ bestHostName msg + let dir = case remoteDirectory d of + ('~':'/':v) -> v + v -> v + return SshData + { sshHostName = T.pack hostname + , sshUserName = Just (T.pack $ remoteUserName d) + , sshDirectory = T.pack dir + , sshRepoName = genSshRepoName hostname dir + , sshPort = 22 + , needsPubKey = True + , sshCapabilities = [GitAnnexShellCapable, GitCapable, RsyncCapable] + , sshRepoUrl = Nothing + } + +{- Finds the best hostname to use for the host that sent the PairMsg. + - + - If remoteHostName is set, tries to use a .local address based on it. + - That's the most robust, if this system supports .local. + - Otherwise, looks up the hostname in the DNS for the remoteAddress, + - if any. May fall back to remoteAddress if there's no DNS. Ugh. -} +bestHostName :: PairMsg -> IO HostName +bestHostName msg = case remoteHostName $ pairMsgData msg of + Just h -> do + let localname = h ++ ".local" + addrs <- catchDefaultIO [] $ + getAddrInfo Nothing (Just localname) Nothing + maybe fallback (const $ return localname) (headMaybe addrs) + Nothing -> fallback + where + fallback = do + let a = pairMsgAddr msg + let sockaddr = case a of + IPv4Addr addr -> SockAddrInet (fromInteger 0) addr + IPv6Addr addr -> SockAddrInet6 (fromInteger 0) 0 addr 0 + fromMaybe (showAddr a) + <$> catchDefaultIO Nothing + (fst <$> getNameInfo [] True False sockaddr) diff --git a/Assistant/Pairing/Network.hs b/Assistant/Pairing/Network.hs new file mode 100644 index 0000000000..62a4ea02e8 --- /dev/null +++ b/Assistant/Pairing/Network.hs @@ -0,0 +1,132 @@ +{- git-annex assistant pairing network code + - + - All network traffic is sent over multicast UDP. For reliability, + - each message is repeated until acknowledged. This is done using a + - thread, that gets stopped before the next message is sent. + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Pairing.Network where + +import Assistant.Common +import Assistant.Pairing +import Assistant.DaemonStatus +import Utility.ThreadScheduler +import Utility.Verifiable + +import Network.Multicast +import Network.Info +import Network.Socket +import qualified Network.Socket.ByteString as B +import qualified Data.ByteString.UTF8 as BU8 +import qualified Data.Map as M +import Control.Concurrent + +{- This is an arbitrary port in the dynamic port range, that could + - conceivably be used for some other broadcast messages. 
+ - If so, hope they ignore the garbage from us; we'll certainly + - ignore garbage from them. Wild wild west. -} +pairingPort :: PortNumber +pairingPort = 55556 + +{- Goal: Reach all hosts on the same network segment. + - Method: Use same address that avahi uses. Other broadcast addresses seem + - to not be let through some routers. -} +multicastAddress :: AddrClass -> HostName +multicastAddress IPv4AddrClass = "224.0.0.251" +multicastAddress IPv6AddrClass = "ff02::fb" + +{- Multicasts a message repeatedly on all interfaces, with a 2 second + - delay between each transmission. The message is repeated forever + - unless a number of repeats is specified. + - + - The remoteHostAddress is set to the interface's IP address. + - + - Note that new sockets are opened each time. This is hardly efficient, + - but it allows new network interfaces to be used as they come up. + - On the other hand, the expensive DNS lookups are cached. + -} +multicastPairMsg :: Maybe Int -> Secret -> PairData -> PairStage -> IO () +multicastPairMsg repeats secret pairdata stage = go M.empty repeats + where + go _ (Just 0) = noop + go cache n = do + addrs <- activeNetworkAddresses + let cache' = updatecache cache addrs + mapM_ (sendinterface cache') addrs + threadDelaySeconds (Seconds 2) + go cache' $ pred <$> n + {- The multicast library currently chokes on ipv6 addresses. -} + sendinterface _ (IPv6Addr _) = noop + sendinterface cache i = void $ tryIO $ + withSocketsDo $ bracket setup cleanup use + where + setup = multicastSender (multicastAddress IPv4AddrClass) pairingPort + cleanup (sock, _) = close sock -- FIXME does not work + use (sock, addr) = do + setInterface sock (showAddr i) + maybe noop + (\s -> void $ B.sendTo sock (BU8.fromString s) addr) + (M.lookup i cache) + updatecache cache [] = cache + updatecache cache (i:is) + | M.member i cache = updatecache cache is + | otherwise = updatecache (M.insert i (show $ mkmsg i) cache) is + mkmsg addr = PairMsg $ + mkVerifiable (stage, pairdata, addr) secret + +startSending :: PairingInProgress -> PairStage -> (PairStage -> IO ()) -> Assistant () +startSending pip stage sender = do + a <- asIO start + void $ liftIO $ forkIO a + where + start = do + tid <- liftIO myThreadId + let pip' = pip { inProgressPairStage = stage, inProgressThreadId = Just tid } + oldpip <- modifyDaemonStatus $ + \s -> (s { pairingInProgress = Just pip' }, pairingInProgress s) + maybe noop stopold oldpip + liftIO $ sender stage + stopold = maybe noop (liftIO . killThread) . inProgressThreadId + +stopSending :: PairingInProgress -> Assistant () +stopSending pip = do + maybe noop (liftIO . killThread) $ inProgressThreadId pip + modifyDaemonStatus_ $ \s -> s { pairingInProgress = Nothing } + +class ToSomeAddr a where + toSomeAddr :: a -> SomeAddr + +instance ToSomeAddr IPv4 where + toSomeAddr (IPv4 a) = IPv4Addr a + +instance ToSomeAddr IPv6 where + toSomeAddr (IPv6 o1 o2 o3 o4) = IPv6Addr (o1, o2, o3, o4) + +showAddr :: SomeAddr -> HostName +showAddr (IPv4Addr a) = show $ IPv4 a +showAddr (IPv6Addr (o1, o2, o3, o4)) = show $ IPv6 o1 o2 o3 o4 + +activeNetworkAddresses :: IO [SomeAddr] +activeNetworkAddresses = filter (not . all (`elem` "0.:") . showAddr) + . concatMap (\ni -> [toSomeAddr $ ipv4 ni, toSomeAddr $ ipv6 ni]) + <$> getNetworkInterfaces + +{- A human-visible description of the repository being paired with. + - Note that the repository's description is not shown to the user, because + - it could be something like "my repo", which is confusing when pairing + - with someone else's repo. 
However, this has the same format as the + - default description of a repo. -} +pairRepo :: PairMsg -> String +pairRepo msg = concat + [ remoteUserName d + , "@" + , fromMaybe (showAddr $ pairMsgAddr msg) (remoteHostName d) + , ":" + , remoteDirectory d + ] + where + d = pairMsgData msg diff --git a/Assistant/Pushes.hs b/Assistant/Pushes.hs new file mode 100644 index 0000000000..f1fccb5d66 --- /dev/null +++ b/Assistant/Pushes.hs @@ -0,0 +1,37 @@ +{- git-annex assistant push tracking + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Pushes where + +import Assistant.Common +import Assistant.Types.Pushes + +import Control.Concurrent.STM +import Data.Time.Clock +import qualified Data.Map as M + +{- Blocks until there are failed pushes. + - Returns Remotes whose pushes failed a given time duration or more ago. + - (This may be an empty list.) -} +getFailedPushesBefore :: NominalDiffTime -> FailedPushMap -> Assistant [Remote] +getFailedPushesBefore duration v = liftIO $ do + m <- atomically $ readTMVar v + now <- getCurrentTime + return $ M.keys $ M.filter (not . toorecent now) m + where + toorecent now time = now `diffUTCTime` time < duration + +{- Modifies the map. -} +changeFailedPushMap :: FailedPushMap -> (PushMap -> PushMap) -> Assistant () +changeFailedPushMap v f = liftIO $ atomically $ + store . f . fromMaybe M.empty =<< tryTakeTMVar v + where + {- tryTakeTMVar empties the TMVar; refill it only if + - the modified map is not itself empty -} + store m + | m == M.empty = noop + | otherwise = putTMVar v $! m diff --git a/Assistant/RemoteControl.hs b/Assistant/RemoteControl.hs new file mode 100644 index 0000000000..d61b3e16d3 --- /dev/null +++ b/Assistant/RemoteControl.hs @@ -0,0 +1,21 @@ +{- git-annex assistant RemoteDaemon control + - + - Copyright 2014 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.RemoteControl ( + sendRemoteControl, + RemoteDaemon.Consumed(..) +) where + +import Assistant.Common +import qualified RemoteDaemon.Types as RemoteDaemon + +import Control.Concurrent + +sendRemoteControl :: RemoteDaemon.Consumed -> Assistant () +sendRemoteControl msg = do + clicker <- getAssistant remoteControl + liftIO $ writeChan clicker msg diff --git a/Assistant/Repair.hs b/Assistant/Repair.hs new file mode 100644 index 0000000000..02ebab3cae --- /dev/null +++ b/Assistant/Repair.hs @@ -0,0 +1,162 @@ +{- git-annex assistant repository repair + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Assistant.Repair where + +import Assistant.Common +import Command.Repair (repairAnnexBranch, trackingOrSyncBranch) +import Git.Fsck (FsckResults, foundBroken) +import Git.Repair (runRepairOf) +import qualified Git +import qualified Remote +import qualified Types.Remote as Remote +import Logs.FsckResults +import Annex.UUID +import Utility.Batch +import Annex.Path +import Assistant.Sync +import Assistant.Alert +import Assistant.DaemonStatus +import Assistant.Types.UrlRenderer +#ifdef WITH_WEBAPP +import Assistant.WebApp.Types +import qualified Data.Text as T +#endif +import qualified Utility.Lsof as Lsof +import Utility.ThreadScheduler +import qualified Utility.RawFilePath as R + +import Control.Concurrent.Async + +{- When the FsckResults require a repair, tries to do a non-destructive + - repair. If that fails, pops up an alert. 
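+ -
+ - Returns True only when a repair was needed and it succeeded;
+ - False when nothing was found broken, or when the repair failed.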
-}
+repairWhenNecessary :: UrlRenderer -> UUID -> Maybe Remote -> FsckResults -> Assistant Bool
+repairWhenNecessary urlrenderer u mrmt fsckresults
+ | foundBroken fsckresults = do
+ liftAnnex $ writeFsckResults u fsckresults
+ repodesc <- liftAnnex $ Remote.prettyUUID u
+ ok <- alertWhile (repairingAlert repodesc)
+ (runRepair u mrmt False)
+#ifdef WITH_WEBAPP
+ unless ok $ do
+ button <- mkAlertButton True (T.pack "Click Here") urlrenderer $
+ RepairRepositoryR u
+ void $ addAlert $ brokenRepositoryAlert [button]
+#endif
+ return ok
+ | otherwise = return False
+
+runRepair :: UUID -> Maybe Remote -> Bool -> Assistant Bool
+runRepair u mrmt destructiverepair = do
+ fsckresults <- liftAnnex $ readFsckResults u
+ myu <- liftAnnex getUUID
+ ok <- if u == myu
+ then localrepair fsckresults
+ else remoterepair fsckresults
+ liftAnnex $ clearFsckResults u
+ debug [ "Repaired", show u, show ok ]
+
+ return ok
+ where
+ localrepair fsckresults = do
+ -- Stop the watcher from running while running repairs.
+ changeSyncable Nothing False
+
+ -- This intentionally runs the repair inside the Annex
+ -- monad, which is not strictly necessary, but keeps
+ -- other threads that might be trying to use the Annex
+ -- from running until it completes.
+ ok <- liftAnnex $ repair fsckresults Nothing
+
+ -- Run a background fast fsck if a destructive repair had
+ -- to be done, to ensure that the git-annex branch
+ -- reflects the current state of the repo.
+ when destructiverepair $
+ backgroundfsck [ Param "--fast" ]
+
+ -- Start the watcher running again. This also triggers it to
+ -- do a startup scan, which is especially important if the
+ -- git repo repair removed files from the index file. Those
+ -- files will be seen as new, and re-added to the repository.
+ when (ok || destructiverepair) $
+ changeSyncable Nothing True
+
+ return ok
+
+ remoterepair fsckresults = case Remote.repairRepo =<< mrmt of
+ Nothing -> return False
+ Just mkrepair -> do
+ thisrepopath <- liftIO . absPath
+ =<< liftAnnex (fromRepo Git.repoPath)
+ a <- liftAnnex $ mkrepair $
+ repair fsckresults (Just (fromRawFilePath thisrepopath))
+ liftIO $ catchBoolIO a
+
+ repair fsckresults referencerepo = do
+ (ok, modifiedbranches) <- inRepo $
+ runRepairOf fsckresults trackingOrSyncBranch destructiverepair referencerepo
+ when destructiverepair $
+ repairAnnexBranch modifiedbranches
+ return ok
+
+ backgroundfsck params = liftIO $ void $ async $ do
+ program <- programPath
+ batchCommand program (Param "fsck" : params)
+
+{- Detect when a git lock file exists and has no git process currently
+ - writing to it. This strongly suggests it is a stale lock file.
+ -
+ - However, this could be on a network filesystem, which is not very safe
+ - anyway (the assistant relies on being able to check when files have
+ - no writers to know when to commit them). Also, a few lock-file-ish
+ - things used by git are not kept open, particularly MERGE_HEAD.
+ -
+ - So, just in case, when the lock file appears stale, we delay for one
+ - minute, and check its size. If the size changed, delay for another
+ - minute, and so on. This will at least work to detect when another machine
+ - is writing out a new index file, since git does so by writing the
+ - new content to index.lock.
+ -
+ - Returns true if locks were cleaned up.
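+ -
+ - As a worked example: if index.lock exists, lsof shows no writers,
+ - and its size is unchanged after a minute, it is deleted; if the
+ - size did change, another check is made a minute later, and so on.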
+ -} +repairStaleGitLocks :: Git.Repo -> Assistant Bool +repairStaleGitLocks r = do + lockfiles <- liftIO $ filter islock + <$> emptyWhenDoesNotExist (findgitfiles r) + repairStaleLocks lockfiles + return $ not $ null lockfiles + where + findgitfiles = dirContentsRecursiveSkipping (== dropTrailingPathSeparator (fromRawFilePath annexDir)) True . fromRawFilePath . Git.localGitDir + islock f + | "gc.pid" `isInfixOf` f = False + | ".lock" `isSuffixOf` f = True + | takeFileName f == "MERGE_HEAD" = True + | otherwise = False + +repairStaleLocks :: [FilePath] -> Assistant () +repairStaleLocks lockfiles = go =<< getsizes + where + getsize lf = catchMaybeIO $ (\s -> (lf, s)) + <$> getFileSize (toRawFilePath lf) + getsizes = liftIO $ catMaybes <$> mapM getsize lockfiles + go [] = return () + go l = ifM (liftIO $ null <$> Lsof.query ("--" : map fst l)) + ( do + waitforit "to check stale git lock file" + l' <- getsizes + if l' == l + then liftIO $ mapM_ (removeWhenExistsWith R.removeLink . toRawFilePath . fst) l + else go l' + , do + waitforit "for git lock file writer" + go =<< getsizes + ) + waitforit why = do + debug ["Waiting for 60 seconds", why] + liftIO $ threadDelaySeconds $ Seconds 60 diff --git a/Assistant/RepoProblem.hs b/Assistant/RepoProblem.hs new file mode 100644 index 0000000000..cf7318b7e6 --- /dev/null +++ b/Assistant/RepoProblem.hs @@ -0,0 +1,34 @@ +{- git-annex assistant remote problem handling + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.RepoProblem where + +import Assistant.Common +import Assistant.Types.RepoProblem +import Utility.TList + +import Control.Concurrent.STM + +{- Gets all repositories that have problems. Blocks until there is at + - least one. -} +getRepoProblems :: Assistant [RepoProblem] +getRepoProblems = nubBy sameRepoProblem + <$> (atomically . getTList) <<~ repoProblemChan + +{- Indicates that there was a problem with a repository, and the problem + - appears to not be a transient (eg network connection) problem. + - + - If the problem is able to be repaired, the passed action will be run. + - (However, if multiple problems are reported with a single repository, + - only a single action will be run.) + -} +repoHasProblem :: UUID -> Assistant () -> Assistant () +repoHasProblem u afterrepair = do + rp <- RepoProblem + <$> pure u + <*> asIO afterrepair + (atomically . flip consTList rp) <<~ repoProblemChan diff --git a/Assistant/Restart.hs b/Assistant/Restart.hs new file mode 100644 index 0000000000..65b6fe64aa --- /dev/null +++ b/Assistant/Restart.hs @@ -0,0 +1,119 @@ +{- git-annex assistant restarting + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Assistant.Restart where + +import Assistant.Common +import Assistant.Threads.Watcher +import Assistant.DaemonStatus +import Assistant.NamedThread +import Utility.ThreadScheduler +import Utility.NotificationBroadcaster +import Utility.Url +import Utility.Url.Parse +import Utility.PID +import qualified Utility.RawFilePath as R +import qualified Git.Construct +import qualified Git.Config +import qualified Annex +import qualified Git +import Annex.Path + +import Control.Concurrent +#ifndef mingw32_HOST_OS +import System.Posix (signalProcess, sigTERM) +#else +import System.Win32.Process (terminateProcessById) +#endif +import Network.URI + +{- Before the assistant can be restarted, have to remove our + - gitAnnexUrlFile and our gitAnnexPidFile. 
Pausing the watcher is also + - a good idea, to avoid fighting when two assistants are running in the + - same repo. + -} +prepRestart :: Assistant () +prepRestart = do + liftIO . maybe noop (`throwTo` PauseWatcher) =<< namedThreadId watchThread + liftIO . removeWhenExistsWith R.removeLink =<< liftAnnex (fromRepo gitAnnexUrlFile) + liftIO . removeWhenExistsWith R.removeLink =<< liftAnnex (fromRepo gitAnnexPidFile) + +{- To finish a restart, send a global redirect to the new url + - to any web browsers that are displaying the webapp. + - + - Wait for browser to update before terminating this process. -} +postRestart :: URLString -> Assistant () +postRestart url = do + modifyDaemonStatus_ $ \status -> status { globalRedirUrl = Just url } + liftIO . sendNotification . globalRedirNotifier =<< getDaemonStatus + void $ liftIO $ forkIO $ do + threadDelaySeconds (Seconds 120) + terminateSelf + +terminateSelf :: IO () +terminateSelf = +#ifndef mingw32_HOST_OS + signalProcess sigTERM =<< getPID +#else + terminateProcessById =<< getPID +#endif + +runRestart :: Assistant URLString +runRestart = liftIO . newAssistantUrl + =<< liftAnnex (Git.repoLocation <$> Annex.gitRepo) + +{- Starts up the assistant in the repository, and waits for it to create + - a gitAnnexUrlFile. Waits for the assistant to be up and listening for + - connections by testing the url. -} +newAssistantUrl :: FilePath -> IO URLString +newAssistantUrl repo = do + startAssistant repo + geturl + where + geturl = do + r <- Git.Config.read =<< Git.Construct.fromPath (toRawFilePath repo) + waiturl $ fromRawFilePath $ gitAnnexUrlFile r + waiturl urlfile = do + v <- tryIO $ readFile urlfile + case v of + Left _ -> delayed $ waiturl urlfile + Right url -> ifM (assistantListening url) + ( return url + , delayed $ waiturl urlfile + ) + delayed a = do + threadDelay 100000 -- 1/10th of a second + a + +{- Checks if the assistant is listening on an url. + - + - Always checks http, because https with self-signed cert is problematic. + - warp-tls listens to http, in order to show an error page, so this works. + -} +assistantListening :: URLString -> IO Bool +assistantListening url = catchBoolIO $ do + uo <- defUrlOptions + (== Right True) <$> exists url' uo + where + url' = case parseURIPortable url of + Nothing -> url + Just uri -> show $ uri + { uriScheme = "http:" + } + +{- Does not wait for assistant to be listening for web connections. + - + - On windows, the assistant does not daemonize, which is why the forkIO is + - done. + -} +startAssistant :: FilePath -> IO () +startAssistant repo = void $ forkIO $ do + program <- programPath + let p = (proc program ["assistant"]) { cwd = Just repo } + withCreateProcess p $ \_ _ _ pid -> void $ checkSuccessProcess pid diff --git a/Assistant/ScanRemotes.hs b/Assistant/ScanRemotes.hs new file mode 100644 index 0000000000..7cecc62cd6 --- /dev/null +++ b/Assistant/ScanRemotes.hs @@ -0,0 +1,41 @@ +{- git-annex assistant remotes needing scanning + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.ScanRemotes where + +import Assistant.Common +import Assistant.Types.ScanRemotes +import qualified Types.Remote as Remote + +import Data.Function +import Control.Concurrent.STM +import qualified Data.Map as M + +{- Blocks until there is a remote or remotes that need to be scanned. + - + - The list has higher priority remotes listed first. 
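+ -
+ - (Priority is the negated remote cost, per addScanRemotes below,
+ - so for example a cost 100 remote sorts before a cost 200 one.)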
-} +getScanRemote :: Assistant [(Remote, ScanInfo)] +getScanRemote = do + v <- getAssistant scanRemoteMap + liftIO $ atomically $ + reverse . sortBy (compare `on` scanPriority . snd) . M.toList + <$> takeTMVar v + +{- Adds new remotes that need scanning. -} +addScanRemotes :: Bool -> [Remote] -> Assistant () +addScanRemotes _ [] = noop +addScanRemotes full rs = do + v <- getAssistant scanRemoteMap + liftIO $ atomically $ do + m <- fromMaybe M.empty <$> tryTakeTMVar v + putTMVar v $ M.unionWith merge (M.fromList $ zip rs (map info rs)) m + where + info r = ScanInfo (-1 * Remote.cost r) full + merge x y = ScanInfo + { scanPriority = max (scanPriority x) (scanPriority y) + , fullScan = fullScan x || fullScan y + } diff --git a/Assistant/Ssh.hs b/Assistant/Ssh.hs new file mode 100644 index 0000000000..3f472a5332 --- /dev/null +++ b/Assistant/Ssh.hs @@ -0,0 +1,412 @@ +{- git-annex assistant ssh utilities + - + - Copyright 2012-2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Ssh where + +import Annex.Common +import Utility.Tmp +import Utility.Tmp.Dir +import Utility.Shell +import Utility.Rsync +import Utility.FileMode +import Utility.SshConfig +import Git.Remote +import Utility.SshHost +import Utility.Process.Transcript + +import Data.Text (Text) +import qualified Data.Text as T +import Data.Char +import Network.URI + +data SshData = SshData + { sshHostName :: Text + , sshUserName :: Maybe Text + , sshDirectory :: Text + , sshRepoName :: String + , sshPort :: Int + , needsPubKey :: Bool + , sshCapabilities :: [SshServerCapability] + , sshRepoUrl :: Maybe String + } + deriving (Read, Show, Eq) + +data SshServerCapability + = GitAnnexShellCapable -- server has git-annex-shell installed + | GitCapable -- server has git installed + | RsyncCapable -- server supports raw rsync access (not only via git-annex-shell) + | PushCapable -- repo on server is set up already, and ready to accept pushes + deriving (Read, Show, Eq) + +hasCapability :: SshData -> SshServerCapability -> Bool +hasCapability d c = c `elem` sshCapabilities d + +addCapability :: SshData -> SshServerCapability -> SshData +addCapability d c = d { sshCapabilities = c : sshCapabilities d } + +onlyCapability :: SshData -> SshServerCapability -> Bool +onlyCapability d c = all (== c) (sshCapabilities d) + +type SshPubKey = String +type SshPrivKey = String + +data SshKeyPair = SshKeyPair + { sshPubKey :: SshPubKey + , sshPrivKey :: SshPrivKey + } + +instance Show SshKeyPair where + show = sshPubKey + +{- ssh -ofoo=bar command-line option -} +sshOpt :: String -> String -> String +sshOpt k v = concat ["-o", k, "=", v] + +{- user@host or host -} +genSshHost :: Text -> Maybe Text -> SshHost +genSshHost host user = either giveup id $ mkSshHost $ + maybe "" (\v -> T.unpack v ++ "@") user ++ T.unpack host + +{- Generates a ssh or rsync url from a SshData. 
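+ -
+ - For example (illustrative values): with user "joey", host
+ - "example.com" and directory "annex", a git-capable server yields
+ - "ssh://joey@example.com/~/annex/", while an rsync-only server
+ - yields "joey@example.com:annex/".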
-}
+genSshUrl :: SshData -> String
+genSshUrl sshdata = case sshRepoUrl sshdata of
+ Just repourl -> repourl
+ Nothing -> addtrailingslash $ T.unpack $ T.concat $
+ if (onlyCapability sshdata RsyncCapable)
+ then [u, h, T.pack ":", sshDirectory sshdata]
+ else [T.pack "ssh://", u, h, d]
+ where
+ u = maybe (T.pack "") (\v -> T.concat [v, T.pack "@"]) $ sshUserName sshdata
+ h = sshHostName sshdata
+ d
+ | T.pack "/" `T.isPrefixOf` sshDirectory sshdata = sshDirectory sshdata
+ | T.pack "~/" `T.isPrefixOf` sshDirectory sshdata = T.concat [T.pack "/", sshDirectory sshdata]
+ | otherwise = T.concat [T.pack "/~/", sshDirectory sshdata]
+ addtrailingslash s
+ | "/" `isSuffixOf` s = s
+ | otherwise = s ++ "/"
+
+{- Reverses genSshUrl -}
+parseSshUrl :: String -> Maybe SshData
+parseSshUrl u
+ | "ssh://" `isPrefixOf` u = fromssh (drop (length "ssh://") u)
+ | otherwise = fromrsync u
+ where
+ mkdata (userhost, dir) = Just $ SshData
+ { sshHostName = T.pack host
+ , sshUserName = if null user then Nothing else Just $ T.pack user
+ , sshDirectory = T.pack dir
+ , sshRepoName = genSshRepoName host dir
+ -- dummy values, cannot determine from url
+ , sshPort = 22
+ , needsPubKey = True
+ , sshCapabilities = []
+ , sshRepoUrl = Nothing
+ }
+ where
+ (user, host) = if '@' `elem` userhost
+ then separate (== '@') userhost
+ else ("", userhost)
+ fromrsync s
+ | not (rsyncUrlIsShell u) = Nothing
+ | otherwise = mkdata $ separate (== ':') s
+ fromssh = mkdata . break (== '/')
+
+{- Generates a git remote name, like host_dir or host -}
+genSshRepoName :: String -> FilePath -> String
+genSshRepoName host dir
+ | null dir = makeLegalName host
+ | otherwise = makeLegalName $ host ++ "_" ++ dir
+
+{- The output of ssh, including both stdout and stderr. -}
+sshTranscript :: [String] -> SshHost -> String -> (Maybe String) -> IO (String, Bool)
+sshTranscript opts sshhost cmd input = processTranscript "ssh"
+ (opts ++ [fromSshHost sshhost, cmd]) input
+
+{- Ensure that the ssh public key doesn't include any ssh options, like
+ - command=foo, or other weirdness.
+ -
+ - The returned version of the key has its comment removed.
+ -}
+validateSshPubKey :: SshPubKey -> Either String SshPubKey
+validateSshPubKey pubkey
+ | length (lines pubkey) == 1 = check $ words pubkey
+ | otherwise = Left "too many lines in ssh public key"
+ where
+ check (prefix:key:_) = checkprefix prefix (unwords [prefix, key])
+ check _ = err "wrong number of words in ssh public key"
+
+ err msg = Left $ unwords [msg, pubkey]
+
+ checkprefix prefix validpubkey
+ | ssh == "ssh" && all isAlphaNum keytype = Right validpubkey
+ | otherwise = err "bad ssh public key prefix"
+ where
+ (ssh, keytype) = separate (== '-') prefix
+
+addAuthorizedKeys :: Bool -> FilePath -> SshPubKey -> IO Bool
+addAuthorizedKeys gitannexshellonly dir pubkey = boolSystem "sh"
+ [ Param "-c" , Param $ addAuthorizedKeysCommand gitannexshellonly dir pubkey ]
+
+{- Should only be used within the same process that added the line;
+ - the layout of the line is not kept stable across versions. -}
+removeAuthorizedKeys :: Bool -> FilePath -> SshPubKey -> IO ()
+removeAuthorizedKeys gitannexshellonly dir pubkey = do
+ let keyline = authorizedKeysLine gitannexshellonly dir pubkey
+ sshdir <- sshDir
+ let keyfile = sshdir </> "authorized_keys"
+ tryWhenExists (lines <$> readFileStrict keyfile) >>= \case
+ Just ls -> viaTmp writeSshConfig keyfile $
+ unlines $ filter (/= keyline) ls
+ Nothing -> noop
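+
+{- For illustration (a sketch, not literal output): with the shell
+ - restriction enabled, the line appended to ~/.ssh/authorized_keys by
+ - the command below looks like
+ -
+ - command="env GIT_ANNEX_SHELL_DIRECTORY=annex ~/.ssh/git-annex-shell",no-agent-forwarding,no-port-forwarding,no-X11-forwarding,no-pty ssh-rsa AAAA...
+ -}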
+
+{- Implemented as a shell command, so it can be run on remote servers over
+ - ssh.
+ -
+ - The ~/.ssh/git-annex-shell wrapper script is created if not already
+ - present.
+ -}
+addAuthorizedKeysCommand :: Bool -> FilePath -> SshPubKey -> String
+addAuthorizedKeysCommand gitannexshellonly dir pubkey = intercalate "&&"
+ [ "mkdir -p ~/.ssh"
+ , intercalate "; "
+ [ "if [ ! -e " ++ wrapper ++ " ]"
+ , "then (" ++ intercalate ";" (map echoval script) ++ ") > " ++ wrapper
+ , "fi"
+ ]
+ , "chmod 700 " ++ wrapper
+ , "touch ~/.ssh/authorized_keys"
+ , "chmod 600 ~/.ssh/authorized_keys"
+ , unwords
+ [ "echo"
+ , shellEscape $ authorizedKeysLine gitannexshellonly dir pubkey
+ , ">>~/.ssh/authorized_keys"
+ ]
+ ]
+ where
+ echoval v = "echo " ++ shellEscape v
+ wrapper = "~/.ssh/git-annex-shell"
+ script =
+ [ shebang
+ , "set -e"
+ , "if [ \"x$SSH_ORIGINAL_COMMAND\" != \"x\" ]; then"
+ , runshell "$SSH_ORIGINAL_COMMAND"
+ , "else"
+ , runshell "$@"
+ , "fi"
+ ]
+ runshell var = "exec git-annex-shell -c \"" ++ var ++ "\""
+
+authorizedKeysLine :: Bool -> FilePath -> SshPubKey -> String
+authorizedKeysLine gitannexshellonly dir pubkey
+ | gitannexshellonly = limitcommand ++ pubkey
+ {- TODO: Locking down rsync is difficult, requiring a rather
+ - long perl script. -}
+ | otherwise = pubkey
+ where
+ limitcommand = "command=\"env GIT_ANNEX_SHELL_DIRECTORY="++shellEscape dir++" ~/.ssh/git-annex-shell\",no-agent-forwarding,no-port-forwarding,no-X11-forwarding,no-pty "
+
+{- Generates a ssh key pair. -}
+genSshKeyPair :: IO SshKeyPair
+genSshKeyPair = withTmpDir "git-annex-keygen" $ \dir -> do
+ ok <- boolSystem "ssh-keygen"
+ [ Param "-P", Param "" -- no password
+ , Param "-f", File $ dir </> "key"
+ ]
+ unless ok $
+ giveup "ssh-keygen failed"
+ SshKeyPair
+ <$> readFile (dir </> "key.pub")
+ <*> readFile (dir </> "key")
+
+{- Installs a ssh key pair, and sets up ssh config with a mangled hostname
+ - that will enable use of the key. This way we avoid changing the user's
+ - regular ssh experience at all. Returns a modified SshData containing the
+ - mangled hostname.
+ -
+ - Note that the key files are put in ~/.ssh/git-annex/, rather than directly
+ - in ~/.ssh, because of an **INSANE** behavior of gnome-keyring: It loads
+ - ~/.ssh/ANYTHING.pub, and uses them indiscriminately. But using this key
+ - for a normal login to the server will force git-annex-shell to run,
+ - and locks the user out. Luckily, it does not recurse into subdirectories.
+ -
+ - Similarly, IdentitiesOnly is set in the ssh config to prevent the
+ - ssh-agent from forcing use of a different key.
+ -
+ - Force strict host key checking to avoid repeated prompts
+ - when git-annex and git try to access the remote, if its
+ - host key has changed.
+ -}
+installSshKeyPair :: SshKeyPair -> SshData -> IO SshData
+installSshKeyPair sshkeypair sshdata = do
+ sshdir <- sshDir
+ createDirectoryIfMissing True $ fromRawFilePath $
+ parentDir $ toRawFilePath $ sshdir </> sshPrivKeyFile sshdata
+
+ unlessM (doesFileExist $ sshdir </> sshPrivKeyFile sshdata) $
+ writeFileProtected (toRawFilePath (sshdir </> sshPrivKeyFile sshdata)) (sshPrivKey sshkeypair)
+ unlessM (doesFileExist $ sshdir </> sshPubKeyFile sshdata) $
+ writeFile (sshdir </> sshPubKeyFile sshdata) (sshPubKey sshkeypair)
+
+ setSshConfig sshdata
+ [ ("IdentityFile", "~/.ssh/" ++ sshPrivKeyFile sshdata)
+ , ("IdentitiesOnly", "yes")
+ , ("StrictHostKeyChecking", "yes")
+ ]
+
+sshPrivKeyFile :: SshData -> FilePath
+sshPrivKeyFile sshdata = "git-annex" </> "key." ++ mangleSshHostName sshdata
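+
+{- For example (illustrative values): for host "example.com", user
+ - "joey", port 22 and directory "annex", the private key file is
+ - "git-annex/key.git-annex-example.com-joey_22_annex" under ~/.ssh. -}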
+
+sshPubKeyFile :: SshData -> FilePath
+sshPubKeyFile sshdata = sshPrivKeyFile sshdata ++ ".pub"
+
+{- Generates and installs a new ssh key pair if one is not already
+ - installed. Returns the modified SshData that will use the key pair,
+ - and the key pair. -}
+setupSshKeyPair :: SshData -> IO (SshData, SshKeyPair)
+setupSshKeyPair sshdata = do
+ sshdir <- sshDir
+ mprivkey <- catchMaybeIO $ readFile (sshdir </> sshPrivKeyFile sshdata)
+ mpubkey <- catchMaybeIO $ readFile (sshdir </> sshPubKeyFile sshdata)
+ keypair <- case (mprivkey, mpubkey) of
+ (Just privkey, Just pubkey) -> return $ SshKeyPair
+ { sshPubKey = pubkey
+ , sshPrivKey = privkey
+ }
+ _ -> genSshKeyPair
+ sshdata' <- installSshKeyPair keypair sshdata
+ return (sshdata', keypair)
+
+{- Fixes git-annex ssh key pairs configured in .ssh/config
+ - by old versions to set IdentitiesOnly.
+ -
+ - Strategy: Search for IdentityFile lines with key.git-annex
+ - in their names. These are for git-annex ssh key pairs.
+ - Add the IdentitiesOnly line immediately after them, if not already
+ - present.
+ -}
+fixSshKeyPairIdentitiesOnly :: IO ()
+fixSshKeyPairIdentitiesOnly = changeUserSshConfig $ unlines . go [] . lines
+ where
+ go c [] = reverse c
+ go c (l:[])
+ | all (`isInfixOf` l) indicators = go (fixedline l:l:c) []
+ | otherwise = go (l:c) []
+ go c (l:next:rest)
+ | all (`isInfixOf` l) indicators && not ("IdentitiesOnly" `isInfixOf` next) =
+ go (fixedline l:l:c) (next:rest)
+ | otherwise = go (l:c) (next:rest)
+ indicators = ["IdentityFile", "key.git-annex"]
+ fixedline tmpl = takeWhile isSpace tmpl ++ "IdentitiesOnly yes"
+
+{- Add StrictHostKeyChecking to any ssh config stanzas that were written
+ - by git-annex. -}
+fixUpSshRemotes :: IO ()
+fixUpSshRemotes = modifyUserSshConfig (map go)
+ where
+ go c@(HostConfig h _)
+ | "git-annex-" `isPrefixOf` h = fixupconfig c
+ | otherwise = c
+ go other = other
+
+ fixupconfig c = case findHostConfigKey c "StrictHostKeyChecking" of
+ Nothing -> addToHostConfig c "StrictHostKeyChecking" "yes"
+ Just _ -> c
+
+{- Sets up a ssh config with a mangled hostname.
+ - Returns a modified SshData containing the mangled hostname. -}
+setSshConfig :: SshData -> [(String, String)] -> IO SshData
+setSshConfig sshdata config = do
+ sshdir <- sshDir
+ createDirectoryIfMissing True sshdir
+ let configfile = sshdir </> "config"
+ unlessM (catchBoolIO $ isInfixOf mangledhost <$> readFile configfile) $ do
+ appendFile configfile $ unlines $
+ [ ""
+ , "# Added automatically by git-annex"
+ , "Host " ++ mangledhost
+ ] ++ map (\(k, v) -> "\t" ++ k ++ " " ++ v)
+ (settings ++ config)
+ setSshConfigMode (toRawFilePath configfile)
+
+ return $ sshdata
+ { sshHostName = T.pack mangledhost
+ , sshRepoUrl = replace orighost mangledhost
+ <$> sshRepoUrl sshdata
+ }
+ where
+ orighost = T.unpack $ sshHostName sshdata
+ mangledhost = mangleSshHostName sshdata
+ settings =
+ [ ("Hostname", orighost)
+ , ("Port", show $ sshPort sshdata)
+ ]
+
+{- This hostname is specific to a given repository on the ssh host,
+ - so it is based on the real hostname, the username, and the directory.
+ -
+ - The mangled hostname has the form:
+ - "git-annex-realhostname-username_port_dir"
+ - Note that "-" is only used in the realhostname and as a separator;
+ - this is necessary to allow unMangleSshHostName to work.
+ -
+ - Unusual characters are url encoded, but using "." rather than "%"
+ - (the latter has special meaning to ssh).
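+ -
+ - For example (illustrative values): host "example.com", user "joey",
+ - port 22 and directory "annex" mangle to
+ - "git-annex-example.com-joey_22_annex".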
+ -
+ - In the username and directory, unusual characters are any
+ - non-alphanumerics, other than "_"
+ -
+ - The real hostname is not normally encoded at all. This is done for
+ - backwards compatibility and to avoid unnecessary ugliness in the
+ - filename. However, when it contains special characters
+ - (notably ":" which cannot be used on some filesystems), it is url
+ - encoded. To indicate it was encoded, the mangled hostname
+ - has the form
+ - "git-annex-.encodedhostname-username_port_dir"
+ -}
+mangleSshHostName :: SshData -> String
+mangleSshHostName sshdata = intercalate "-"
+ [ "git-annex"
+ , escapehostname (T.unpack (sshHostName sshdata))
+ , escape extra
+ ]
+ where
+ extra = intercalate "_" $ map T.unpack $ catMaybes
+ [ sshUserName sshdata
+ , Just $ T.pack $ show $ sshPort sshdata
+ , Just $ sshDirectory sshdata
+ ]
+ safe c
+ | isAlphaNum c = True
+ | c == '_' = True
+ | otherwise = False
+ escape s = replace "%" "." $ escapeURIString safe s
+ escapehostname s
+ | all (\c -> c == '.' || safe c) s = s
+ | otherwise = '.' : escape s
+
+{- Extracts the real hostname from a mangled ssh hostname. -}
+unMangleSshHostName :: String -> String
+unMangleSshHostName h = case splitc '-' h of
+ ("git":"annex":rest) -> unescape (intercalate "-" (beginning rest))
+ _ -> h
+ where
+ unescape ('.':s) = unEscapeString (replace "." "%" s)
+ unescape s = s
+
+{- Does ssh have known_hosts data for a hostname? -}
+knownHost :: Text -> IO Bool
+knownHost hostname = do
+ sshdir <- sshDir
+ ifM (doesFileExist $ sshdir </> "known_hosts")
+ ( not . null <$> checkhost
+ , return False
+ )
+ where
+ {- ssh-keygen -F can crash on some old known_hosts files -}
+ checkhost = catchDefaultIO "" $
+ readProcess "ssh-keygen" ["-F", T.unpack hostname]
diff --git a/Assistant/Sync.hs b/Assistant/Sync.hs
new file mode 100644
index 0000000000..48f805655d
--- /dev/null
+++ b/Assistant/Sync.hs
@@ -0,0 +1,287 @@
+{- git-annex assistant repo syncing
+ -
+ - Copyright 2012 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE OverloadedStrings #-}
+
+module Assistant.Sync where
+
+import Assistant.Common
+import Assistant.Pushes
+import Assistant.Alert
+import Assistant.Alert.Utility
+import Assistant.DaemonStatus
+import Assistant.ScanRemotes
+import Assistant.RemoteControl
+import qualified Command.Sync
+import qualified Git
+import qualified Git.Command
+import qualified Remote
+import qualified Types.Remote as Remote
+import qualified Annex
+import qualified Annex.Branch
+import Remote.List.Util
+import Annex.UUID
+import Annex.TaggedPush
+import Annex.Ssh
+import Annex.CurrentBranch
+import qualified Config
+import Git.Config
+import Config.DynamicConfig
+import Assistant.NamedThread
+import Assistant.Threads.Watcher (watchThread, WatcherControl(..))
+import Assistant.TransferSlots
+import Assistant.TransferQueue
+import Assistant.RepoProblem
+import Assistant.Commits
+import Types.Transfer
+import Database.Export
+
+import Data.Time.Clock
+import qualified Data.Map as M
+import Control.Concurrent
+import Control.Concurrent.Async
+
+{- Syncs with remotes that may have been disconnected for a while.
+ -
+ - First gets git in sync, and then prepares any necessary file transfers.
+ -
+ - An expensive full scan is queued when the git-annex branches of some of
+ - the remotes have diverged from the local git-annex branch. Otherwise,
+ - it's sufficient to requeue failed transfers.
+ - + - Also handles signaling any connectRemoteNotifiers, after the syncing is + - done, and records an export commit to make any exports be updated. + -} +reconnectRemotes :: [Remote] -> Assistant () +reconnectRemotes [] = recordExportCommit +reconnectRemotes rs = void $ do + rs' <- liftAnnex $ filterM (Remote.checkAvailable True) rs + unless (null rs') $ do + failedrs <- syncAction rs' (const go) + forM_ failedrs $ \r -> + whenM (liftAnnex $ Remote.checkAvailable False r) $ + repoHasProblem (Remote.uuid r) (syncRemote r) + mapM_ signal $ filter (`notElem` failedrs) rs' + recordExportCommit + where + gitremotes = liftAnnex $ + filterM (notspecialremote <$$> Remote.getRepo) rs + notspecialremote r + | Git.repoIsUrl r = True + | Git.repoIsLocal r = True + | Git.repoIsLocalUnknown r = True + | otherwise = False + syncbranch currentbranch@(Just _, _) = do + (failedpull, diverged) <- manualPull currentbranch =<< gitremotes + now <- liftIO getCurrentTime + failedpush <- pushToRemotes' now =<< gitremotes + return (nub $ failedpull ++ failedpush, diverged) + {- No local branch exists yet, but we can try pulling. -} + syncbranch (Nothing, _) = manualPull (Nothing, Nothing) =<< gitremotes + go = do + (failed, diverged) <- syncbranch =<< liftAnnex getCurrentBranch + addScanRemotes diverged =<< + filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . Remote.gitconfig) rs + return failed + signal r = liftIO . mapM_ (flip tryPutMVar ()) + =<< fromMaybe [] . M.lookup (Remote.uuid r) . connectRemoteNotifiers + <$> getDaemonStatus + +{- Pushes the local sync branch to all remotes, in + - parallel, along with the git-annex branch. This is the same + - as "git annex sync", except in parallel, and will co-exist with use of + - "git annex sync". + - + - Avoids running possibly long-duration commands in the Annex monad, so + - as not to block other threads. + - + - This can fail, when the remote's sync branch (or git-annex branch) has + - been updated by some other remote pushing into it, or by the remote + - itself. To handle failure, a manual pull and merge is done, and the push + - is retried. + - + - When there's a lot of activity, we may fail more than once. + - On the other hand, we may fail because the remote is not available. + - Rather than retrying indefinitely, after the first retry we enter a + - fallback mode, where our push is guaranteed to succeed if the remote is + - reachable. If the fallback fails, the push is queued to be retried + - later. + - + - Returns any remotes that it failed to push to. + -} +pushToRemotes :: [Remote] -> Assistant [Remote] +pushToRemotes remotes = do + now <- liftIO getCurrentTime + let remotes' = filter (wantpush . 
Remote.gitconfig) remotes + syncAction remotes' (pushToRemotes' now) + where + wantpush gc + | remoteAnnexReadOnly gc = False + | not (remoteAnnexPush gc) = False + | otherwise = True + +pushToRemotes' :: UTCTime -> [Remote] -> Assistant [Remote] +pushToRemotes' now remotes = do + (g, branch, u, ms) <- liftAnnex $ do + Annex.Branch.commit =<< Annex.Branch.commitMessage + (,,,) + <$> gitRepo + <*> getCurrentBranch + <*> getUUID + <*> Annex.getState Annex.output + ret <- go ms True branch g u remotes + return ret + where + go _ _ (Nothing, _) _ _ _ = return [] -- no branch, so nothing to do + go _ _ _ _ _ [] = return [] -- no remotes, so nothing to do + go ms shouldretry currbranch@(Just branch, _) g u rs = do + debug ["pushing to", show rs] + (succeeded, failed) <- parallelPush g rs (push ms branch) + updatemap succeeded [] + if null failed + then return [] + else if shouldretry + then retry ms currbranch g u failed + else fallback branch g u failed + + updatemap succeeded failed = do + v <- getAssistant failedPushMap + changeFailedPushMap v $ \m -> + M.union (makemap failed) $ + M.difference m (makemap succeeded) + makemap l = M.fromList $ zip l (repeat now) + + retry ms currbranch g u rs = do + debug ["trying manual pull to resolve failed pushes"] + void $ manualPull currbranch rs + go ms False currbranch g u rs + + fallback branch g u rs = do + debug ["fallback pushing to", show rs] + (succeeded, failed) <- parallelPush g rs (taggedPush u Nothing branch) + updatemap succeeded failed + return failed + + push ms branch remote = Command.Sync.pushBranch remote (Just branch) ms + +parallelPush :: Git.Repo -> [Remote] -> (Remote -> Git.Repo -> IO Bool)-> Assistant ([Remote], [Remote]) +parallelPush g rs a = do + rgs <- liftAnnex $ mapM topush rs + (succeededrgs, failedrgs) <- liftIO $ inParallel (uncurry a) rgs + return (map fst succeededrgs, map fst failedrgs) + where + topush r = (,) + <$> pure r + <*> (Remote.getRepo r >>= \repo -> + sshOptionsTo repo (Remote.gitconfig r) g) + +inParallel :: (v -> IO Bool) -> [v] -> IO ([v], [v]) +inParallel a l = (\(t,f) -> (map fst t, map fst f)) + . partition snd + . zip l <$> mapConcurrently a l + +{- Displays an alert while running an action that syncs with some remotes, + - and returns any remotes that it failed to sync with. + - + - Readonly remotes are also hidden (to hide the web special remote). + -} +syncAction :: [Remote] -> ([Remote] -> Assistant [Remote]) -> Assistant [Remote] +syncAction rs a + | null visibleremotes = a rs + | otherwise = do + i <- addAlert $ syncAlert visibleremotes + failed <- a rs + failed' <- filterM (not . Git.repoIsLocalUnknown <$$> liftAnnex . Remote.getRepo) failed + let succeeded = filter (`notElem` failed) visibleremotes + if null succeeded && null failed' + then removeAlert i + else updateAlertMap $ mergeAlert i $ + syncResultAlert succeeded failed' + return failed + where + visibleremotes = filter (not . Remote.readonly) rs + +{- Manually pull from remotes and merge their branches. Returns any + - remotes that it failed to pull from, and a Bool indicating + - whether the git-annex branches of the remotes and local had + - diverged before the pull. + -} +manualPull :: Command.Sync.CurrBranch -> [Remote] -> Assistant ([Remote], Bool) +manualPull currentbranch remotes = do + g <- liftAnnex gitRepo + -- Allow merging unrelated histories. 
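+ -- (Remotes connected by the assistant, eg via pairing, may share
+ -- no history with the local repository at all; per the comment
+ -- above, the flag passed to mergeConfig below allows that case.)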
+ mc <- liftAnnex $ Command.Sync.mergeConfig True + failed <- forM remotes $ \r -> if wantpull $ Remote.gitconfig r + then do + g' <- liftAnnex $ do + repo <- Remote.getRepo r + sshOptionsTo repo (Remote.gitconfig r) g + ifM (liftIO $ Git.Command.runBool [Param "fetch", Param $ Remote.name r] g') + ( return Nothing + , return $ Just r + ) + else return Nothing + haddiverged <- liftAnnex Annex.Branch.forceUpdate >>= return . \case + u@(Annex.Branch.UpdateMade {}) -> Annex.Branch.refsWereMerged u + (Annex.Branch.UpdateFailedPermissions {}) -> True + forM_ remotes $ \r -> + liftAnnex $ Command.Sync.mergeRemote r + currentbranch mc def + when haddiverged $ + updateExportTreeFromLogAll + return (catMaybes failed, haddiverged) + where + wantpull gc = remoteAnnexPull gc + +{- Start syncing a remote, using a background thread. -} +syncRemote :: Remote -> Assistant () +syncRemote remote = do + updateSyncRemotes + thread <- asIO $ do + reconnectRemotes [remote] + addScanRemotes True [remote] + void $ liftIO $ forkIO $ thread + +{- Use Nothing to change autocommit setting; or a remote to change + - its sync setting. -} +changeSyncable :: Maybe Remote -> Bool -> Assistant () +changeSyncable Nothing enable = do + liftAnnex $ Config.setConfig key (boolConfig enable) + liftIO . maybe noop (`throwTo` signal) + =<< namedThreadId watchThread + where + key = Config.annexConfig "autocommit" + signal + | enable = ResumeWatcher + | otherwise = PauseWatcher +changeSyncable (Just r) True = do + liftAnnex $ changeSyncFlag r True + syncRemote r + sendRemoteControl RELOAD +changeSyncable (Just r) False = do + liftAnnex $ changeSyncFlag r False + updateSyncRemotes + {- Stop all transfers to or from this remote. + - XXX Can't stop any ongoing scan, or git syncs. -} + void $ dequeueTransfers tofrom + mapM_ (cancelTransfer False) =<< + filter tofrom . M.keys . currentTransfers <$> getDaemonStatus + where + tofrom t = transferUUID t == Remote.uuid r + +changeSyncFlag :: Remote -> Bool -> Annex () +changeSyncFlag r enabled = do + repo <- Remote.getRepo r + let key = Config.remoteAnnexConfig repo "sync" + Config.setConfig key (boolConfig enabled) + remotesChanged + +updateExportTreeFromLogAll :: Assistant () +updateExportTreeFromLogAll = do + rs <- exportRemotes <$> getDaemonStatus + forM_ rs $ \r -> liftAnnex $ + openDb (Remote.uuid r) >>= updateExportTreeFromLog diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs new file mode 100644 index 0000000000..229ad17d1a --- /dev/null +++ b/Assistant/Threads/Committer.hs @@ -0,0 +1,520 @@ +{- git-annex assistant commit thread + - + - Copyright 2012-2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -}
+
+{-# LANGUAGE CPP, OverloadedStrings #-}
+
+module Assistant.Threads.Committer where
+
+import Assistant.Common
+import Assistant.Changes
+import Assistant.Types.Changes
+import Assistant.Commits
+import Assistant.Alert
+import Assistant.DaemonStatus
+import Assistant.TransferQueue
+import Assistant.Drop
+import Types.Transfer
+import Logs.Location
+import Utility.ThreadScheduler
+import qualified Utility.Lsof as Lsof
+import qualified Utility.DirWatcher as DirWatcher
+import Types.KeySource
+import Types.Command
+import Config
+import Annex.Content
+import Annex.Ingest
+import Annex.Link
+import Annex.Perms
+import Annex.CatFile
+import Annex.InodeSentinal
+import Annex.CurrentBranch
+import Annex.FileMatcher
+import qualified Annex
+import qualified Annex.Queue
+import qualified Annex.Branch
+import Utility.InodeCache
+import qualified Database.Keys
+import qualified Command.Sync
+import qualified Command.Add
+import Config.GitConfig
+import qualified Git.Branch
+import Utility.Tuple
+import Utility.Metered
+import qualified Utility.RawFilePath as R
+
+import Data.Time.Clock
+import qualified Data.Set as S
+import qualified Data.Map as M
+import Data.Either
+import Control.Concurrent
+import System.PosixCompat.Files (fileID, deviceID, fileMode)
+
+{- This thread makes git commits at appropriate times. -}
+commitThread :: NamedThread
+commitThread = namedThread "Committer" $ do
+	havelsof <- liftIO $ inSearchPath "lsof"
+	delayadd <- liftAnnex $
+		fmap Seconds . annexDelayAdd <$> Annex.getGitConfig
+	largefilematcher <- liftAnnex largeFilesMatcher
+	annexdotfiles <- liftAnnex $ getGitConfigVal annexDotFiles
+	msg <- liftAnnex Command.Sync.commitMsg
+	lockdowndir <- liftAnnex $ fromRepo gitAnnexTmpWatcherDir
+	liftAnnex $ do
+		-- Clean up anything left behind by a previous process
+		-- on unclean shutdown.
+		void $ liftIO $ tryIO $ removeDirectoryRecursive
+			(fromRawFilePath lockdowndir)
+		void $ createAnnexDirectory lockdowndir
+	waitChangeTime $ \(changes, time) -> do
+		readychanges <- handleAdds (fromRawFilePath lockdowndir) havelsof largefilematcher annexdotfiles delayadd $
+			simplifyChanges changes
+		if shouldCommit False time (length readychanges) readychanges
+			then do
+				debug
+					[ "committing"
+					, show (length readychanges)
+					, "changes"
+					]
+				void $ alertWhile commitAlert $
+					liftAnnex $ commitStaged msg
+				recordCommit
+				recordExportCommit
+				let numchanges = length readychanges
+				mapM_ checkChangeContent readychanges
+				return numchanges
+			else do
+				refill readychanges
+				return 0
+
+refill :: [Change] -> Assistant ()
+refill [] = noop
+refill cs = do
+	debug ["delaying commit of", show (length cs), "changes"]
+	refillChanges cs
+
+{- Waits for one or more changes to arrive to be committed, and then
+ - runs an action to commit them. If more changes arrive while this is
+ - going on, they're handled intelligently, batching up changes into
+ - large commits where possible, doing rename detection, and
+ - committing immediately otherwise. -}
+waitChangeTime :: (([Change], UTCTime) -> Assistant Int) -> Assistant ()
+waitChangeTime a = waitchanges 0
+  where
+	waitchanges lastcommitsize = do
+		-- Wait one second as a simple rate limiter.
+		liftIO $ threadDelaySeconds (Seconds 1)
+		-- Now, wait until at least one change is available for
+		-- processing.
+		cs <- getChanges
+		handlechanges cs lastcommitsize
+	handlechanges changes lastcommitsize = do
+		let len = length changes
+		-- See if now's a good time to commit.
+		now <- liftIO getCurrentTime
+		scanning <- not . scanComplete <$> getDaemonStatus
+		case (lastcommitsize >= maxCommitSize, shouldCommit scanning now len changes, possiblyrename changes) of
+			(True, True, _)
+				| len > maxCommitSize ->
+					a (changes, now) >>= waitchanges
+				| otherwise -> aftermaxcommit changes
+			(_, True, False) ->
+				a (changes, now) >>= waitchanges
+			(_, True, True) -> do
+				morechanges <- getrelatedchanges changes
+				a (changes ++ morechanges, now) >>= waitchanges
+			_ -> do
+				refill changes
+				waitchanges lastcommitsize
+
+	{- Did we perhaps only get one of the AddChange and RmChange pair
+	 - that make up a file rename? Or some of the pairs that make up
+	 - a directory rename?
+	 -}
+	possiblyrename = all renamepart
+
+	renamepart (PendingAddChange _ _) = True
+	renamepart c = isRmChange c
+
+	{- Gets changes related to the passed changes, without blocking
+	 - very long.
+	 -
+	 - If there are multiple RmChanges, this is probably a directory
+	 - rename, in which case it may be necessary to wait longer to get
+	 - all the Changes involved.
+	 -}
+	getrelatedchanges oldchanges
+		| length (filter isRmChange oldchanges) > 1 =
+			concat <$> getbatchchanges []
+		| otherwise = do
+			liftIO humanImperceptibleDelay
+			getAnyChanges
+	getbatchchanges cs = do
+		liftIO $ threadDelay $ fromIntegral $ oneSecond `div` 10
+		cs' <- getAnyChanges
+		if null cs'
+			then return cs
+			else getbatchchanges (cs':cs)
+
+	{- The last commit was maximum size, so it's very likely there
+	 - are more changes and we'd like to ensure we make another commit
+	 - of maximum size if possible.
+	 -
+	 - But, it can take a while for the Watcher to wake back up
+	 - after a commit. It can get blocked by another thread
+	 - that is using the Annex state, such as a git-annex branch
+	 - commit. Especially after such a large commit, this can
+	 - take several seconds. When this happens, it defeats the
+	 - normal commit batching, which sees some old changes the
+	 - Watcher found while the commit was being prepared, and sees
+	 - no recent ones, and wants to commit immediately.
+	 -
+	 - All that we need to do, then, is wait for the Watcher to
+	 - wake up, and queue up one more change.
+	 -
+	 - However, it's also possible that we're at the end of changes for
+	 - now. So to avoid waiting a really long time before committing
+	 - those changes we have, poll for up to 30 seconds, and then
+	 - commit them.
+	 -
+	 - Also, try to run something in Annex, to ensure we block
+	 - longer if the Annex state is indeed blocked.
+	 -}
+	aftermaxcommit oldchanges = loop (30 :: Int)
+	  where
+		loop 0 = continue oldchanges
+		loop n = do
+			liftAnnex noop -- ensure Annex state is free
+			liftIO $ threadDelaySeconds (Seconds 1)
+			changes <- getAnyChanges
+			if null changes
+				then loop (n - 1)
+				else continue (oldchanges ++ changes)
+		continue cs
+			| null cs = waitchanges 0
+			| otherwise = handlechanges cs 0
+
+isRmChange :: Change -> Bool
+isRmChange (Change { changeInfo = i }) | i == RmChange = True
+isRmChange _ = False
+
+{- An amount of time that is hopefully imperceptibly short for humans,
+ - while long enough for a computer to get some work done.
+ - Note that 0.001 is a little too short for rename change batching to
+ - work. -}
+humanImperceptibleInterval :: NominalDiffTime
+humanImperceptibleInterval = 0.01
+
+humanImperceptibleDelay :: IO ()
+humanImperceptibleDelay = threadDelay $
+	truncate $ humanImperceptibleInterval * fromIntegral oneSecond
+
+maxCommitSize :: Int
+maxCommitSize = 5000
+
+{- Decide if now is a good time to make a commit.
+ - Note that the list of changes has a random order.
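+ - (So the strategy below counts how many changes fall within the past
+ - second, rather than assuming the most recent change sorts last.)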
+ - + - Current strategy: If there have been 10 changes within the past second, + - a batch activity is taking place, so wait for later. + -} +shouldCommit :: Bool -> UTCTime -> Int -> [Change] -> Bool +shouldCommit scanning now len changes + | scanning = len >= maxCommitSize + | len == 0 = False + | len >= maxCommitSize = True + | length recentchanges < 10 = True + | otherwise = False -- batch activity + where + thissecond c = timeDelta c <= 1 + recentchanges = filter thissecond changes + timeDelta c = now `diffUTCTime` changeTime c + +commitStaged :: String -> Annex Bool +commitStaged msg = do + {- This could fail if there's another commit being made by + - something else. -} + v <- tryNonAsync Annex.Queue.flush + case v of + Left _ -> return False + Right _ -> do + cmode <- annexCommitMode <$> Annex.getGitConfig + ok <- inRepo $ Git.Branch.commitCommand cmode + (Git.Branch.CommitQuiet True) + [ Param "-m" + , Param msg + ] + when ok $ + Command.Sync.updateBranches =<< getCurrentBranch + {- Commit the git-annex branch. This comes after + - the commit of the staged changes, so that + - annex.commitmessage-command can examine that + - commit. -} + Annex.Branch.commit =<< Annex.Branch.commitMessage + return ok + +{- If there are PendingAddChanges, or InProcessAddChanges, the files + - have not yet actually been added, and that has to be done + - now, before committing. + - + - Deferring the adds to this point causes batches to be bundled together, + - which allows faster checking with lsof that the files are not still open + - for write by some other process, and faster checking with git-ls-files + - that the files are not already checked into git. + - + - When a file is added in locked mode, Inotify will notice the new symlink. + - So this waits for additional Changes to arrive, so that the symlink has + - hopefully been staged before returning, and will be committed immediately. + - (OTOH, for kqueue, eventsCoalesce, so instead the symlink is directly + - created and staged.) + - + - Returns a list of all changes that are ready to be committed. + - Any pending adds that are not ready yet are put back into the ChangeChan, + - where they will be retried later. 
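+ -
+ - (For example, a file that lsof shows is still open for write is
+ - postponed via refillChanges, and is added on a later pass once it
+ - has no remaining writers.)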
+ -}
+handleAdds :: FilePath -> Bool -> GetFileMatcher -> Bool -> Maybe Seconds -> [Change] -> Assistant [Change]
+handleAdds lockdowndir havelsof largefilematcher annexdotfiles delayadd cs = returnWhen (null incomplete) $ do
+	let (pending, inprocess) = partition isPendingAddChange incomplete
+	let lockdownconfig = LockDownConfig
+		{ lockingFile = False
+		, hardlinkFileTmpDir = Just (toRawFilePath lockdowndir)
+		, checkWritePerms = True
+		}
+	(postponed, toadd) <- partitionEithers
+		<$> safeToAdd lockdowndir lockdownconfig havelsof delayadd pending inprocess
+
+	unless (null postponed) $
+		refillChanges postponed
+
+	returnWhen (null toadd) $ do
+		(addedpointerfiles, toaddrest) <- partitionEithers
+			<$> mapM checkpointerfile toadd
+		(toaddannexed, toaddsmall) <- partitionEithers
+			<$> mapM checksmall toaddrest
+		addsmall toaddsmall
+		addedannexed <- addaction toadd $
+			catMaybes <$> addannexed toaddannexed
+		return $ addedannexed ++ toaddsmall ++ addedpointerfiles ++ otherchanges
+  where
+	(incomplete, otherchanges) = partition (\c -> isPendingAddChange c || isInProcessAddChange c) cs
+
+	returnWhen c a
+		| c = return otherchanges
+		| otherwise = a
+
+	checkpointerfile change = do
+		let file = toRawFilePath $ changeFile change
+		mk <- liftIO $ isPointerFile file
+		case mk of
+			Nothing -> return (Right change)
+			Just key -> do
+				mode <- liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus file
+				liftAnnex $ stagePointerFile file mode =<< hashPointerFile key
+				return $ Left $ Change
+					(changeTime change)
+					(changeFile change)
+					(LinkChange (Just key))
+
+	checksmall change
+		| not annexdotfiles && dotfile f =
+			return (Right change)
+		| otherwise =
+			ifM (liftAnnex $ checkFileMatcher largefilematcher f)
+				( return (Left change)
+				, return (Right change)
+				)
+	  where
+		f = toRawFilePath (changeFile change)
+
+	addsmall [] = noop
+	addsmall toadd = liftAnnex $ void $ tryIO $
+		forM (map (toRawFilePath . changeFile) toadd) $ \f ->
+			Command.Add.addFile Command.Add.Small f
+				=<< liftIO (R.getSymbolicLinkStatus f)
+
+	{- Avoid overhead of re-ingesting a renamed unlocked file, by
+	 - examining the other Changes to see if a removed file has the
+	 - same InodeCache as the new file. If so, we can just update
+	 - bookkeeping, and stage the file in git.
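+	 -
+	 - (E.g. "mv foo bar" yields an RmChange for foo and an AddChange
+	 - for bar; bar still has foo's InodeCache, so fastadd below reuses
+	 - foo's key instead of re-hashing the content.)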
+	 -}
+	addannexed :: [Change] -> Assistant [Maybe Change]
+	addannexed [] = return []
+	addannexed toadd = do
+		ct <- liftAnnex compareInodeCachesWith
+		m <- liftAnnex $ removedKeysMap ct cs
+		delta <- liftAnnex getTSDelta
+		let cfg = LockDownConfig
+			{ lockingFile = False
+			, hardlinkFileTmpDir = Just (toRawFilePath lockdowndir)
+			, checkWritePerms = True
+			}
+		if M.null m
+			then forM toadd (addannexed' cfg)
+			else forM toadd $ \c -> do
+				mcache <- liftIO $ genInodeCache (toRawFilePath (changeFile c)) delta
+				case mcache of
+					Nothing -> addannexed' cfg c
+					Just cache ->
+						case M.lookup (inodeCacheToKey ct cache) m of
+							Nothing -> addannexed' cfg c
+							Just k -> fastadd c k
+
+	addannexed' :: LockDownConfig -> Change -> Assistant (Maybe Change)
+	addannexed' lockdownconfig change@(InProcessAddChange { lockedDown = ld }) =
+		catchDefaultIO Nothing <~> doadd
+	  where
+		ks = keySource ld
+		doadd = sanitycheck ks $ do
+			(mkey, _mcache) <- liftAnnex $ do
+				showStartMessage (StartMessage "add" (ActionItemOther (Just (QuotedPath (keyFilename ks)))) (SeekInput []))
+				ingest nullMeterUpdate (Just $ LockedDown lockdownconfig ks) Nothing
+			maybe (failedingest change) (done change $ fromRawFilePath $ keyFilename ks) mkey
+	addannexed' _ _ = return Nothing
+
+	fastadd :: Change -> Key -> Assistant (Maybe Change)
+	fastadd change key = do
+		let source = keySource $ lockedDown change
+		liftAnnex $ finishIngestUnlocked key source
+		done change (fromRawFilePath $ keyFilename source) key
+
+	removedKeysMap :: InodeComparisonType -> [Change] -> Annex (M.Map InodeCacheKey Key)
+	removedKeysMap ct l = do
+		mks <- forM (filter isRmChange l) $ \c ->
+			catKeyFile $ toRawFilePath $ changeFile c
+		M.fromList . concat <$> mapM mkpairs (catMaybes mks)
+	  where
+		mkpairs k = map (\c -> (inodeCacheToKey ct c, k)) <$>
+			Database.Keys.getInodeCaches k
+
+	failedingest change = do
+		refill [retryChange change]
+		liftAnnex showEndFail
+		return Nothing
+
+	done change file key = liftAnnex $ do
+		logStatus key InfoPresent
+		mode <- liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus (toRawFilePath file)
+		stagePointerFile (toRawFilePath file) mode =<< hashPointerFile key
+		showEndOk
+		return $ Just $ finishedChange change key
+
+	{- Check that the keysource's keyFilename still exists,
+	 - and is still a hard link to its contentLocation,
+	 - before ingesting it. -}
+	sanitycheck keysource a = do
+		fs <- liftIO $ R.getSymbolicLinkStatus $ keyFilename keysource
+		ks <- liftIO $ R.getSymbolicLinkStatus $ contentLocation keysource
+		if deviceID ks == deviceID fs && fileID ks == fileID fs
+			then a
+			else do
+				-- remove the hard link
+				when (contentLocation keysource /= keyFilename keysource) $
+					void $ liftIO $ tryIO $ removeFile $ fromRawFilePath $ contentLocation keysource
+				return Nothing
+
+	{- Shows an alert while performing an action to add a file or
+	 - files. When only a few files are added, their names are shown
+	 - in the alert. When it's a batch add, the number of files added
+	 - is shown.
+	 -
+	 - Add errors tend to be transient and will be
+	 - automatically dealt with, so the alert is always told
+	 - the add succeeded.
+	 -}
+	addaction [] a = a
+	addaction toadd a = alertWhile' (addFileAlert $ map changeFile toadd) $
+		(,)
+			<$> pure True
+			<*> a
+
+{- Files can either be Right, to be added now,
+ - or are unsafe, and must be Left for later.
+ -
+ - Check by running lsof on the repository.
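+ -
+ - (Only files open for write make an add unsafe; see openwrite below,
+ - which treats OpenWriteOnly, OpenReadWrite, and OpenUnknown as
+ - write modes.)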
+ -} +safeToAdd :: FilePath -> LockDownConfig -> Bool -> Maybe Seconds -> [Change] -> [Change] -> Assistant [Either Change Change] +safeToAdd _ _ _ _ [] [] = return [] +safeToAdd lockdowndir lockdownconfig havelsof delayadd pending inprocess = do + maybe noop (liftIO . threadDelaySeconds) delayadd + liftAnnex $ do + lockeddown <- forM pending $ lockDown lockdownconfig . changeFile + let inprocess' = inprocess ++ mapMaybe mkinprocess (zip pending lockeddown) + openfiles <- if havelsof + then S.fromList . map fst3 . filter openwrite <$> + findopenfiles (map (keySource . lockedDown) inprocess') + else pure S.empty + let checked = map (check openfiles) inprocess' + + {- If new events are received when files are closed, + - there's no need to retry any changes that cannot + - be done now. -} + if DirWatcher.closingTracked + then do + mapM_ canceladd $ lefts checked + allRight $ rights checked + else return checked + where + check openfiles change@(InProcessAddChange { lockedDown = ld }) + | S.member (fromRawFilePath (contentLocation (keySource ld))) openfiles = Left change + check _ change = Right change + + mkinprocess (c, Just ld) = Just InProcessAddChange + { changeTime = changeTime c + , lockedDown = ld + } + mkinprocess (_, Nothing) = Nothing + + canceladd (InProcessAddChange { lockedDown = ld }) = do + let ks = keySource ld + warning $ QuotedPath (keyFilename ks) + <> " still has writers, not adding" + -- remove the hard link + when (contentLocation ks /= keyFilename ks) $ + void $ liftIO $ tryIO $ removeFile $ fromRawFilePath $ contentLocation ks + canceladd _ = noop + + openwrite (_file, mode, _pid) + | mode == Lsof.OpenWriteOnly = True + | mode == Lsof.OpenReadWrite = True + | mode == Lsof.OpenUnknown = True + | otherwise = False + + allRight = return . map Right + + {- Normally the KeySources are locked down inside the lockdowndir, + - so can just lsof that, which is quite efficient. + - + - In crippled filesystem mode, there is no lock down, so must run lsof + - on each individual file. + -} + findopenfiles keysources = ifM crippledFileSystem + ( liftIO $ do + let segments = segmentXargsUnordered $ + map (fromRawFilePath . keyFilename) keysources + concat <$> forM segments (\fs -> Lsof.query $ "--" : fs) + , liftIO $ Lsof.queryDir lockdowndir + ) + +{- After a Change is committed, queue any necessary transfers or drops + - of the content of the key. + - + - This is not done during the startup scan, because the expensive + - transfer scan does the same thing then. + -} +checkChangeContent :: Change -> Assistant () +checkChangeContent change@(Change { changeInfo = i }) = + case changeInfoKey i of + Nothing -> noop + Just k -> whenM (scanComplete <$> getDaemonStatus) $ do + present <- liftAnnex $ inAnnex k + void $ if present + then queueTransfers "new file created" Next k af Upload + else queueTransfers "new or renamed file wanted" Next k af Download + handleDrops "file renamed" present k af [] + where + f = changeFile change + af = AssociatedFile (Just (toRawFilePath f)) +checkChangeContent _ = noop diff --git a/Assistant/Threads/ConfigMonitor.hs b/Assistant/Threads/ConfigMonitor.hs new file mode 100644 index 0000000000..9f1e03f8d1 --- /dev/null +++ b/Assistant/Threads/ConfigMonitor.hs @@ -0,0 +1,95 @@ +{- git-annex assistant config monitor thread + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +module Assistant.Threads.ConfigMonitor where + +import Assistant.Common +import Assistant.BranchChange +import Assistant.DaemonStatus +import Assistant.Commits +import Utility.ThreadScheduler +import Logs +import Logs.UUID +import Logs.Trust +import Logs.PreferredContent +import Logs.Group +import Logs.NumCopies +import Remote.List.Util +import qualified Git.LsTree as LsTree +import Git.Types +import Git.FilePath +import qualified Annex.Branch +import Annex.FileMatcher + +import qualified Data.Set as S + +{- This thread detects when configuration changes have been made to the + - git-annex branch and reloads cached configuration. + - + - If the branch is frequently changing, it's checked for configuration + - changes no more often than once every 60 seconds. On the other hand, + - if the branch has not changed in a while, configuration changes will + - be detected immediately. + -} +configMonitorThread :: NamedThread +configMonitorThread = namedThread "ConfigMonitor" $ loop =<< getConfigs + where + loop old = do + waitBranchChange + new <- getConfigs + when (old /= new) $ do + let changedconfigs = new `S.difference` old + debug $ "reloading config" : + map (fromRawFilePath . fst) + (S.toList changedconfigs) + reloadConfigs new + {- Record a commit to get this config + - change pushed out to remotes. -} + recordCommit + liftIO $ threadDelaySeconds (Seconds 60) + loop new + +{- Config files, and their checksums. -} +type Configs = S.Set (RawFilePath, Sha) + +{- All git-annex's config files, and actions to run when they change. -} +configFilesActions :: [(RawFilePath, Assistant ())] +configFilesActions = + [ (uuidLog, void $ liftAnnex uuidDescMapLoad) + , (remoteLog, void $ liftAnnex remotesChanged) + , (trustLog, void $ liftAnnex trustMapLoad) + , (groupLog, void $ liftAnnex groupMapLoad) + , (numcopiesLog, void $ liftAnnex globalNumCopiesLoad) + , (mincopiesLog, void $ liftAnnex globalMinCopiesLoad) + , (scheduleLog, void updateScheduleLog) + -- Preferred and required content settings depend on most of the + -- other configs, so will be reloaded whenever any configs change. + , (preferredContentLog, noop) + , (requiredContentLog, noop) + , (groupPreferredContentLog, noop) + ] + +reloadConfigs :: Configs -> Assistant () +reloadConfigs changedconfigs = do + sequence_ as + void $ liftAnnex $ preferredRequiredMapsLoad preferredContentTokens + {- Changes to the remote log, or the trust log, can affect the + - syncRemotes list. Changes to the uuid log may affect its + - display so are also included. -} + when (any (`elem` fs) [remoteLog, trustLog, uuidLog]) + updateSyncRemotes + where + (fs, as) = unzip $ filter (flip S.member changedfiles . fst) + configFilesActions + changedfiles = S.map fst changedconfigs + +getConfigs :: Assistant Configs +getConfigs = S.fromList . map extract + <$> liftAnnex (inRepo $ LsTree.lsTreeFiles (LsTree.LsTreeLong False) Annex.Branch.fullname files) + where + files = map (fromRawFilePath . fst) configFilesActions + extract treeitem = (getTopFilePath $ LsTree.file treeitem, LsTree.sha treeitem) diff --git a/Assistant/Threads/Cronner.hs b/Assistant/Threads/Cronner.hs new file mode 100644 index 0000000000..c3dd8acfb5 --- /dev/null +++ b/Assistant/Threads/Cronner.hs @@ -0,0 +1,225 @@ +{- git-annex assistant scheduled jobs runner + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE DeriveDataTypeable #-} + +module Assistant.Threads.Cronner ( + cronnerThread +) where + +import Assistant.Common +import Assistant.DaemonStatus +import Utility.NotificationBroadcaster +import Annex.UUID +import Annex.Path +import Logs.Schedule +import Utility.Scheduled +import Types.ScheduledActivity +import Utility.ThreadScheduler +import Utility.HumanTime +import Utility.Batch +import Assistant.TransferQueue +import Annex.Content +import Types.Transfer +import Assistant.Types.UrlRenderer +import Assistant.Alert +import Remote +import qualified Types.Remote as Remote +import qualified Git +import qualified Git.Fsck +import Assistant.Fsck +import Assistant.Repair + +import Control.Concurrent.Async +import Control.Concurrent.MVar +import Data.Time.LocalTime +import Data.Time.Clock +import qualified Data.Map as M +import qualified Data.Set as S + +{- Loads schedules for this repository, and fires off one thread for each + - scheduled event that runs on this repository. Each thread sleeps until + - its event is scheduled to run. + - + - To handle events that run on remotes, which need to only run when + - their remote gets connected, threads are also started, and are passed + - a MVar to wait on, which is stored in the DaemonStatus's + - connectRemoteNotifiers. + - + - In the meantime the main thread waits for any changes to the + - schedules. When there's a change, compare the old and new list of + - schedules to find deleted and added ones. Start new threads for added + - ones, and kill the threads for deleted ones. -} +cronnerThread :: UrlRenderer -> NamedThread +cronnerThread urlrenderer = namedThreadUnchecked "Cronner" $ do + fsckNudge urlrenderer Nothing + dstatus <- getDaemonStatus + h <- liftIO $ newNotificationHandle False (scheduleLogNotifier dstatus) + go h M.empty M.empty + where + go h amap nmap = do + activities <- liftAnnex $ scheduleGet =<< getUUID + + let addedactivities = activities `S.difference` M.keysSet amap + let removedactivities = M.keysSet amap `S.difference` activities + + forM_ (S.toList removedactivities) $ \activity -> + case M.lookup activity amap of + Just a -> do + debug ["stopping removed job for", fromScheduledActivity activity, show (asyncThreadId a)] + liftIO $ cancel a + Nothing -> noop + + lastruntimes <- liftAnnex getLastRunTimes + started <- startactivities (S.toList addedactivities) lastruntimes + let addedamap = M.fromList $ map fst started + let addednmap = M.fromList $ catMaybes $ map snd started + + let removefiltered = M.filterWithKey (\k _ -> S.member k removedactivities) + let amap' = M.difference (M.union addedamap amap) (removefiltered amap) + let nmap' = M.difference (M.union addednmap nmap) (removefiltered nmap) + modifyDaemonStatus_ $ \s -> s { connectRemoteNotifiers = M.fromListWith (++) (M.elems nmap') } + + liftIO $ waitNotification h + debug ["reloading changed activities"] + go h amap' nmap' + startactivities as lastruntimes = forM as $ \activity -> + case connectActivityUUID activity of + Nothing -> do + runner <- asIO2 (sleepingActivityThread urlrenderer) + a <- liftIO $ async $ + runner activity (M.lookup activity lastruntimes) + return ((activity, a), Nothing) + Just u -> do + mvar <- liftIO newEmptyMVar + runner <- asIO2 (remoteActivityThread urlrenderer mvar) + a <- liftIO $ async $ + runner activity (M.lookup activity lastruntimes) + return ((activity, a), Just (activity, (u, [mvar]))) + +{- Calculate the next time the activity is scheduled to run, then + - sleep until that time, and run it. 
Then call setLastRunTime, and + - loop. + -} +sleepingActivityThread :: UrlRenderer -> ScheduledActivity -> Maybe LocalTime -> Assistant () +sleepingActivityThread urlrenderer activity lasttime = go lasttime =<< getnexttime lasttime + where + getnexttime = liftIO . nextTime schedule + go _ Nothing = debug ["no scheduled events left for", desc] + go l (Just (NextTimeExactly t)) = waitrun l t Nothing + go l (Just (NextTimeWindow windowstart windowend)) = + waitrun l windowstart (Just windowend) + desc = fromScheduledActivity activity + schedule = getSchedule activity + waitrun l t mmaxt = do + seconds <- liftIO $ secondsUntilLocalTime t + when (seconds > Seconds 0) $ do + debug ["waiting", show seconds, "for next scheduled", desc] + liftIO $ threadDelaySeconds seconds + now <- liftIO getCurrentTime + tz <- liftIO $ getTimeZone now + let nowt = utcToLocalTime tz now + if tolate nowt tz + then do + debug ["too late to run scheduled", desc] + go l =<< getnexttime l + else run nowt + where + tolate nowt tz = case mmaxt of + Just maxt -> nowt > maxt + -- allow the job to start 10 minutes late + Nothing -> diffUTCTime + (localTimeToUTC tz nowt) + (localTimeToUTC tz t) > 600 + run nowt = do + runActivity urlrenderer activity nowt + go (Just nowt) =<< getnexttime (Just nowt) + +{- Wait for the remote to become available by waiting on the MVar. + - Then check if the time is within a time window when activity + - is scheduled to run, and if so run it. + - Otherwise, just wait again on the MVar. + -} +remoteActivityThread :: UrlRenderer -> MVar () -> ScheduledActivity -> Maybe LocalTime -> Assistant () +remoteActivityThread urlrenderer mvar activity lasttime = do + liftIO $ takeMVar mvar + go =<< liftIO (nextTime (getSchedule activity) lasttime) + where + go (Just (NextTimeWindow windowstart windowend)) = do + now <- liftIO getCurrentTime + tz <- liftIO $ getTimeZone now + if now >= localTimeToUTC tz windowstart && now <= localTimeToUTC tz windowend + then do + let nowt = utcToLocalTime tz now + runActivity urlrenderer activity nowt + loop (Just nowt) + else loop lasttime + go _ = noop -- running at exact time not handled here + loop = remoteActivityThread urlrenderer mvar activity + +secondsUntilLocalTime :: LocalTime -> IO Seconds +secondsUntilLocalTime t = do + now <- getCurrentTime + tz <- getTimeZone now + let secs = truncate $ diffUTCTime (localTimeToUTC tz t) now + return $ if secs > 0 + then Seconds secs + else Seconds 0 + +runActivity :: UrlRenderer -> ScheduledActivity -> LocalTime -> Assistant () +runActivity urlrenderer activity nowt = do + debug ["starting", desc] + runActivity' urlrenderer activity + debug ["finished", desc] + liftAnnex $ setLastRunTime activity nowt + where + desc = fromScheduledActivity activity + +runActivity' :: UrlRenderer -> ScheduledActivity -> Assistant () +runActivity' urlrenderer (ScheduledSelfFsck _ d) = do + program <- liftIO programPath + g <- liftAnnex gitRepo + fsckresults <- showFscking urlrenderer Nothing $ tryNonAsync $ do + void $ batchCommand program (Param "fsck" : annexFsckParams d) + Git.Fsck.findBroken True False g + u <- liftAnnex getUUID + void $ repairWhenNecessary urlrenderer u Nothing fsckresults + mapM_ reget =<< liftAnnex (dirKeys gitAnnexBadDir) + where + reget k = queueTransfers "fsck found bad file; redownloading" Next k (AssociatedFile Nothing) Download +runActivity' urlrenderer (ScheduledRemoteFsck u s d) = dispatch =<< liftAnnex (remoteFromUUID u) + where + dispatch Nothing = debug ["skipping remote fsck of uuid without a configured 
remote", fromUUID u, fromSchedule s] + dispatch (Just rmt) = void $ case Remote.remoteFsck rmt of + Nothing -> go rmt $ do + program <- programPath + void $ batchCommand program $ + [ Param "fsck" + -- avoid downloading files + , Param "--fast" + , Param "--from" + , Param $ Remote.name rmt + ] ++ annexFsckParams d + Just mkfscker -> do + {- Note that having mkfsker return an IO action + - avoids running a long duration fsck in the + - Annex monad. -} + go rmt =<< liftAnnex (mkfscker (annexFsckParams d)) + go rmt annexfscker = do + repo <- liftAnnex $ Remote.getRepo rmt + fsckresults <- showFscking urlrenderer (Just rmt) $ tryNonAsync $ do + void annexfscker + if Git.repoIsLocal repo && not (Git.repoIsLocalUnknown repo) + then Just <$> Git.Fsck.findBroken True True repo + else pure Nothing + maybe noop (void . repairWhenNecessary urlrenderer u (Just rmt)) fsckresults + +annexFsckParams :: Duration -> [CommandParam] +annexFsckParams d = + [ Param "--incremental-schedule=1d" + , Param $ "--time-limit=" ++ fromDuration d + ] diff --git a/Assistant/Threads/DaemonStatus.hs b/Assistant/Threads/DaemonStatus.hs new file mode 100644 index 0000000000..0b14ca737f --- /dev/null +++ b/Assistant/Threads/DaemonStatus.hs @@ -0,0 +1,29 @@ +{- git-annex assistant daemon status thread + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Threads.DaemonStatus where + +import Assistant.Common +import Assistant.DaemonStatus +import Utility.ThreadScheduler +import Utility.NotificationBroadcaster + +{- This writes the daemon status to disk, when it changes, but no more + - frequently than once every ten minutes. + -} +daemonStatusThread :: NamedThread +daemonStatusThread = namedThread "DaemonStatus" $ do + notifier <- liftIO . newNotificationHandle False + =<< changeNotifier <$> getDaemonStatus + checkpoint + runEvery (Seconds tenMinutes) <~> do + liftIO $ waitNotification notifier + checkpoint + where + checkpoint = do + file <- liftAnnex $ fromRepo gitAnnexDaemonStatusFile + liftIO . writeDaemonStatusFile file =<< getDaemonStatus diff --git a/Assistant/Threads/Exporter.hs b/Assistant/Threads/Exporter.hs new file mode 100644 index 0000000000..20a252baff --- /dev/null +++ b/Assistant/Threads/Exporter.hs @@ -0,0 +1,80 @@ +{- git-annex assistant export updating thread + - + - Copyright 2017 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Threads.Exporter where + +import Assistant.Common +import Assistant.Commits +import Assistant.Pushes +import Assistant.DaemonStatus +import Annex.Concurrent +import Annex.CurrentBranch +import Utility.ThreadScheduler +import qualified Annex +import qualified Remote +import qualified Types.Remote as Remote +import qualified Command.Sync + +import Control.Concurrent.Async +import Data.Time.Clock +import qualified Data.Map as M + +{- This thread retries exports that failed before. -} +exportRetryThread :: NamedThread +exportRetryThread = namedThread "ExportRetrier" $ runEvery (Seconds halfhour) <~> do + -- We already waited half an hour, now wait until there are failed + -- exports to retry. + toexport <- getFailedPushesBefore (fromIntegral halfhour) + =<< getAssistant failedExportMap + unless (null toexport) $ do + debug ["retrying", show (length toexport), "failed exports"] + void $ exportToRemotes toexport + where + halfhour = 1800 + +{- This thread updates exports soon after git commits are made. 
+ -}
+exportThread :: NamedThread
+exportThread = namedThread "Exporter" $ runEvery (Seconds 30) <~> do
+	-- We already waited thirty seconds as a simple rate limiter.
+	-- Next, wait until at least one commit has been made
+	void getExportCommits
+	-- Now see if now's a good time to export.
+	void $ exportToRemotes =<< exportTargets
+
+{- We want to avoid exporting to remotes that are marked readonly.
+ -
+ - Also, avoid exporting to local remotes we can easily tell are not available,
+ - to avoid ugly messages when a removable drive is not attached.
+ -}
+exportTargets :: Assistant [Remote]
+exportTargets = liftAnnex . filterM (Remote.checkAvailable True)
+	=<< candidates <$> getDaemonStatus
+  where
+	candidates = filter (not . Remote.readonly) . exportRemotes
+
+exportToRemotes :: [Remote] -> Assistant ()
+exportToRemotes rs = do
+	-- This is a long-duration action which runs in the Annex monad,
+	-- so don't just liftAnnex to run it; fork the Annex state.
+	runner <- liftAnnex $ forkState $
+		forM rs $ \r -> do
+			Annex.changeState $ \st -> st { Annex.errcounter = 0 }
+			start <- liftIO getCurrentTime
+			void $ Command.Sync.seekExportContent Nothing [r]
+				=<< getCurrentBranch
+			-- Look at command error counter to see if the export
+			-- didn't work.
+			failed <- (> 0) <$> Annex.getState Annex.errcounter
+			Annex.changeState $ \st -> st { Annex.errcounter = 0 }
+			return $ if failed
+				then Just (r, start)
+				else Nothing
+	failed <- catMaybes
+		<$> (liftAnnex =<< liftIO . wait =<< liftIO (async runner))
+	unless (null failed) $ do
+		v <- getAssistant failedExportMap
+		changeFailedPushMap v $ M.union $ M.fromList failed
diff --git a/Assistant/Threads/Glacier.hs b/Assistant/Threads/Glacier.hs
new file mode 100644
index 0000000000..d5a20e908f
--- /dev/null
+++ b/Assistant/Threads/Glacier.hs
@@ -0,0 +1,44 @@
+{- git-annex assistant Amazon Glacier retrieval
+ -
+ - Copyright 2012 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module Assistant.Threads.Glacier where
+
+import Assistant.Common
+import Utility.ThreadScheduler
+import qualified Types.Remote as Remote
+import qualified Remote.Glacier as Glacier
+import Types.Transfer
+import Logs.Transfer
+import Assistant.DaemonStatus
+import Assistant.TransferQueue
+
+import qualified Data.Set as S
+
+{- Wakes up every hour and checks if any glacier remotes have failed
+ - downloads. If so, runs glacier-cli to check if the files are now
+ - available, and queues the downloads. -}
+glacierThread :: NamedThread
+glacierThread = namedThread "Glacier" $ runEvery (Seconds 3600) <~> go
+  where
+	isglacier r = Remote.remotetype r == Glacier.remote
+	go = do
+		rs <- filter isglacier . downloadRemotes <$> getDaemonStatus
+		forM_ rs $ \r ->
+			check r =<< liftAnnex (getFailedTransfers $ Remote.uuid r)
+	check _ [] = noop
+	check r l = do
+		let keys = map getkey l
+		(availkeys, failedkeys) <- liftAnnex $ Glacier.jobList r keys
+		let s = S.fromList (failedkeys ++ availkeys)
+		let l' = filter (\p -> S.member (getkey p) s) l
+		forM_ l' $ \(t, info) -> do
+			liftAnnex $ removeFailedTransfer t
+			queueTransferWhenSmall "object available from glacier" (associatedFile info) t r
+	getkey = transferKey . fst
diff --git a/Assistant/Threads/Merger.hs b/Assistant/Threads/Merger.hs
new file mode 100644
index 0000000000..7b9db70abf
--- /dev/null
+++ b/Assistant/Threads/Merger.hs
@@ -0,0 +1,131 @@
+{- git-annex assistant git merge thread
+ -
+ - Copyright 2012-2021 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE OverloadedStrings #-}
+
+module Assistant.Threads.Merger where
+
+import Assistant.Common
+import Assistant.TransferQueue
+import Assistant.BranchChange
+import Assistant.Sync
+import Utility.DirWatcher
+import Utility.DirWatcher.Types
+import Utility.Directory.Create
+import Annex.CurrentBranch
+import Assistant.Commits
+import qualified Annex
+import qualified Annex.Branch
+import qualified Git
+import qualified Git.Branch
+import qualified Git.Ref
+import qualified Command.Sync
+
+import qualified System.FilePath.ByteString as P
+
+{- This thread watches for changes to .git/refs/, and handles incoming
+ - pushes. -}
+mergeThread :: NamedThread
+mergeThread = namedThread "Merger" $ do
+	g <- liftAnnex gitRepo
+	let gitd = Git.localGitDir g
+	let dir = gitd P.</> "refs"
+	liftIO $ createDirectoryUnder [gitd] dir
+	let hook a = Just <$> asIO2 (runHandler a)
+	changehook <- hook onChange
+	errhook <- hook onErr
+	let hooks = mkWatchHooks
+		{ addHook = changehook
+		, modifyHook = changehook
+		, errHook = errhook
+		}
+	void $ liftIO $ watchDir (fromRawFilePath dir) (const False) True hooks id
+	debug ["watching", fromRawFilePath dir]
+
+type Handler = FilePath -> Assistant ()
+
+{- Runs an action handler.
+ -
+ - Exceptions are ignored, otherwise a whole thread could be crashed.
+ -}
+runHandler :: Handler -> FilePath -> Maybe FileStatus -> Assistant ()
+runHandler handler file _filestatus =
+	either (liftIO . print) (const noop) =<< tryIO <~> handler file
+
+{- Called when there's an error with inotify. -}
+onErr :: Handler
+onErr = giveup
+
+{- Called when a new branch ref is written, or a branch ref is modified.
+ -
+ - At startup, synthetic add events fire, causing this to run, but that's
+ - ok; it ensures that any changes pushed since the last time the assistant
+ - ran are merged in.
+ -}
+onChange :: Handler
+onChange file
+	| ".lock" `isSuffixOf` file = noop
+	| isAnnexBranch file = do
+		branchChanged
+		diverged <- liftAnnex Annex.Branch.forceUpdate >>= return . \case
+			u@(Annex.Branch.UpdateMade {}) -> Annex.Branch.refsWereMerged u
+			(Annex.Branch.UpdateFailedPermissions {}) -> True
+		when diverged $ do
+			updateExportTreeFromLogAll
+			queueDeferredDownloads "retrying deferred download" Later
+	| otherwise = mergecurrent
+  where
+	changedbranch = fileToBranch file
+
+	mergecurrent =
+		mergecurrent' =<< liftAnnex getCurrentBranch
+	mergecurrent' currbranch@(Just b, _)
+		| changedbranch `isRelatedTo` b = do
+			whenM (liftAnnex $ inRepo $ Git.Branch.changed b changedbranch) $ do
+				debug
+					[ "merging", Git.fromRef changedbranch
+					, "into", Git.fromRef b
+					]
+				void $ liftAnnex $ do
+					cmode <- annexCommitMode <$> Annex.getGitConfig
+					-- Allow merging unrelated histories.
+					mc <- Command.Sync.mergeConfig True
+					Command.Sync.merge
+						currbranch
+						mc
+						def
+						cmode
+						[changedbranch]
+				recordCommit
+		| changedbranch == b =
+			-- Record commit so the pusher pushes it out.
+			-- This makes sure pushes happen when
+			-- annex.autocommit=false
			recordCommit
+	mergecurrent' _ = noop
+
+{- Is the first branch a synced branch or remote tracking branch related
+ - to the second branch, which should be merged into it?
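+ -
+ - (E.g. refs/heads/synced/master and refs/remotes/origin/master are
+ - both related to refs/heads/master, while refs/heads/other is not.)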
+ -}
+isRelatedTo :: Git.Ref -> Git.Ref -> Bool
+isRelatedTo x y
+	| basex /= takeDirectory basex ++ "/" ++ basey = False
+	| "/synced/" `isInfixOf` Git.fromRef x = True
+	| "refs/remotes/" `isPrefixOf` Git.fromRef x = True
+	| otherwise = False
+  where
+	basex = Git.fromRef $ Git.Ref.base x
+	basey = Git.fromRef $ Git.Ref.base y
+
+isAnnexBranch :: FilePath -> Bool
+isAnnexBranch f = n `isSuffixOf` f
+  where
+	n = '/' : Git.fromRef Annex.Branch.name
+
+fileToBranch :: FilePath -> Git.Ref
+fileToBranch f = Git.Ref $ encodeBS $ "refs" </> base
+  where
+	base = Prelude.last $ split "/refs/" f
diff --git a/Assistant/Threads/MountWatcher.hs b/Assistant/Threads/MountWatcher.hs
new file mode 100644
index 0000000000..11997fbd71
--- /dev/null
+++ b/Assistant/Threads/MountWatcher.hs
@@ -0,0 +1,182 @@
+{- git-annex assistant mount watcher, using either dbus or mtab polling
+ -
+ - Copyright 2012 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module Assistant.Threads.MountWatcher where
+
+import Assistant.Common
+import Assistant.DaemonStatus
+import Assistant.Sync
+import qualified Annex
+import qualified Git
+import Utility.ThreadScheduler
+import Utility.Mounts
+import Remote.List
+import qualified Types.Remote as Remote
+import Assistant.Types.UrlRenderer
+import Assistant.Fsck
+
+import qualified Data.Set as S
+
+#if WITH_DBUS
+import Utility.DBus
+import DBus.Client
+import DBus
+import Data.Word (Word32)
+import Control.Concurrent
+import qualified Control.Exception as E
+#else
+#ifdef linux_HOST_OS
+#warning Building without dbus support; will use mtab polling
+#endif
+#endif
+
+mountWatcherThread :: UrlRenderer -> NamedThread
+mountWatcherThread urlrenderer = namedThread "MountWatcher" $
+#if WITH_DBUS
+	dbusThread urlrenderer
+#else
+	pollingThread urlrenderer
+#endif
+
+#if WITH_DBUS
+
+dbusThread :: UrlRenderer -> Assistant ()
+dbusThread urlrenderer = do
+	runclient <- asIO1 go
+	r <- liftIO $ E.try $ runClient getSystemAddress runclient
+	either onerr (const noop) r
+  where
+	go client = ifM (checkMountMonitor client)
+		( do
+			{- Store the current mount points in an MVar, to be
+			 - compared later. We could in theory work out the
+			 - mount point from the dbus message, but this is
+			 - easier. -}
+			mvar <- liftIO $ newMVar =<< currentMountPoints
+			handleevent <- asIO1 $ \_event -> do
+				nowmounted <- liftIO $ currentMountPoints
+				wasmounted <- liftIO $ swapMVar mvar nowmounted
+				handleMounts urlrenderer wasmounted nowmounted
+			liftIO $ forM_ mountChanged $ \matcher ->
+				void $ addMatch client matcher handleevent
+		, do
+			liftAnnex $
+				warning "No known volume monitor available through dbus; falling back to mtab polling"
+			pollingThread urlrenderer
+		)
+	onerr :: E.SomeException -> Assistant ()
+	onerr e = do
+		liftAnnex $
+			warning $ UnquotedString $ "dbus failed; falling back to mtab polling (" ++ show e ++ ")"
+		pollingThread urlrenderer
+
+{- Examine the list of services connected to dbus, to see if there
+ - are any we can use to monitor mounts. If not, will attempt to start one. -}
+checkMountMonitor :: Client -> Assistant Bool
+checkMountMonitor client = do
+	running <- filter (`elem` usableservices)
+		<$> liftIO (listServiceNames client)
+	case running of
+		[] -> startOneService client startableservices
+		(service:_) -> do
+			debug [ "Using running DBUS service"
+				, service
+				, "to monitor mount events."
+ ] + return True + where + startableservices = [udisks2] + usableservices = startableservices + udisks2 = "org.freedesktop.UDisks2" + +startOneService :: Client -> [ServiceName] -> Assistant Bool +startOneService _ [] = return False +startOneService client (x:xs) = do + _ <- liftIO $ tryNonAsync $ callDBus client "StartServiceByName" + [toVariant x, toVariant (0 :: Word32)] + ifM (liftIO $ elem x <$> listServiceNames client) + ( do + debug + [ "Started DBUS service", x + , "to monitor mount events." + ] + return True + , startOneService client xs + ) + +{- Filter matching events received when drives are mounted and unmounted. -} +mountChanged :: [MatchRule] +mountChanged = [udisks2mount, udisks2umount] + where + udisks2mount = matchAny + { matchPath = Just "/org/freedesktop/UDisks2" + , matchInterface = Just "org.freedesktop.DBus.ObjectManager" + , matchMember = Just "InterfacesAdded" + } + udisks2umount = matchAny + { matchPath = Just "/org/freedesktop/UDisks2" + , matchInterface = Just "org.freedesktop.DBus.ObjectManager" + , matchMember = Just "InterfacesRemoved" + } +#endif + +pollingThread :: UrlRenderer -> Assistant () +pollingThread urlrenderer = go =<< liftIO currentMountPoints + where + go wasmounted = do + liftIO $ threadDelaySeconds (Seconds 10) + nowmounted <- liftIO currentMountPoints + handleMounts urlrenderer wasmounted nowmounted + go nowmounted + +handleMounts :: UrlRenderer -> MountPoints -> MountPoints -> Assistant () +handleMounts urlrenderer wasmounted nowmounted = + mapM_ (handleMount urlrenderer . mnt_dir) $ + S.toList $ newMountPoints wasmounted nowmounted + +handleMount :: UrlRenderer -> FilePath -> Assistant () +handleMount urlrenderer dir = do + debug ["detected mount of", dir] + rs <- filterM (Git.repoIsLocal <$$> liftAnnex . Remote.getRepo) + =<< remotesUnder dir + mapM_ (fsckNudge urlrenderer . Just) rs + reconnectRemotes rs + +{- Finds remotes located underneath the mount point. + - + - Updates state to include the remotes. + - + - The config of git remotes is re-read, as it may not have been available + - at startup time, or may have changed (it could even be a different + - repository at the same remote location..) + -} +remotesUnder :: FilePath -> Assistant [Remote] +remotesUnder dir = do + repotop <- liftAnnex $ fromRepo Git.repoPath + rs <- liftAnnex remoteList + pairs <- liftAnnex $ mapM (checkremote repotop) rs + let (waschanged, rs') = unzip pairs + when (or waschanged) $ do + liftAnnex $ Annex.changeState $ \s -> s { Annex.remotes = catMaybes rs' } + updateSyncRemotes + return $ mapMaybe snd $ filter fst pairs + where + checkremote repotop r = case Remote.localpath r of + Just p | dirContains (toRawFilePath dir) (absPathFrom repotop (toRawFilePath p)) -> + (,) <$> pure True <*> updateRemote r + _ -> return (False, Just r) + +type MountPoints = S.Set Mntent + +currentMountPoints :: IO MountPoints +currentMountPoints = S.fromList <$> getMounts + +newMountPoints :: MountPoints -> MountPoints -> MountPoints +newMountPoints old new = S.difference new old diff --git a/Assistant/Threads/NetWatcher.hs b/Assistant/Threads/NetWatcher.hs new file mode 100644 index 0000000000..ff64564519 --- /dev/null +++ b/Assistant/Threads/NetWatcher.hs @@ -0,0 +1,202 @@ +{- git-annex assistant network connection watcher, using dbus + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} + +module Assistant.Threads.NetWatcher where + +import Assistant.Common +import Assistant.Sync +import Utility.ThreadScheduler +import qualified Types.Remote as Remote +import Assistant.DaemonStatus +import Utility.NotificationBroadcaster + +#if WITH_DBUS +import Assistant.RemoteControl +import Utility.DBus +import DBus.Client +import DBus +#else +#ifdef linux_HOST_OS +#warning Building without dbus support; will poll for network connection changes +#endif +#endif + +netWatcherThread :: NamedThread +#if WITH_DBUS +netWatcherThread = thread dbusThread +#else +netWatcherThread = thread noop +#endif + where + thread = namedThread "NetWatcher" + +{- This is a fallback for when dbus cannot be used to detect + - network connection changes, but it also ensures that + - any networked remotes that may have not been routable for a + - while (despite the local network staying up), are synced with + - periodically. + - + - Note that it does not signal the RemoteControl, because it doesn't + - know that the network has changed. + -} +netWatcherFallbackThread :: NamedThread +netWatcherFallbackThread = namedThread "NetWatcherFallback" $ + runEvery (Seconds 3600) <~> handleConnection + +#if WITH_DBUS + +dbusThread :: Assistant () +dbusThread = do + handleerr <- asIO2 onerr + runclient <- asIO1 go + liftIO $ persistentClient getSystemAddress () handleerr runclient + where + go client = ifM (checkNetMonitor client) + ( do + callback <- asIO1 connchange + liftIO $ do + listenNMConnections client callback + listenNDConnections client callback + listenWicdConnections client callback + , do + liftAnnex $ + warning "No known network monitor available through dbus; falling back to polling" + ) + connchange False = do + debug ["detected network disconnection"] + sendRemoteControl LOSTNET + connchange True = do + debug ["detected network connection"] + handleConnection + sendRemoteControl RESUME + onerr e _ = do + liftAnnex $ + warning $ UnquotedString $ "lost dbus connection; falling back to polling (" ++ show e ++ ")" + {- Wait, in hope that dbus will come back -} + liftIO $ threadDelaySeconds (Seconds 60) + +{- Examine the list of services connected to dbus, to see if there + - are any we can use to monitor network connections. -} +checkNetMonitor :: Client -> Assistant Bool +checkNetMonitor client = do + running <- liftIO $ filter (`elem` manager_addresses) + <$> listServiceNames client + case running of + [] -> return False + (service:_) -> do + debug [ "Using running DBUS service" + , service + , "to monitor network connection events." + ] + return True + where + manager_addresses = [networkmanager, networkd, wicd] + networkmanager = "org.freedesktop.NetworkManager" + networkd = "org.freedesktop.network1" + wicd = "org.wicd.daemon" + +{- Listens for systemd-networkd connections and disconnections. 
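+ - (Detection is based on the "OperationalState" property changing to
+ - or from "routable".)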
+ - + - Connection example (once fully connected): + - [Variant {"OperationalState": Variant "routable"}] + - + - Disconnection example: + - [Variant {"OperationalState": Variant _}] + -} +listenNDConnections :: Client -> (Bool -> IO ()) -> IO () +listenNDConnections client setconnected = + void $ addMatch client matcher + $ \event -> mapM_ handleevent + (map dictionaryItems $ mapMaybe fromVariant $ signalBody event) + where + matcher = matchAny + { matchInterface = Just "org.freedesktop.DBus.Properties" + , matchMember = Just "PropertiesChanged" + } + operational_state_key = toVariant ("OperationalState" :: String) + routable = toVariant $ toVariant ("routable" :: String) + handleevent m = case lookup operational_state_key m of + Just state -> if state == routable + then setconnected True + else setconnected False + Nothing -> noop + +{- Listens for NetworkManager connections and disconnections. + - + - Connection example (once fully connected): + - [Variant {"ActivatingConnection": Variant (ObjectPath "/"), "PrimaryConnection": Variant (ObjectPath "/org/freedesktop/NetworkManager/ActiveConnection/34"), "State": Variant 70}] + - + - Disconnection example: + - [Variant {"ActiveConnections": Variant []}] + -} +listenNMConnections :: Client -> (Bool -> IO ()) -> IO () +listenNMConnections client setconnected = + void $ addMatch client matcher + $ \event -> mapM_ handleevent + (map dictionaryItems $ mapMaybe fromVariant $ signalBody event) + where + matcher = matchAny + { matchInterface = Just "org.freedesktop.NetworkManager" + , matchMember = Just "PropertiesChanged" + } + nm_active_connections_key = toVariant ("ActiveConnections" :: String) + nm_activatingconnection_key = toVariant ("ActivatingConnection" :: String) + noconnections = Just $ toVariant $ toVariant ([] :: [ObjectPath]) + rootconnection = Just $ toVariant $ toVariant $ objectPath_ "/" + handleevent m + | lookup nm_active_connections_key m == noconnections = + setconnected False + | lookup nm_activatingconnection_key m == rootconnection = + setconnected True + | otherwise = noop + +{- Listens for Wicd connections and disconnections. + - + - Connection example: + - ConnectResultsSent: + - Variant "success" + - + - Disconnection example: + - StatusChanged + - [Variant 0, Variant [Variant ""]] + -} +listenWicdConnections :: Client -> (Bool -> IO ()) -> IO () +listenWicdConnections client setconnected = do + match connmatcher $ \event -> + when (any (== wicd_success) (signalBody event)) $ + setconnected True + match statusmatcher $ \event -> handleevent (signalBody event) + where + connmatcher = matchAny + { matchInterface = Just "org.wicd.daemon" + , matchMember = Just "ConnectResultsSent" + } + statusmatcher = matchAny + { matchInterface = Just "org.wicd.daemon" + , matchMember = Just "StatusChanged" + } + wicd_success = toVariant ("success" :: String) + wicd_disconnected = toVariant [toVariant ("" :: String)] + handleevent status + | any (== wicd_disconnected) status = setconnected False + | otherwise = noop + match matcher a = + void $ addMatch client matcher a +#endif + +handleConnection :: Assistant () +handleConnection = do + liftIO . sendNotification . networkConnectedNotifier =<< getDaemonStatus + reconnectRemotes =<< networkRemotes + +{- Network remotes to sync with. -} +networkRemotes :: Assistant [Remote] +networkRemotes = filter (isNothing . Remote.localpath) . 
syncRemotes + <$> getDaemonStatus diff --git a/Assistant/Threads/PairListener.hs b/Assistant/Threads/PairListener.hs new file mode 100644 index 0000000000..0199b79f84 --- /dev/null +++ b/Assistant/Threads/PairListener.hs @@ -0,0 +1,156 @@ +{- git-annex assistant thread to listen for incoming pairing traffic + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Assistant.Threads.PairListener where + +import Assistant.Common +import Assistant.Pairing +import Assistant.Pairing.Network +import Assistant.Pairing.MakeRemote +import Assistant.WebApp (UrlRenderer) +import Assistant.WebApp.Types +import Assistant.Alert +import Assistant.DaemonStatus +import Utility.ThreadScheduler +import Git + +import Network.Multicast +import Network.Socket +import qualified Data.ByteString as B +import qualified Data.ByteString.UTF8 as BU8 +import qualified Network.Socket.ByteString as B +import qualified Data.Text as T + +pairListenerThread :: UrlRenderer -> NamedThread +pairListenerThread urlrenderer = namedThread "PairListener" $ do + listener <- asIO1 $ go [] [] + liftIO $ withSocketsDo $ + runEvery (Seconds 60) $ void $ tryIO $ + listener =<< getsock + where + {- Note this can crash if there's no network interface, + - or only one like lo that doesn't support multicast. -} + getsock = multicastReceiver (multicastAddress IPv4AddrClass) pairingPort + + go reqs cache sock = liftIO (getmsg sock B.empty) >>= \msg -> case readish (BU8.toString msg) of + Nothing -> go reqs cache sock + Just m -> do + debug ["received", show msg] + (pip, verified) <- verificationCheck m + =<< (pairingInProgress <$> getDaemonStatus) + let wrongstage = maybe False (\p -> pairMsgStage m <= inProgressPairStage p) pip + let fromus = maybe False (\p -> remoteSshPubKey (pairMsgData m) == remoteSshPubKey (inProgressPairData p)) pip + case (wrongstage, fromus, checkSane (pairMsgData m), pairMsgStage m) of + (_, True, _, _) -> do + debug ["ignoring message that looped back"] + go reqs cache sock + (_, _, False, _) -> do + liftAnnex $ warning $ UnquotedString $ + "illegal control characters in pairing message; ignoring (" ++ show (pairMsgData m) ++ ")" + go reqs cache sock + -- PairReq starts a pairing process, so a + -- new one is always heeded, even if + -- some other pairing is in process. + (_, _, _, PairReq) -> if m `elem` reqs + then go reqs (invalidateCache m cache) sock + else do + pairReqReceived verified urlrenderer m + go (m:take 10 reqs) (invalidateCache m cache) sock + (True, _, _, _) -> do + debug + ["ignoring out of order message" + , show (pairMsgStage m) + , "expected" + , show (succ . inProgressPairStage <$> pip) + ] + go reqs cache sock + (_, _, _, PairAck) -> do + cache' <- pairAckReceived verified pip m cache + go reqs cache' sock + (_,_ , _, PairDone) -> do + pairDoneReceived verified pip m + go reqs cache sock + + {- As well as verifying the message using the shared secret, + - check its UUID against the UUID we have stored. If + - they're the same, someone is sending bogus messages, + - which could be an attempt to brute force the shared secret. 
-} + verificationCheck _ Nothing = return (Nothing, False) + verificationCheck m (Just pip) + | not verified && sameuuid = do + liftAnnex $ warning + "detected possible pairing brute force attempt; disabled pairing" + stopSending pip + return (Nothing, False) + | otherwise = return (Just pip, verified && sameuuid) + where + verified = verifiedPairMsg m pip + sameuuid = pairUUID (inProgressPairData pip) == pairUUID (pairMsgData m) + + {- PairReqs invalidate the cache of recently finished pairings. + - This is so that, if a new pairing is started with the + - same secret used before, a bogus PairDone is not sent. -} + invalidateCache msg = filter (not . verifiedPairMsg msg) + + getmsg sock c = do + (msg, _) <- B.recvFrom sock chunksz + if B.length msg < chunksz + then return $ c <> msg + else getmsg sock $ c <> msg + where + chunksz = 1024 + +{- Show an alert when a PairReq is seen. -} +pairReqReceived :: Bool -> UrlRenderer -> PairMsg -> Assistant () +pairReqReceived True _ _ = noop -- ignore our own PairReq +pairReqReceived False urlrenderer msg = do + button <- mkAlertButton True (T.pack "Respond") urlrenderer (FinishLocalPairR msg) + void $ addAlert $ pairRequestReceivedAlert repo button + where + repo = pairRepo msg + +{- When a verified PairAck is seen, a host is ready to pair with us, and has + - already configured our ssh key. Stop sending PairReqs, finish the pairing, + - and send a single PairDone. -} +pairAckReceived :: Bool -> Maybe PairingInProgress -> PairMsg -> [PairingInProgress] -> Assistant [PairingInProgress] +pairAckReceived True (Just pip) msg cache = do + stopSending pip + repodir <- fromRawFilePath . repoPath <$> liftAnnex gitRepo + liftIO $ setupAuthorizedKeys msg repodir + finishedLocalPairing msg (inProgressSshKeyPair pip) + startSending pip PairDone $ multicastPairMsg + (Just 1) (inProgressSecret pip) (inProgressPairData pip) + return $ pip : take 10 cache +{- A stale PairAck might also be seen, after we've finished pairing. + - Perhaps our PairDone was not received. To handle this, we keep + - a cache of recently finished pairings, and re-send PairDone in + - response to stale PairAcks for them. -} +pairAckReceived _ _ msg cache = do + let pips = filter (verifiedPairMsg msg) cache + unless (null pips) $ + forM_ pips $ \pip -> + startSending pip PairDone $ multicastPairMsg + (Just 1) (inProgressSecret pip) (inProgressPairData pip) + return cache + +{- If we get a verified PairDone, the host has accepted our PairAck, and + - has paired with us. Stop sending PairAcks, and finish pairing with them. + - + - TODO: Should third-party hosts remove their pair request alert when they + - see a PairDone? + - Complication: The user could have already clicked on the alert and be + - entering the secret. Would be better to start a fresh pair request in this + - situation. + -} +pairDoneReceived :: Bool -> Maybe PairingInProgress -> PairMsg -> Assistant () +pairDoneReceived False _ _ = noop -- not verified +pairDoneReceived True Nothing _ = noop -- not in progress +pairDoneReceived True (Just pip) msg = do + stopSending pip + finishedLocalPairing msg (inProgressSshKeyPair pip) diff --git a/Assistant/Threads/ProblemFixer.hs b/Assistant/Threads/ProblemFixer.hs new file mode 100644 index 0000000000..2b5829037f --- /dev/null +++ b/Assistant/Threads/ProblemFixer.hs @@ -0,0 +1,73 @@ +{- git-annex assistant thread to handle fixing problems with repositories + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +module Assistant.Threads.ProblemFixer ( + problemFixerThread +) where + +import Assistant.Common +import Assistant.Types.RepoProblem +import Assistant.RepoProblem +import Assistant.Types.UrlRenderer +import Assistant.Alert +import Remote +import qualified Types.Remote as Remote +import qualified Git.Fsck +import Assistant.Repair +import qualified Git +import Annex.UUID +import Utility.ThreadScheduler + +{- Waits for problems with a repo, and tries to fsck the repo and repair + - the problem. -} +problemFixerThread :: UrlRenderer -> NamedThread +problemFixerThread urlrenderer = namedThread "ProblemFixer" $ + go =<< getRepoProblems + where + go problems = do + mapM_ (handleProblem urlrenderer) problems + liftIO $ threadDelaySeconds (Seconds 60) + -- Problems may have been re-reported while they were being + -- fixed, so ignore those. If a new unique problem happened + -- 60 seconds after the last was fixed, we're unlikely + -- to do much good anyway. + go =<< filter (\p -> not (any (sameRepoProblem p) problems)) + <$> getRepoProblems + +handleProblem :: UrlRenderer -> RepoProblem -> Assistant () +handleProblem urlrenderer repoproblem = do + fixed <- ifM ((==) (problemUUID repoproblem) <$> liftAnnex getUUID) + ( handleLocalRepoProblem urlrenderer + , maybe (return False) (handleRemoteProblem urlrenderer) + =<< liftAnnex (remoteFromUUID $ problemUUID repoproblem) + ) + when fixed $ + liftIO $ afterFix repoproblem + +handleRemoteProblem :: UrlRenderer -> Remote -> Assistant Bool +handleRemoteProblem urlrenderer rmt = do + repo <- liftAnnex $ Remote.getRepo rmt + handleRemoteProblem' repo urlrenderer rmt + +handleRemoteProblem' :: Git.Repo -> UrlRenderer -> Remote -> Assistant Bool +handleRemoteProblem' repo urlrenderer rmt + | Git.repoIsLocal repo && not (Git.repoIsLocalUnknown repo) = + ifM (liftAnnex $ checkAvailable True rmt) + ( do + fixedlocks <- repairStaleGitLocks repo + fsckresults <- showFscking urlrenderer (Just rmt) $ tryNonAsync $ + Git.Fsck.findBroken True True repo + repaired <- repairWhenNecessary urlrenderer (Remote.uuid rmt) (Just rmt) fsckresults + return $ fixedlocks || repaired + , return False + ) + | otherwise = return False + +{- This is not yet used, and should probably do a fsck. -} +handleLocalRepoProblem :: UrlRenderer -> Assistant Bool +handleLocalRepoProblem _urlrenderer = do + repairStaleGitLocks =<< liftAnnex gitRepo diff --git a/Assistant/Threads/Pusher.hs b/Assistant/Threads/Pusher.hs new file mode 100644 index 0000000000..3d7b6345c0 --- /dev/null +++ b/Assistant/Threads/Pusher.hs @@ -0,0 +1,50 @@ +{- git-annex assistant git pushing thread + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Threads.Pusher where + +import Assistant.Common +import Assistant.Commits +import Assistant.Pushes +import Assistant.DaemonStatus +import Assistant.Sync +import Utility.ThreadScheduler +import qualified Remote +import qualified Types.Remote as Remote + +{- This thread retries pushes that failed before. -} +pushRetryThread :: NamedThread +pushRetryThread = namedThread "PushRetrier" $ runEvery (Seconds halfhour) <~> do + -- We already waited half an hour, now wait until there are failed + -- pushes to retry. 
+ topush <- getFailedPushesBefore (fromIntegral halfhour) + =<< getAssistant failedPushMap + unless (null topush) $ do + debug ["retrying", show (length topush), "failed pushes"] + void $ pushToRemotes topush + where + halfhour = 1800 + +{- This thread pushes git commits out to remotes soon after they are made. -} +pushThread :: NamedThread +pushThread = namedThread "Pusher" $ runEvery (Seconds 2) <~> do + -- We already waited two seconds as a simple rate limiter. + -- Next, wait until at least one commit has been made + void getCommits + -- Now see if now's a good time to push. + void $ pushToRemotes =<< pushTargets + +{- We want to avoid pushing to remotes that are marked readonly. + - + - Also, avoid pushing to local remotes we can easily tell are not available, + - to avoid ugly messages when a removable drive is not attached. + -} +pushTargets :: Assistant [Remote] +pushTargets = liftAnnex . filterM (Remote.checkAvailable True) + =<< candidates <$> getDaemonStatus + where + candidates = filter (not . Remote.readonly) . syncGitRemotes diff --git a/Assistant/Threads/RemoteControl.hs b/Assistant/Threads/RemoteControl.hs new file mode 100644 index 0000000000..51f5e4b9b4 --- /dev/null +++ b/Assistant/Threads/RemoteControl.hs @@ -0,0 +1,128 @@ +{- git-annex assistant communication with remotedaemon + - + - Copyright 2014 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Threads.RemoteControl where + +import Assistant.Common +import RemoteDaemon.Types +import Annex.Path +import Utility.Batch +import Utility.SimpleProtocol +import Assistant.Alert +import Assistant.Alert.Utility +import Assistant.DaemonStatus +import qualified Git +import qualified Git.Types as Git +import qualified Remote +import qualified Types.Remote as Remote + +import Control.Concurrent +import Control.Concurrent.Async +import Network.URI +import qualified Data.Map as M +import qualified Data.Set as S + +remoteControlThread :: NamedThread +remoteControlThread = namedThread "RemoteControl" $ do + program <- liftIO programPath + (cmd, params) <- liftIO $ toBatchCommand + (program, [Param "remotedaemon", Param "--foreground"]) + let p = proc cmd (toCommand params) + bracket (setup p) cleanup (go p) + where + setup p = liftIO $ createProcess $ p + { std_in = CreatePipe + , std_out = CreatePipe + } + cleanup = liftIO . cleanupProcess + + go p (Just toh, Just fromh, _, pid) = do + urimap <- liftIO . 
newMVar =<< liftAnnex getURIMap + + controller <- asIO $ remoteControllerThread toh + responder <- asIO $ remoteResponderThread fromh urimap + + -- run controller and responder until the remotedaemon dies + liftIO $ void $ tryNonAsync $ + controller `concurrently` responder + debug ["remotedaemon exited"] + liftIO $ forceSuccessProcess p pid + go _ _ = error "internal" + + +-- feed from the remoteControl channel into the remotedaemon +remoteControllerThread :: Handle -> Assistant () +remoteControllerThread toh = do + clicker <- getAssistant remoteControl + forever $ do + msg <- liftIO $ readChan clicker + debug [show msg] + liftIO $ do + hPutStrLn toh $ unwords $ formatMessage msg + hFlush toh + +-- read status messages emitted by the remotedaemon and handle them +remoteResponderThread :: Handle -> MVar (M.Map URI Remote) -> Assistant () +remoteResponderThread fromh urimap = go M.empty + where + go syncalerts = do + l <- liftIO $ hGetLine fromh + debug [l] + case parseMessage l of + Just (CONNECTED uri) -> changeconnected S.insert uri + Just (DISCONNECTED uri) -> changeconnected S.delete uri + Just (SYNCING uri) -> withr uri $ \r -> + if M.member (Remote.uuid r) syncalerts + then go syncalerts + else do + i <- addAlert $ syncAlert [r] + go (M.insert (Remote.uuid r) i syncalerts) + Just (DONESYNCING uri status) -> withr uri $ \r -> + case M.lookup (Remote.uuid r) syncalerts of + Nothing -> cont + Just i -> do + let (succeeded, failed) = if status + then ([r], []) + else ([], [r]) + updateAlertMap $ mergeAlert i $ + syncResultAlert succeeded failed + go (M.delete (Remote.uuid r) syncalerts) + Just (WARNING (RemoteURI uri) msg) -> do + void $ addAlert $ + warningAlert ("RemoteControl "++ show uri) msg + cont + Nothing -> do + debug ["protocol error from remotedaemon: ", l] + cont + where + cont = go syncalerts + withr uri = withRemote uri urimap cont + changeconnected sm uri = withr uri $ \r -> do + changeCurrentlyConnected $ sm $ Remote.uuid r + cont + +getURIMap :: Annex (M.Map URI Remote) +getURIMap = Remote.remoteMap' id (\r -> mkk . Git.location <$> Remote.getRepo r) + where + mkk (Git.Url u) = Just u + mkk _ = Nothing + +withRemote + :: RemoteURI + -> MVar (M.Map URI Remote) + -> Assistant a + -> (Remote -> Assistant a) + -> Assistant a +withRemote (RemoteURI uri) remotemap noremote a = do + m <- liftIO $ readMVar remotemap + case M.lookup uri m of + Just r -> a r + Nothing -> do + {- Reload map, in case a new remote has been added. -} + m' <- liftAnnex getURIMap + void $ liftIO $ swapMVar remotemap $ m' + maybe noremote a (M.lookup uri m') diff --git a/Assistant/Threads/SanityChecker.hs b/Assistant/Threads/SanityChecker.hs new file mode 100644 index 0000000000..563e038e78 --- /dev/null +++ b/Assistant/Threads/SanityChecker.hs @@ -0,0 +1,274 @@ +{- git-annex assistant sanity checker + - + - Copyright 2012, 2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} + +module Assistant.Threads.SanityChecker ( + sanityCheckerStartupThread, + sanityCheckerDailyThread, + sanityCheckerHourlyThread +) where + +import Assistant.Common +import Assistant.DaemonStatus +import Assistant.Alert +import Assistant.Repair +import Assistant.Drop +import Assistant.Ssh +import Assistant.TransferQueue +import Assistant.Types.UrlRenderer +import Assistant.Restart +import qualified Annex.Branch +import qualified Git +import qualified Git.LsFiles +import qualified Git.Command.Batch +import qualified Git.Config +import Utility.ThreadScheduler +import qualified Assistant.Threads.Watcher as Watcher +import Utility.Batch +import Utility.NotificationBroadcaster +import Utility.HumanTime +import Utility.Tense +import Git.Repair +import Git.Index +import Assistant.Unused +import Logs.Unused +import Types.Transfer +import Annex.Path +import Annex.Tmp +import qualified Annex +import qualified Utility.RawFilePath as R +#ifdef WITH_WEBAPP +import Assistant.WebApp.Types +#endif +#ifndef mingw32_HOST_OS +import Utility.LogFile +import Utility.DiskFree +#endif + +import Data.Time.Clock.POSIX +import qualified Data.Text as T +import System.PosixCompat.Files (statusChangeTime, isSymbolicLink) + +{- This thread runs once at startup, and most other threads wait for it + - to finish. (However, the webapp thread does not, to prevent the UI + - being nonresponsive.) -} +sanityCheckerStartupThread :: Maybe Duration -> NamedThread +sanityCheckerStartupThread startupdelay = namedThreadUnchecked "SanityCheckerStartup" $ do + {- Stale git locks can prevent commits from happening, etc. -} + void $ repairStaleGitLocks =<< liftAnnex gitRepo + + {- A corrupt index file can prevent the assistant from working at + - all, so detect and repair. -} + ifM (not <$> liftAnnex (inRepo checkIndexFast)) + ( do + debug ["corrupt index file found at startup; removing and restaging"] + liftAnnex $ inRepo $ removeWhenExistsWith R.removeLink . indexFile + {- Normally the startup scan avoids re-staging files, + - but with the index deleted, everything needs to be + - restaged. -} + modifyDaemonStatus_ $ \s -> s { forceRestage = True } + , whenM (liftAnnex $ inRepo missingIndex) $ do + debug ["no index file; restaging"] + modifyDaemonStatus_ $ \s -> s { forceRestage = True } + ) + {- If the git-annex index file is corrupt, it's ok to remove it; + - the data from the git-annex branch will be used, and the index + - will be automatically regenerated. -} + unlessM (liftAnnex $ Annex.Branch.withIndex $ inRepo $ Git.Repair.checkIndexFast) $ do + debug ["corrupt annex/index file found at startup; removing"] + liftAnnex $ liftIO . removeWhenExistsWith R.removeLink =<< fromRepo gitAnnexIndex + + {- Fix up ssh remotes set up by past versions of the assistant. -} + liftIO $ fixUpSshRemotes + + {- Clean up old temp files. -} + void $ liftAnnex $ tryNonAsync $ cleanupOtherTmp + + {- If there's a startup delay, it's done here. -} + liftIO $ maybe noop (threadDelaySeconds . Seconds . fromIntegral . durationSeconds) startupdelay + + {- Notify other threads that the startup sanity check is done. -} + status <- getDaemonStatus + liftIO $ sendNotification $ startupSanityCheckNotifier status + +{- This thread wakes up hourly for inexpensive frequent sanity checks. 
-} +sanityCheckerHourlyThread :: NamedThread +sanityCheckerHourlyThread = namedThread "SanityCheckerHourly" $ forever $ do + liftIO $ threadDelaySeconds $ Seconds oneHour + hourlyCheck + +{- This thread wakes up daily to make sure the tree is in good shape. -} +sanityCheckerDailyThread :: UrlRenderer -> NamedThread +sanityCheckerDailyThread urlrenderer = namedThread "SanityCheckerDaily" $ forever $ do + waitForNextCheck + + debug ["starting sanity check"] + void $ alertWhile sanityCheckAlert go + debug ["sanity check complete"] + where + go = do + modifyDaemonStatus_ $ \s -> s { sanityCheckRunning = True } + + now <- liftIO getPOSIXTime -- before check started + r <- either showerr return + =<< (tryIO . batch) <~> dailyCheck urlrenderer + + modifyDaemonStatus_ $ \s -> s + { sanityCheckRunning = False + , lastSanityCheck = Just now + } + + return r + + showerr e = do + liftAnnex $ warning $ UnquotedString $ show e + return False + +{- Only run one check per day, from the time of the last check. -} +waitForNextCheck :: Assistant () +waitForNextCheck = do + v <- lastSanityCheck <$> getDaemonStatus + now <- liftIO getPOSIXTime + liftIO $ threadDelaySeconds $ Seconds $ calcdelay now v + where + calcdelay _ Nothing = oneDay + calcdelay now (Just lastcheck) + | lastcheck < now = max oneHour $ + oneDay - truncate (now - lastcheck) + | otherwise = oneDay + +{- It's important to stay out of the Annex monad as much as possible while + - running potentially expensive parts of this check, since remaining in it + - will block the watcher. -} +dailyCheck :: UrlRenderer -> Assistant Bool +dailyCheck urlrenderer = do + checkRepoExists + + g <- liftAnnex gitRepo + batchmaker <- liftIO getBatchCommandMaker + + -- Find old unstaged symlinks, and add them to git. + (unstaged, cleanup) <- liftIO $ Git.LsFiles.notInRepo [] False ["."] g + now <- liftIO getPOSIXTime + forM_ unstaged $ \file -> do + ms <- liftIO $ catchMaybeIO $ R.getSymbolicLinkStatus file + case ms of + Just s | toonew (statusChangeTime s) now -> noop + | isSymbolicLink s -> addsymlink (fromRawFilePath file) ms + _ -> noop + liftIO $ void cleanup + + {- Allow git-gc to run once per day. More frequent gc is avoided + - by default to avoid slowing things down. Only run repacks when 100x + - the usual number of loose objects are present; we tend + - to have a lot of small objects and they should not be a + - significant size. -} + when (Git.Config.getMaybe "gc.auto" g == Just "0") $ + liftIO $ void $ Git.Command.Batch.run batchmaker + [ Param "-c", Param "gc.auto=670000" + , Param "gc" + , Param "--auto" + ] g + + {- Check if the unused files found last time have been dealt with. -} + checkOldUnused urlrenderer + + {- Run git-annex unused once per day. This is run as a separate + - process to stay out of the annex monad and so it can run as a + - batch job. -} + program <- liftIO programPath + let (program', params') = batchmaker (program, [Param "unused"]) + void $ liftIO $ boolSystem program' params' + {- Invalidate unused keys cache, and queue transfers of all unused + - keys, or if no transfers are called for, drop them. 
-} + unused <- liftAnnex unusedKeys' + void $ liftAnnex $ setUnusedKeys unused + forM_ unused $ \k -> do + unlessM (queueTransfers "unused" Later k (AssociatedFile Nothing) Upload) $ + handleDrops "unused" True k (AssociatedFile Nothing) [] + + return True + where + toonew timestamp now = now < (realToFrac (timestamp + slop) :: POSIXTime) + slop = fromIntegral tenMinutes + insanity msg = do + liftAnnex $ warning (UnquotedString msg) + void $ addAlert $ sanityCheckFixAlert msg + addsymlink file s = do + Watcher.runHandler Watcher.onAddSymlink file s + insanity $ "found unstaged symlink: " ++ file + +hourlyCheck :: Assistant () +hourlyCheck = do + checkRepoExists +#ifndef mingw32_HOST_OS + checkLogSize 0 +#else + noop +#endif + +#ifndef mingw32_HOST_OS +{- Rotate logs once when total log file size is > 2 mb. + - + - If total log size is larger than the amount of free disk space, + - continue rotating logs until size is < 2 mb, even if this + - results in immediately losing the just logged data. + -} +checkLogSize :: Int -> Assistant () +checkLogSize n = do + f <- liftAnnex $ fromRawFilePath <$> fromRepo gitAnnexDaemonLogFile + logs <- liftIO $ listLogs f + totalsize <- liftIO $ sum <$> mapM (getFileSize . toRawFilePath) logs + when (totalsize > 2 * oneMegabyte) $ do + debug ["Rotated logs due to size:", show totalsize] + liftIO $ openLog f >>= handleToFd >>= redirLog + when (n < maxLogs + 1) $ do + df <- liftIO $ getDiskFree $ takeDirectory f + case df of + Just free + | free < fromIntegral totalsize -> + checkLogSize (n + 1) + _ -> noop + where + oneMegabyte :: Integer + oneMegabyte = 1000000 +#endif + +oneHour :: Int +oneHour = 60 * 60 + +oneDay :: Int +oneDay = 24 * oneHour + +{- If annex.expireunused is set, find any keys that have lingered unused + - for the specified duration, and remove them. + - + - Otherwise, check to see if unused keys are piling up, and let the user + - know. -} +checkOldUnused :: UrlRenderer -> Assistant () +checkOldUnused urlrenderer = go =<< annexExpireUnused <$> liftAnnex Annex.getGitConfig + where + go (Just Nothing) = noop + go (Just (Just expireunused)) = expireUnused (Just expireunused) + go Nothing = maybe noop promptconfig =<< describeUnusedWhenBig + + promptconfig msg = +#ifdef WITH_WEBAPP + do + button <- mkAlertButton True (T.pack "Configure") urlrenderer ConfigUnusedR + void $ addAlert $ unusedFilesAlert [button] $ T.unpack $ renderTense Present msg +#else + debug [show $ renderTense Past msg] +#endif + +checkRepoExists :: Assistant () +checkRepoExists = do + g <- liftAnnex gitRepo + liftIO $ unlessM (doesDirectoryExist $ fromRawFilePath $ Git.repoPath g) $ + terminateSelf diff --git a/Assistant/Threads/TransferPoller.hs b/Assistant/Threads/TransferPoller.hs new file mode 100644 index 0000000000..067bd0b022 --- /dev/null +++ b/Assistant/Threads/TransferPoller.hs @@ -0,0 +1,56 @@ +{- git-annex assistant transfer polling thread + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Threads.TransferPoller where + +import Assistant.Common +import Assistant.DaemonStatus +import Types.Transfer +import Logs.Transfer +import Utility.NotificationBroadcaster +import qualified Assistant.Threads.TransferWatcher as TransferWatcher + +import Control.Concurrent +import qualified Data.Map as M + +{- This thread polls the status of ongoing transfers, determining how much + - of each transfer is complete. 
-} +transferPollerThread :: NamedThread +transferPollerThread = namedThread "TransferPoller" $ do + g <- liftAnnex gitRepo + tn <- liftIO . newNotificationHandle True =<< + transferNotifier <$> getDaemonStatus + forever $ do + liftIO $ threadDelay 500000 -- 0.5 seconds + ts <- currentTransfers <$> getDaemonStatus + if M.null ts + -- block until transfers running + then liftIO $ waitNotification tn + else mapM_ (poll g) $ M.toList ts + where + poll g (t, info) + {- Downloads are polled by checking the size of the + - temp file being used for the transfer. -} + | transferDirection t == Download = do + let f = gitAnnexTmpObjectLocation (transferKey t) g + sz <- liftIO $ catchMaybeIO $ getFileSize f + newsize t info sz + {- Uploads don't need to be polled for when the TransferWatcher + - thread can track file modifications. -} + | TransferWatcher.watchesTransferSize = noop + {- Otherwise, this code polls the upload progress + - by reading the transfer info file. -} + | otherwise = do + let (f, _, _) = transferFileAndLockFile t g + mi <- liftIO $ catchDefaultIO Nothing $ + readTransferInfoFile Nothing (fromRawFilePath f) + maybe noop (newsize t info . bytesComplete) mi + + newsize t info sz + | bytesComplete info /= sz && isJust sz = + alterTransferInfo t $ \i -> i { bytesComplete = sz } + | otherwise = noop diff --git a/Assistant/Threads/TransferScanner.hs b/Assistant/Threads/TransferScanner.hs new file mode 100644 index 0000000000..970516a380 --- /dev/null +++ b/Assistant/Threads/TransferScanner.hs @@ -0,0 +1,196 @@ +{- git-annex assistant thread to scan remotes to find needed transfers + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Threads.TransferScanner where + +import Assistant.Common +import Assistant.Types.ScanRemotes +import Assistant.ScanRemotes +import Assistant.TransferQueue +import Assistant.DaemonStatus +import Assistant.Drop +import Assistant.Sync +import Assistant.DeleteRemote +import Assistant.Types.UrlRenderer +import Types.Transfer +import Logs.Transfer +import Logs.Location +import Logs.Group +import qualified Remote +import qualified Types.Remote as Remote +import Utility.ThreadScheduler +import Utility.NotificationBroadcaster +import Utility.Batch +import qualified Git.LsFiles as LsFiles +import Annex.WorkTree +import Annex.Content +import Annex.Wanted +import CmdLine.Action +import Types.Command + +import qualified Data.Set as S +import Control.Concurrent + +{- This thread waits until a remote needs to be scanned, to find transfers + - that need to be made, to keep data in sync. + -} +transferScannerThread :: UrlRenderer -> NamedThread +transferScannerThread urlrenderer = namedThread "TransferScanner" $ do + startupScan + go S.empty + where + go scanned = do + scanrunning False + liftIO $ threadDelaySeconds (Seconds 2) + (rs, infos) <- unzip <$> getScanRemote + scanrunning True + if any fullScan infos || any (`S.notMember` scanned) rs + then do + expensiveScan urlrenderer rs + go $ scanned `S.union` S.fromList rs + else do + mapM_ failedTransferScan rs + go scanned + scanrunning b = do + ds <- modifyDaemonStatus $ \s -> + (s { transferScanRunning = b }, s) + liftIO $ sendNotification $ transferNotifier ds + + {- All git remotes are synced, all exports are updated, + - and all available remotes are scanned in full on startup, + - for multiple reasons, including: + - + - * This may be the first run, and there may be remotes + - already in place, that need to be synced. 
+ - * Changes may have been made last time we ran, but remotes were + - not available to be synced with. + - * Changes may have been made to remotes while we were down. + - * We may have run before, and scanned a remote, but + - only been in a subdirectory of the git remote, and so + - not synced it all. + - * We may have run before, and had transfers queued, + - and then the system (or us) crashed, and that info was + - lost. + - * A remote may be in the unwanted group, and this is a chance + - to determine if the remote has been emptied. + -} + startupScan = do + reconnectRemotes =<< syncGitRemotes <$> getDaemonStatus + addScanRemotes True =<< scannableRemotes + +{- This is a cheap scan for failed transfers involving a remote. -} +failedTransferScan :: Remote -> Assistant () +failedTransferScan r = do + failed <- liftAnnex $ clearFailedTransfers (Remote.uuid r) + mapM_ retry failed + where + retry (t, info) + | transferDirection t == Download = + {- Check if the remote still has the key. + - If not, relies on the expensiveScan to + - get it queued from some other remote. -} + whenM (liftAnnex $ remoteHas r $ transferKey t) $ + requeue t info + | otherwise = + {- The Transferrer checks when uploading + - that the remote doesn't already have the + - key, so it's not redundantly checked here. -} + requeue t info + requeue t info = queueTransferWhenSmall "retrying failed transfer" (associatedFile info) t r + +{- This is an expensive scan through the full git work tree, finding + - files to transfer. The scan is blocked when the transfer queue gets + - too large. + - + - This also finds files that are present either here or on a remote + - but that are not preferred content, and drops them. Searching for files + - to drop is done concurrently with the scan for transfers. + - + - TODO: It would be better to first drop as much as we can, before + - transferring much, to minimise disk use. + - + - During the scan, we'll also check if any unwanted repositories are empty, + - and can be removed. While unrelated, this is a cheap place to do it, + - since we need to look at the locations of all keys anyway. + -} +expensiveScan :: UrlRenderer -> [Remote] -> Assistant () +expensiveScan urlrenderer rs = batch <~> do + debug ["starting scan of", show visiblers] + + let us = map Remote.uuid rs + + mapM_ (liftAnnex . clearFailedTransfers) us + + unwantedrs <- liftAnnex $ S.fromList + <$> filterM inUnwantedGroup us + + g <- liftAnnex gitRepo + (files, cleanup) <- liftIO $ LsFiles.inRepo [] [] g + removablers <- scan unwantedrs files + void $ liftIO cleanup + + debug ["finished scan of", show visiblers] + + remove <- asIO1 $ removableRemote urlrenderer + liftIO $ mapM_ (void . tryNonAsync . remove) $ S.toList removablers + where + visiblers = let rs' = filter (not . Remote.readonly) rs + in if null rs' then rs else rs' + + scan unwanted [] = return unwanted + scan unwanted (f:fs) = do + (unwanted', ts) <- maybe + (return (unwanted, [])) + (findtransfers f unwanted) + =<< liftAnnex (lookupKey f) + mapM_ (enqueue f) ts + + {- Delay for a short time to avoid using too much CPU. -} + liftIO $ threadDelay $ fromIntegral $ oneSecond `div` 200 + + scan unwanted' fs + + enqueue f (r, t) = + queueTransferWhenSmall "expensive scan found missing object" + (AssociatedFile (Just f)) t r + findtransfers f unwanted key = do + let af = AssociatedFile (Just f) + locs <- liftAnnex $ loggedLocations key + present <- liftAnnex $ inAnnex key + let slocs = S.fromList locs + + {- The remotes may have changed since this scan began. 
-} + syncrs <- syncDataRemotes <$> getDaemonStatus + let use l a = mapMaybe (a key slocs) . l <$> getDaemonStatus + + liftAnnex $ handleDropsFrom locs syncrs + "expensive scan found too many copies of object" + present key af (SeekInput []) [] callCommandAction + ts <- if present + then liftAnnex . filterM (wantGetBy True (Just key) af . Remote.uuid . fst) + =<< use syncDataRemotes (genTransfer Upload False) + else ifM (liftAnnex $ wantGet True (Just key) af) + ( use downloadRemotes (genTransfer Download True) , return [] ) + let unwanted' = S.difference unwanted slocs + return (unwanted', ts) + +-- Both syncDataRemotes and exportRemotes can be scanned. +-- The downloadRemotes list contains both. +scannableRemotes :: Assistant [Remote] +scannableRemotes = downloadRemotes <$> getDaemonStatus + +genTransfer :: Direction -> Bool -> Key -> S.Set UUID -> Remote -> Maybe (Remote, Transfer) +genTransfer direction want key slocs r + | direction == Upload && Remote.readonly r = Nothing + | S.member (Remote.uuid r) slocs == want = Just + (r, Transfer direction (Remote.uuid r) (fromKey id key)) + | otherwise = Nothing + +remoteHas :: Remote -> Key -> Annex Bool +remoteHas r key = elem + <$> pure (Remote.uuid r) + <*> loggedLocations key diff --git a/Assistant/Threads/TransferWatcher.hs b/Assistant/Threads/TransferWatcher.hs new file mode 100644 index 0000000000..d692a3ffd0 --- /dev/null +++ b/Assistant/Threads/TransferWatcher.hs @@ -0,0 +1,108 @@ +{- git-annex assistant transfer watching thread + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Threads.TransferWatcher where + +import Assistant.Common +import Assistant.DaemonStatus +import Assistant.TransferSlots +import Types.Transfer +import Logs.Transfer +import Utility.DirWatcher +import Utility.DirWatcher.Types +import qualified Remote +import qualified Annex +import Annex.Perms + +import Control.Concurrent +import qualified Data.Map as M + +{- This thread watches for changes to the gitAnnexTransferDir, + - and updates the DaemonStatus's map of ongoing transfers. -} +transferWatcherThread :: NamedThread +transferWatcherThread = namedThread "TransferWatcher" $ do + dir <- liftAnnex $ gitAnnexTransferDir <$> gitRepo + liftAnnex $ createAnnexDirectory dir + let hook a = Just <$> asIO2 (runHandler a) + addhook <- hook onAdd + delhook <- hook onDel + modifyhook <- hook onModify + errhook <- hook onErr + let hooks = mkWatchHooks + { addHook = addhook + , delHook = delhook + , modifyHook = modifyhook + , errHook = errhook + } + void $ liftIO $ watchDir (fromRawFilePath dir) (const False) True hooks id + debug ["watching for transfers"] + +type Handler = FilePath -> Assistant () + +{- Runs an action handler. + - + - Exceptions are ignored, otherwise a whole thread could be crashed. + -} +runHandler :: Handler -> FilePath -> Maybe FileStatus -> Assistant () +runHandler handler file _filestatus = + either (liftIO . print) (const noop) =<< tryIO <~> handler file + +{- Called when there's an error with inotify. -} +onErr :: Handler +onErr = giveup + +{- Called when a new transfer information file is written. 
-} +onAdd :: Handler +onAdd file = case parseTransferFile file of + Nothing -> noop + Just t -> go t =<< liftAnnex (checkTransfer t) + where + go _ Nothing = noop -- transfer already finished + go t (Just info) = do + qp <- liftAnnex $ coreQuotePath <$> Annex.getGitConfig + debug [ "transfer starting:", describeTransfer qp t info ] + r <- liftAnnex $ Remote.remoteFromUUID $ transferUUID t + updateTransferInfo t info { transferRemote = r } + +{- Called when a transfer information file is updated. + - + - The only thing that should change in the transfer info is the + - bytesComplete, so that's the only thing updated in the DaemonStatus. -} +onModify :: Handler +onModify file = case parseTransferFile file of + Nothing -> noop + Just t -> go t =<< liftIO (readTransferInfoFile Nothing file) + where + go _ Nothing = noop + go t (Just newinfo) = alterTransferInfo t $ + \i -> i { bytesComplete = bytesComplete newinfo } + +{- This thread can only watch transfer sizes when the DirWatcher supports + - tracking modifications to files. -} +watchesTransferSize :: Bool +watchesTransferSize = modifyTracked + +{- Called when a transfer information file is removed. -} +onDel :: Handler +onDel file = case parseTransferFile file of + Nothing -> noop + Just t -> do + debug [ "transfer finishing:", show t] + minfo <- removeTransfer t + + -- Run transfer hook. + m <- transferHook <$> getDaemonStatus + maybe noop (\hook -> void $ liftIO $ forkIO $ hook t) + (M.lookup (transferKey t) m) + + finished <- asIO2 finishedTransfer + void $ liftIO $ forkIO $ do + {- XXX race workaround delay. The location + - log needs to be updated before finishedTransfer + - runs. -} + threadDelay 10000000 -- 10 seconds + finished t minfo diff --git a/Assistant/Threads/Transferrer.hs b/Assistant/Threads/Transferrer.hs new file mode 100644 index 0000000000..923479e7a1 --- /dev/null +++ b/Assistant/Threads/Transferrer.hs @@ -0,0 +1,27 @@ +{- git-annex assistant data transferrer thread + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Threads.Transferrer where + +import Assistant.Common +import Assistant.TransferQueue +import Assistant.TransferSlots +import Types.Transfer +import Annex.TransferrerPool +import Utility.Batch + +{- Dispatches transfers from the queue. -} +transfererThread :: NamedThread +transfererThread = namedThread "Transferrer" $ do + rt <- liftAnnex . mkRunTransferrer + =<< liftIO getBatchCommandMaker + forever $ inTransferSlot rt $ + maybe (return Nothing) (uncurry genTransfer) + =<< getNextTransfer notrunning + where + {- Skip transfers that are already running. -} + notrunning = isNothing . startedTime diff --git a/Assistant/Threads/UpgradeWatcher.hs b/Assistant/Threads/UpgradeWatcher.hs new file mode 100644 index 0000000000..5960a70c32 --- /dev/null +++ b/Assistant/Threads/UpgradeWatcher.hs @@ -0,0 +1,109 @@ +{- git-annex assistant thread to detect when git-annex is upgraded + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE CPP #-} + +module Assistant.Threads.UpgradeWatcher ( + upgradeWatcherThread +) where + +import Assistant.Common +import Assistant.Upgrade +import Utility.DirWatcher +import Utility.DirWatcher.Types +import Utility.ThreadScheduler +import Assistant.Types.UrlRenderer +import Assistant.Alert +import Assistant.DaemonStatus +#ifdef WITH_WEBAPP +import Assistant.WebApp.Types +import qualified BuildInfo +#endif + +import Control.Concurrent.MVar +import qualified Data.Text as T + +data WatcherState = InStartupScan | Started | Upgrading + deriving (Eq) + +upgradeWatcherThread :: UrlRenderer -> NamedThread +upgradeWatcherThread urlrenderer = namedThread "UpgradeWatcher" $ do + whenM (liftIO checkSuccessfulUpgrade) $ + showSuccessfulUpgrade urlrenderer + go =<< liftIO upgradeFlagFile + where + go flagfile = do + mvar <- liftIO $ newMVar InStartupScan + changed <- Just <$> asIO2 (changedFile urlrenderer mvar flagfile) + let hooks = mkWatchHooks + { addHook = changed + , delHook = changed + , addSymlinkHook = changed + , modifyHook = changed + , delDirHook = changed + } + let dir = fromRawFilePath (parentDir (toRawFilePath flagfile)) + let depth = length (splitPath dir) + 1 + let nosubdirs f = length (splitPath f) == depth + void $ liftIO $ watchDir dir nosubdirs False hooks (startup mvar) + -- Ignore bogus events generated during the startup scan. + -- We ask the watcher to not generate them, but just to be safe.. + startup mvar scanner = do + r <- scanner + void $ swapMVar mvar Started + return r + +changedFile :: UrlRenderer -> MVar WatcherState -> FilePath -> FilePath -> Maybe FileStatus -> Assistant () +changedFile urlrenderer mvar flagfile file _status + | flagfile /= file = noop + | otherwise = do + state <- liftIO $ readMVar mvar + when (state == Started) $ do + setstate Upgrading + ifM (liftIO upgradeSanityCheck) + ( handleUpgrade urlrenderer + , do + debug ["new version failed sanity check; not using"] + setstate Started + ) + where + setstate = void . liftIO . swapMVar mvar + +handleUpgrade :: UrlRenderer -> Assistant () +handleUpgrade urlrenderer = do + -- Wait 2 minutes for any final upgrade changes to settle. + -- (For example, other associated files may be being put into + -- place.) Not needed when using a distribution bundle, because + -- in that case git-annex handles the upgrade in a non-racy way. + liftIO $ unlessM usingDistribution $ + threadDelaySeconds (Seconds 120) + ifM autoUpgradeEnabled + ( do + debug ["starting automatic upgrade"] + unattendedUpgrade +#ifdef WITH_WEBAPP + , do + button <- mkAlertButton True (T.pack "Finish Upgrade") urlrenderer ConfigFinishUpgradeR + void $ addAlert $ upgradeReadyAlert button +#else + , noop +#endif + ) + +showSuccessfulUpgrade :: UrlRenderer -> Assistant () +showSuccessfulUpgrade urlrenderer = do +#ifdef WITH_WEBAPP + button <- ifM autoUpgradeEnabled + ( pure Nothing + , Just <$> mkAlertButton True + (T.pack "Enable Automatic Upgrades") + urlrenderer ConfigEnableAutomaticUpgradeR + ) + void $ addAlert $ upgradeFinishedAlert button BuildInfo.packageversion +#else + noop +#endif diff --git a/Assistant/Threads/Upgrader.hs b/Assistant/Threads/Upgrader.hs new file mode 100644 index 0000000000..a0e39e4174 --- /dev/null +++ b/Assistant/Threads/Upgrader.hs @@ -0,0 +1,85 @@ +{- git-annex assistant thread to detect when upgrade is available + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE CPP #-} + +module Assistant.Threads.Upgrader ( + upgraderThread +) where + +import Assistant.Common +import Assistant.Upgrade + +import Assistant.Types.UrlRenderer +import Assistant.DaemonStatus +import Assistant.Alert +import Utility.NotificationBroadcaster +import qualified Annex +import qualified BuildInfo +import qualified Utility.DottedVersion as DottedVersion +import Types.Distribution +#ifdef WITH_WEBAPP +import Assistant.WebApp.Types +#endif + +import Data.Time.Clock +import qualified Data.Text as T + +upgraderThread :: UrlRenderer -> NamedThread +upgraderThread urlrenderer = namedThread "Upgrader" $ + when upgradeSupported $ do + {- Check for upgrade on startup, unless it was just + - upgraded. -} + unlessM (liftIO checkSuccessfulUpgrade) $ + checkUpgrade urlrenderer + h <- liftIO . newNotificationHandle False . networkConnectedNotifier =<< getDaemonStatus + go h =<< liftIO getCurrentTime + where + {- Wait for a network connection event. Then see if it's been + - half a day since the last upgrade check. If so, proceed with + - check. -} + go h lastchecked = do + liftIO $ waitNotification h + autoupgrade <- liftAnnex $ annexAutoUpgrade <$> Annex.getGitConfig + if autoupgrade == NoAutoUpgrade + then go h lastchecked + else do + now <- liftIO getCurrentTime + if diffUTCTime now lastchecked > halfday + then do + checkUpgrade urlrenderer + go h =<< liftIO getCurrentTime + else go h lastchecked + halfday = 12 * 60 * 60 + +checkUpgrade :: UrlRenderer -> Assistant () +checkUpgrade urlrenderer = do + debug [ "Checking if an upgrade is available." ] + go =<< downloadDistributionInfo + where + go Nothing = debug [ "Failed to check if upgrade is available." ] + go (Just d) = do + let installed = DottedVersion.normalize BuildInfo.packageversion + let avail = DottedVersion.normalize $ distributionVersion d + let old = DottedVersion.normalize <$> distributionUrgentUpgrade d + if Just installed <= old + then canUpgrade High urlrenderer d + else if installed < avail + then canUpgrade Low urlrenderer d + else debug [ "No new version found." ] + +canUpgrade :: AlertPriority -> UrlRenderer -> GitAnnexDistribution -> Assistant () +canUpgrade urgency urlrenderer d = ifM autoUpgradeEnabled + ( startDistributionDownload d + , do +#ifdef WITH_WEBAPP + button <- mkAlertButton True (T.pack "Upgrade") urlrenderer (ConfigStartUpgradeR d) + void $ addAlert (canUpgradeAlert urgency (distributionVersion d) button) +#else + noop +#endif + ) diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs new file mode 100644 index 0000000000..3a72901087 --- /dev/null +++ b/Assistant/Threads/Watcher.hs @@ -0,0 +1,373 @@ +{- git-annex assistant tree watcher + - + - Copyright 2012-2015 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE DeriveDataTypeable, OverloadedStrings, CPP #-} + +module Assistant.Threads.Watcher ( + watchThread, + WatcherControl(..), + checkCanWatch, + needLsof, + onAddSymlink, + runHandler, +) where + +import Assistant.Common +import Assistant.DaemonStatus +import Assistant.Changes +import Assistant.Types.Changes +import Assistant.Alert +import Utility.DirWatcher +import Utility.DirWatcher.Types +import Utility.InodeCache +import qualified Utility.RawFilePath as R +import qualified Annex +import qualified Annex.Queue +import qualified Git +import qualified Git.UpdateIndex +import qualified Git.LsFiles as LsFiles +import Annex.WorkTree +import Annex.CatFile +import Annex.CheckIgnore +import Annex.Link +import Annex.Content +import Annex.ReplaceFile +import Annex.InodeSentinal +import Git.Types +import Git.FilePath +import Config.GitConfig +import Utility.ThreadScheduler +import Logs.Location +import qualified Database.Keys +#ifndef mingw32_HOST_OS +import qualified Utility.Lsof as Lsof +#endif + +import Data.Typeable +import qualified Data.ByteString.Lazy as L +import qualified Control.Exception as E +import Data.Time.Clock +import System.PosixCompat.Files (fileMode, statusChangeTime) + +checkCanWatch :: Annex () +checkCanWatch + | canWatch = do +#ifndef mingw32_HOST_OS + liftIO Lsof.setup + unlessM (liftIO (inSearchPath "lsof") <||> Annex.getRead Annex.force) + needLsof +#else + noop +#endif + | otherwise = giveup "watch mode is not available on this system" + +needLsof :: Annex () +needLsof = giveup $ unlines + [ "The lsof command is needed for watch mode to be safe, and is not in PATH." + , "To override lsof checks to ensure that files are not open for writing" + , "when added to the annex, you can use --force" + , "Be warned: This can corrupt data in the annex, and make fsck complain." + ] + +{- A special exception that can be thrown to pause or resume the watcher. -} +data WatcherControl = PauseWatcher | ResumeWatcher + deriving (Show, Eq, Typeable) + +instance E.Exception WatcherControl + +watchThread :: NamedThread +watchThread = namedThread "Watcher" $ + ifM (liftAnnex $ getGitConfigVal annexAutoCommit) + ( runWatcher + , waitFor ResumeWatcher runWatcher + ) + +runWatcher :: Assistant () +runWatcher = do + startup <- asIO1 startupScan + symlinkssupported <- liftAnnex $ coreSymlinks <$> Annex.getGitConfig + addhook <- hook $ onAddFile symlinkssupported + delhook <- hook onDel + addsymlinkhook <- hook onAddSymlink + deldirhook <- hook onDelDir + errhook <- hook onErr + let hooks = mkWatchHooks + { addHook = addhook + , delHook = delhook + , addSymlinkHook = addsymlinkhook + , delDirHook = deldirhook + , errHook = errhook + } + scanevents <- liftAnnex $ annexStartupScan <$> Annex.getGitConfig + h <- liftIO $ watchDir "." ignored scanevents hooks startup + debug [ "watching", "."] + + {- Let the DirWatcher thread run until signalled to pause it, + - then wait for a resume signal, and restart. -} + waitFor PauseWatcher $ do + liftIO $ stopWatchDir h + waitFor ResumeWatcher runWatcher + where + hook a = Just <$> asIO2 (runHandler a) + +waitFor :: WatcherControl -> Assistant () -> Assistant () +waitFor sig next = do + r <- liftIO (E.try pause :: IO (Either E.SomeException ())) + case r of + Left e -> case E.fromException e of + Just s + | s == sig -> next + _ -> noop + _ -> noop + where + pause = runEvery (Seconds 86400) noop + +{- Initial startup scan. The action should return once the scan is complete. 
-} +startupScan :: IO a -> Assistant a +startupScan scanner = do + liftAnnex $ showAction "scanning" + alertWhile' startupScanAlert $ do + r <- liftIO scanner + + -- Notice any files that were deleted before + -- watching was started. + top <- liftAnnex $ fromRepo Git.repoPath + (fs, cleanup) <- liftAnnex $ inRepo $ LsFiles.deleted [] [top] + forM_ fs $ \f -> do + let f' = fromRawFilePath f + liftAnnex $ onDel' f' + maybe noop recordChange =<< madeChange f' RmChange + void $ liftIO cleanup + + liftAnnex $ showAction "started" + liftIO $ putStrLn "" + + modifyDaemonStatus_ $ \s -> s { scanComplete = True } + + -- Ensure that the Committer sees any changes + -- that it did not process, and acts on them now that + -- the scan is complete. + refillChanges =<< getAnyChanges + + return (True, r) + +{- Hardcoded ignores, passed to the DirWatcher so it can avoid looking + - at the entire .git directory. Does not include .gitignores. -} +ignored :: FilePath -> Bool +ignored = ig . takeFileName + where + ig ".git" = True + ig ".gitignore" = True + ig ".gitattributes" = True +#ifdef darwin_HOST_OS + ig ".DS_Store" = True +#endif + ig _ = False + +unlessIgnored :: FilePath -> Assistant (Maybe Change) -> Assistant (Maybe Change) +unlessIgnored file a = ifM (liftAnnex $ checkIgnored (CheckGitIgnore True) (toRawFilePath file)) + ( noChange + , a + ) + +type Handler = FilePath -> Maybe FileStatus -> Assistant (Maybe Change) + +{- Runs an action handler, and if there was a change, adds it to the ChangeChan. + - + - Exceptions are ignored, otherwise a whole watcher thread could be crashed. + -} +runHandler :: Handler -> FilePath -> Maybe FileStatus -> Assistant () +runHandler handler file filestatus = void $ do + r <- tryIO <~> handler (normalize file) filestatus + case r of + Left e -> liftAnnex $ warning $ UnquotedString $ show e + Right Nothing -> noop + Right (Just change) -> recordChange change + where + normalize f + | "./" `isPrefixOf` file = drop 2 f + | otherwise = f + +shouldRestage :: DaemonStatus -> Bool +shouldRestage ds = scanComplete ds || forceRestage ds + +onAddFile :: Bool -> Handler +onAddFile symlinkssupported f fs = + onAddFile' contentchanged addassociatedfile addlink samefilestatus symlinkssupported f fs + where + addassociatedfile key file = + Database.Keys.addAssociatedFile key + =<< inRepo (toTopFilePath (toRawFilePath file)) + samefilestatus key file status = do + cache <- Database.Keys.getInodeCaches key + curr <- withTSDelta $ \delta -> + liftIO $ toInodeCache delta (toRawFilePath file) status + case (cache, curr) of + (_, Just c) -> elemInodeCaches c cache + ([], Nothing) -> return True + _ -> return False + contentchanged oldkey file = do + Database.Keys.removeAssociatedFile oldkey + =<< inRepo (toTopFilePath (toRawFilePath file)) + unlessM (inAnnex oldkey) $ + logStatus oldkey InfoMissing + addlink file key = do + mode <- liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus (toRawFilePath file) + liftAnnex $ stagePointerFile (toRawFilePath file) mode =<< hashPointerFile key + madeChange file $ LinkChange (Just key) + +onAddFile' + :: (Key -> FilePath -> Annex ()) + -> (Key -> FilePath -> Annex ()) + -> (FilePath -> Key -> Assistant (Maybe Change)) + -> (Key -> FilePath -> FileStatus -> Annex Bool) + -> Bool + -> Handler +onAddFile' contentchanged addassociatedfile addlink samefilestatus symlinkssupported file fs = do + v <- liftAnnex $ catKeyFile (toRawFilePath file) + case (v, fs) of + (Just key, Just filestatus) -> + ifM (liftAnnex $ samefilestatus key file filestatus) + {- 
It's possible to get an add event for + - an existing file that is not + - really modified, but it might have + - just been deleted and been put back, + - so its annex link is restaged to make sure. -} + ( ifM (shouldRestage <$> getDaemonStatus) + ( addlink file key + , noChange + ) + , guardSymlinkStandin (Just key) $ do + debug ["changed", file] + liftAnnex $ contentchanged key file + pendingAddChange file + ) + _ -> unlessIgnored file $ + guardSymlinkStandin Nothing $ do + debug ["add", file] + pendingAddChange file + where + {- On a filesystem without symlinks, we'll get changes for regular + - files that git uses to stand-in for symlinks. Detect when + - this happens, and stage the symlink, rather than annexing the + - file. -} + guardSymlinkStandin mk a + | symlinkssupported = a + | otherwise = do + linktarget <- liftAnnex $ getAnnexLinkTarget $ + toRawFilePath file + case linktarget of + Nothing -> a + Just lt -> do + case parseLinkTargetOrPointer lt of + Nothing -> noop + Just key -> liftAnnex $ + addassociatedfile key file + onAddSymlink' (Just lt) mk file fs + +{- A symlink might be an arbitrary symlink, which is just added. + - Or, if it is a git-annex symlink, ensure it points to the content + - before adding it. + -} +onAddSymlink :: Handler +onAddSymlink file filestatus = unlessIgnored file $ do + linktarget <- liftIO (catchMaybeIO $ R.readSymbolicLink file') + kv <- liftAnnex (lookupKey file') + onAddSymlink' linktarget kv file filestatus + where + file' = toRawFilePath file + +onAddSymlink' :: Maybe LinkTarget -> Maybe Key -> Handler +onAddSymlink' linktarget mk file filestatus = go mk + where + go (Just key) = do + link <- liftAnnex $ calcRepo $ gitAnnexLink (toRawFilePath file) key + if linktarget == Just link + then ensurestaged (Just link) =<< getDaemonStatus + else do + liftAnnex $ replaceWorkTreeFile file $ + makeAnnexLink link + addLink file link (Just key) + -- other symlink, not git-annex + go Nothing = ensurestaged linktarget =<< getDaemonStatus + + {- This is often called on symlinks that are already + - staged correctly. A symlink may have been deleted + - and re-added, or added when the watcher was + - not running. So they're normally restaged to make sure. + - + - As an optimisation, during the startup scan, avoid + - restaging everything. Only links that were created since + - the last time the daemon was running are staged. + - (If the daemon has never run before, avoid staging + - links too.) + -} + ensurestaged (Just link) daemonstatus + | shouldRestage daemonstatus = addLink file link mk + | otherwise = case filestatus of + Just s + | not (afterLastDaemonRun (statusChangeTime s) daemonstatus) -> noChange + _ -> addLink file link mk + ensurestaged Nothing _ = noChange + +{- For speed, tries to reuse the existing blob for symlink target. 
-} +addLink :: FilePath -> LinkTarget -> Maybe Key -> Assistant (Maybe Change) +addLink file link mk = do + debug ["add symlink", file] + liftAnnex $ do + v <- catObjectDetails $ Ref $ encodeBS $ ':':file + case v of + Just (currlink, sha, _type) + | L.fromStrict link == currlink -> + stageSymlink (toRawFilePath file) sha + _ -> stageSymlink (toRawFilePath file) + =<< hashSymlink link + madeChange file $ LinkChange mk + +onDel :: Handler +onDel file _ = do + debug ["file deleted", file] + liftAnnex $ onDel' file + madeChange file RmChange + +onDel' :: FilePath -> Annex () +onDel' file = do + topfile <- inRepo (toTopFilePath (toRawFilePath file)) + withkey $ flip Database.Keys.removeAssociatedFile topfile + Annex.Queue.addUpdateIndex =<< + inRepo (Git.UpdateIndex.unstageFile (toRawFilePath file)) + where + withkey a = maybe noop a =<< catKeyFile (toRawFilePath file) + +{- A directory has been deleted, or moved, so tell git to remove anything + - that was inside it from its cache. Since it could reappear at any time, + - use --cached to only delete it from the index. + - + - This queues up a lot of RmChanges, which assists the Committer in + - pairing up renamed files when the directory was renamed. -} +onDelDir :: Handler +onDelDir dir _ = do + debug ["directory deleted", dir] + (fs, clean) <- liftAnnex $ inRepo $ LsFiles.deleted [] [toRawFilePath dir] + let fs' = map fromRawFilePath fs + + liftAnnex $ mapM_ onDel' fs' + + -- Get the events queued up as fast as possible, so the + -- committer sees them all in one block. + now <- liftIO getCurrentTime + recordChanges $ map (\f -> Change now f RmChange) fs' + + void $ liftIO clean + noChange + +{- Called when there's an error with inotify or kqueue. -} +onErr :: Handler +onErr msg _ = do + liftAnnex $ warning (UnquotedString msg) + void $ addAlert $ warningAlert "watcher" msg + noChange diff --git a/Assistant/Threads/WebApp.hs b/Assistant/Threads/WebApp.hs new file mode 100644 index 0000000000..3fdd12d05f --- /dev/null +++ b/Assistant/Threads/WebApp.hs @@ -0,0 +1,136 @@ +{- git-annex assistant webapp thread + - + - Copyright 2012-2014 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE TemplateHaskell, MultiParamTypeClasses #-} +{-# LANGUAGE ViewPatterns, OverloadedStrings #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module Assistant.Threads.WebApp where + +import Assistant.Common +import Assistant.WebApp +import Assistant.WebApp.Types +import Assistant.WebApp.DashBoard +import Assistant.WebApp.SideBar +import Assistant.WebApp.Notifications +import Assistant.WebApp.RepoList +import Assistant.WebApp.Configurators +import Assistant.WebApp.Configurators.Local +import Assistant.WebApp.Configurators.Ssh +import Assistant.WebApp.Configurators.Pairing +import Assistant.WebApp.Configurators.AWS +import Assistant.WebApp.Configurators.IA +import Assistant.WebApp.Configurators.WebDAV +import Assistant.WebApp.Configurators.Preferences +import Assistant.WebApp.Configurators.Unused +import Assistant.WebApp.Configurators.Edit +import Assistant.WebApp.Configurators.Delete +import Assistant.WebApp.Configurators.Fsck +import Assistant.WebApp.Configurators.Upgrade +import Assistant.WebApp.Documentation +import Assistant.WebApp.Control +import Assistant.WebApp.OtherRepos +import Assistant.WebApp.Repair +import Assistant.WebApp.Pairing +import Assistant.Types.ThreadedMonad +import Utility.WebApp +import Utility.AuthToken +import Utility.Tmp +import Utility.FileMode +import Git +import qualified Annex + +import Yesod +import Network.Socket (SockAddr, HostName, PortNumber) +import Data.Text (pack, unpack) +import qualified Network.Wai.Handler.WarpTLS as TLS +import Network.Wai.Middleware.RequestLogger + +mkYesodDispatch "WebApp" $(parseRoutesFile "Assistant/WebApp/routes") + +type Url = String + +webAppThread + :: AssistantData + -> UrlRenderer + -> Bool + -> Maybe String + -> Maybe (IO Url) + -> Maybe HostName + -> Maybe PortNumber + -> Maybe (Url -> FilePath -> IO ()) + -> NamedThread +webAppThread assistantdata urlrenderer noannex cannotrun postfirstrun listenhost listenport onstartup = thread $ liftIO $ do + listenhost' <- if isJust listenhost + then pure listenhost + else getAnnex $ annexListen <$> Annex.getGitConfig + listenport' <- if isJust listenport + then pure listenport + else getAnnex $ annexPort <$> Annex.getGitConfig + tlssettings <- getAnnex getTlsSettings + webapp <- WebApp + <$> pure assistantdata + <*> genAuthToken 128 + <*> getreldir + <*> pure staticRoutes + <*> pure postfirstrun + <*> pure cannotrun + <*> pure noannex + <*> pure listenhost' + <*> newWormholePairingState + setUrlRenderer urlrenderer $ yesodRender webapp (pack "") + app <- toWaiAppPlain webapp + app' <- ifM (fromMaybe False <$> (getAnnex $ Just . annexDebug <$> Annex.getGitConfig)) + ( return $ logStdout app + , return app + ) + runWebApp tlssettings listenhost' listenport' app' $ \addr -> if noannex + then withTmpFile "webapp.html" $ \tmpfile h -> do + hClose h + go tlssettings addr webapp tmpfile Nothing + else do + htmlshim <- getAnnex' $ fromRepo gitAnnexHtmlShim + urlfile <- getAnnex' $ fromRepo gitAnnexUrlFile + go tlssettings addr webapp + (fromRawFilePath htmlshim) + (Just urlfile) + where + -- The webapp thread does not wait for the startupSanityCheckThread + -- to finish, so that the user interface remains responsive while + -- that's going on. + thread = namedThreadUnchecked "WebApp" + getreldir + | noannex = return Nothing + | otherwise = Just <$> + (relHome . 
fromRawFilePath =<< absPath =<< getAnnex' (fromRepo repoPath)) + go tlssettings addr webapp htmlshim urlfile = do + let url = myUrl tlssettings webapp addr + maybe noop (`writeFileProtected` url) urlfile + writeHtmlShim "Starting webapp..." url htmlshim + maybe noop (\a -> a url htmlshim) onstartup + + getAnnex a + | noannex = pure Nothing + | otherwise = getAnnex' a + getAnnex' = runThreadState (threadState assistantdata) + +myUrl :: Maybe TLS.TLSSettings -> WebApp -> SockAddr -> Url +myUrl tlssettings webapp addr = unpack $ yesodRender webapp urlbase DashboardR [] + where + urlbase = pack $ proto ++ "://" ++ show addr + proto + | isJust tlssettings = "https" + | otherwise = "http" + +getTlsSettings :: Annex (Maybe TLS.TLSSettings) +getTlsSettings = do + cert <- fromRepo gitAnnexWebCertificate + privkey <- fromRepo gitAnnexWebPrivKey + ifM (liftIO $ allM doesFileExist [cert, privkey]) + ( return $ Just $ TLS.tlsSettings cert privkey + , return Nothing + ) diff --git a/Assistant/TransferQueue.hs b/Assistant/TransferQueue.hs new file mode 100644 index 0000000000..571899bb6d --- /dev/null +++ b/Assistant/TransferQueue.hs @@ -0,0 +1,236 @@ +{- git-annex assistant pending transfer queue + - + - Copyright 2012-2014 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE BangPatterns #-} + +module Assistant.TransferQueue ( + TransferQueue, + Schedule(..), + newTransferQueue, + getTransferQueue, + queueTransfers, + queueTransfersMatching, + queueDeferredDownloads, + queueTransfer, + queueTransferAt, + queueTransferWhenSmall, + getNextTransfer, + getMatchingTransfers, + dequeueTransfers, +) where + +import Assistant.Common +import Assistant.DaemonStatus +import Assistant.Types.TransferQueue +import Types.Transfer +import Logs.Transfer +import Types.Remote +import qualified Remote +import qualified Types.Remote as Remote +import qualified Annex +import Annex.Wanted +import Utility.TList + +import Control.Concurrent.STM +import qualified Data.Map.Strict as M +import qualified Data.Set as S + +type Reason = String + +{- Reads the queue's content without blocking or changing it. -} +getTransferQueue :: Assistant [(Transfer, TransferInfo)] +getTransferQueue = (atomically . readTList . queuelist) <<~ transferQueue + +stubInfo :: AssociatedFile -> Remote -> TransferInfo +stubInfo f r = stubTransferInfo + { transferRemote = Just r + , associatedFile = f + } + +{- Adds transfers to queue for some of the known remotes. + - Honors preferred content settings, only transferring wanted files. -} +queueTransfers :: Reason -> Schedule -> Key -> AssociatedFile -> Direction -> Assistant Bool +queueTransfers = queueTransfersMatching (const True) + +{- Adds transfers to queue for some of the known remotes, that match a + - condition. Honors preferred content settings. -} +queueTransfersMatching :: (UUID -> Bool) -> Reason -> Schedule -> Key -> AssociatedFile -> Direction -> Assistant Bool +queueTransfersMatching matching reason schedule k f direction + | direction == Download = ifM (liftAnnex $ wantGet True (Just k) f) + ( go + , return False + ) + | otherwise = go + where + go = do + rs <- liftAnnex . selectremotes =<< getDaemonStatus + let matchingrs = filter (matching . Remote.uuid) rs + if null matchingrs + then do + defer + return False + else do + forM_ matchingrs $ \r -> + enqueue reason schedule (gentransfer r) (stubInfo f r) + return True + selectremotes st + {- Queue downloads from all remotes that + - have the key. The list of remotes is ordered with + - cheapest first. 
More expensive ones will only be tried + - if downloading from a cheap one fails. -} + | direction == Download = do + s <- locs + return $ filter (inset s) (downloadRemotes st) + {- Upload to all remotes that want the content and don't + - already have it. -} + | otherwise = do + s <- locs + filterM (wantGetBy True (Just k) f . Remote.uuid) $ + filter (\r -> not (inset s r || Remote.readonly r)) + (syncDataRemotes st) + where + locs = S.fromList . map Remote.uuid <$> Remote.keyPossibilities (Remote.IncludeIgnored False) k + inset s r = S.member (Remote.uuid r) s + gentransfer r = Transfer + { transferDirection = direction + , transferKeyData = fromKey id k + , transferUUID = Remote.uuid r + } + defer + {- Defer this download, as no known remote has the key. -} + | direction == Download = do + q <- getAssistant transferQueue + void $ liftIO $ atomically $ + consTList (deferreddownloads q) (k, f) + | otherwise = noop + +{- Queues any deferred downloads that can now be accomplished, leaving + - any others in the list to try again later. -} +queueDeferredDownloads :: Reason -> Schedule -> Assistant () +queueDeferredDownloads reason schedule = do + q <- getAssistant transferQueue + l <- liftIO $ atomically $ readTList (deferreddownloads q) + rs <- downloadRemotes <$> getDaemonStatus + left <- filterM (queue rs) l + unless (null left) $ + liftIO $ atomically $ appendTList (deferreddownloads q) left + where + queue rs (k, f) = do + uuids <- liftAnnex $ Remote.keyLocations k + let sources = filter (\r -> uuid r `elem` uuids) rs + unless (null sources) $ + forM_ sources $ \r -> + enqueue reason schedule + (gentransfer r) (stubInfo f r) + return $ null sources + where + gentransfer r = Transfer + { transferDirection = Download + , transferKeyData = fromKey id k + , transferUUID = Remote.uuid r + } + +enqueue :: Reason -> Schedule -> Transfer -> TransferInfo -> Assistant () +enqueue reason schedule t info + | schedule == Next = go consTList + | otherwise = go snocTList + where + go modlist = whenM (add modlist) $ do + qp <- liftAnnex $ coreQuotePath <$> Annex.getGitConfig + debug [ "queued", describeTransfer qp t info, ": " ++ reason ] + notifyTransfer + add modlist = do + q <- getAssistant transferQueue + dstatus <- getAssistant daemonStatusHandle + liftIO $ atomically $ ifM (checkRunningTransferSTM dstatus t) + ( return False + , do + l <- readTList (queuelist q) + if (t `notElem` map fst l) + then do + void $ modifyTVar' (queuesize q) succ + void $ modlist (queuelist q) (t, info) + return True + else return False + ) + +{- Adds a transfer to the queue. -} +queueTransfer :: Reason -> Schedule -> AssociatedFile -> Transfer -> Remote -> Assistant () +queueTransfer reason schedule f t remote = + enqueue reason schedule t (stubInfo f remote) + +{- Blocks until the queue is no larger than a given size, and then adds a + - transfer to the queue. -} +queueTransferAt :: Int -> Reason -> Schedule -> AssociatedFile -> Transfer -> Remote -> Assistant () +queueTransferAt wantsz reason schedule f t remote = do + q <- getAssistant transferQueue + liftIO $ atomically $ do + sz <- readTVar (queuesize q) + unless (sz <= wantsz) $ + retry -- blocks until queuesize changes + enqueue reason schedule t (stubInfo f remote) + +queueTransferWhenSmall :: Reason -> AssociatedFile -> Transfer -> Remote -> Assistant () +queueTransferWhenSmall reason = queueTransferAt 10 reason Later + +{- Blocks until a pending transfer is available in the queue, + - and removes it. 
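{- Editor's note: a minimal, self-contained sketch (not part of the patch)
 - of the two STM idioms this queue is built on. Names here (boundedPush,
 - claimMatching) are hypothetical; the real code above uses TList and the
 - daemon status map, this sketch substitutes a plain list in a TVar.
 -
 - First: `retry` aborts and re-runs a transaction when a TVar it read
 - changes, which is how queueTransferAt above blocks until the queue
 - drains. Second: as getNextTransfer below does, dequeueing and recording
 - "now running" happen in one atomic transaction, so no observer can see
 - an item in both places, or in neither. -}
import Control.Concurrent.STM
import Control.Monad (unless)
import Data.List (partition)
import qualified Data.Map.Strict as M

-- Block until the queue holds no more than wanted items, then push.
-- (The real code uses TList for cheap appends; (++) is fine for a sketch.)
boundedPush :: TVar [a] -> Int -> a -> STM ()
boundedPush queue wanted x = do
	l <- readTVar queue
	unless (length l <= wanted) retry -- re-runs when the TVar changes
	writeTVar queue (l ++ [x])

-- Atomically move matching items from the queue into a "running" map.
claimMatching :: Ord k => (k -> Bool) -> TVar [(k, v)] -> TVar (M.Map k v) -> STM [(k, v)]
claimMatching p queue running = do
	(matched, rest) <- partition (p . fst) <$> readTVar queue
	writeTVar queue rest
	modifyTVar' running (`M.union` M.fromList matched)
	return matched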
+ - + - Checks that it's acceptable, before adding it to the + - currentTransfers map. If it's not acceptable, it's discarded. + - + - This is done in a single STM transaction, so there is no window + - where an observer sees an inconsistent status. -} +getNextTransfer :: (TransferInfo -> Bool) -> Assistant (Maybe (Transfer, TransferInfo)) +getNextTransfer acceptable = do + q <- getAssistant transferQueue + dstatus <- getAssistant daemonStatusHandle + liftIO $ atomically $ do + sz <- readTVar (queuesize q) + if sz < 1 + then retry -- blocks until queuesize changes + else readTList (queuelist q) >>= \case + [] -> retry -- blocks until something is queued + (r@(t,info):rest) -> do + void $ modifyTVar' (queuesize q) pred + setTList (queuelist q) rest + if acceptable info + then do + adjustTransfersSTM dstatus $ + M.insert t info + return $ Just r + else return Nothing + +{- Moves transfers matching a condition from the queue, to the + - currentTransfers map. -} +getMatchingTransfers :: (Transfer -> Bool) -> Assistant [(Transfer, TransferInfo)] +getMatchingTransfers c = do + q <- getAssistant transferQueue + dstatus <- getAssistant daemonStatusHandle + liftIO $ atomically $ do + ts <- dequeueTransfersSTM q c + unless (null ts) $ + adjustTransfersSTM dstatus $ \m -> M.union m $ M.fromList ts + return ts + +{- Removes transfers matching a condition from the queue, and returns the + - removed transfers. -} +dequeueTransfers :: (Transfer -> Bool) -> Assistant [(Transfer, TransferInfo)] +dequeueTransfers c = do + q <- getAssistant transferQueue + removed <- liftIO $ atomically $ dequeueTransfersSTM q c + unless (null removed) $ + notifyTransfer + return removed + +dequeueTransfersSTM :: TransferQueue -> (Transfer -> Bool) -> STM [(Transfer, TransferInfo)] +dequeueTransfersSTM q c = do + !(removed, ts) <- partition (c . fst) <$> readTList (queuelist q) + let !len = length ts + void $ writeTVar (queuesize q) len + setTList (queuelist q) ts + return removed diff --git a/Assistant/TransferSlots.hs b/Assistant/TransferSlots.hs new file mode 100644 index 0000000000..c16871f468 --- /dev/null +++ b/Assistant/TransferSlots.hs @@ -0,0 +1,321 @@ +{- git-annex assistant transfer slots + - + - Copyright 2012-2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE CPP #-} + +module Assistant.TransferSlots where + +import Control.Concurrent.STM + +import Assistant.Common +import Utility.ThreadScheduler +import Utility.NotificationBroadcaster +import Assistant.Types.TransferSlots +import Assistant.DaemonStatus +import Annex.TransferrerPool +import Types.TransferrerPool +import Assistant.Types.TransferQueue +import Assistant.TransferQueue +import Assistant.Alert +import Assistant.Alert.Utility +import Assistant.Commits +import Assistant.Drop +import Types.Transfer +import Logs.Transfer +import Logs.Location +import qualified Git +import qualified Annex +import qualified Remote +import qualified Types.Remote as Remote +import Annex.Content +import Annex.Wanted +import Annex.StallDetection +import Utility.Batch +import Types.NumCopies + +import Data.Either +import qualified Data.Map as M +import qualified Control.Exception as E +import Control.Concurrent +import qualified Control.Concurrent.MSemN as MSemN +#ifndef mingw32_HOST_OS +import System.Posix.Process (getProcessGroupIDOf) +import System.Posix.Signals (signalProcessGroup, sigTERM, sigKILL) +#else +import System.Win32.Process (terminateProcessById) +#endif + +type TransferGenerator = Assistant (Maybe (Transfer, TransferInfo, Transferrer -> Assistant ())) + +{- Waits until a transfer slot becomes available, then runs a + - TransferGenerator, and then runs the transfer action in its own thread. + -} +inTransferSlot :: RunTransferrer -> TransferGenerator -> Assistant () +inTransferSlot rt gen = do + flip MSemN.wait 1 <<~ transferSlots + runTransferThread rt =<< gen + +{- Runs a TransferGenerator, and its transfer action, + - without waiting for a slot to become available. -} +inImmediateTransferSlot :: RunTransferrer -> TransferGenerator -> Assistant () +inImmediateTransferSlot rt gen = do + flip MSemN.signal (-1) <<~ transferSlots + runTransferThread rt =<< gen + +{- Runs a transfer action, in an already allocated transfer slot. + - Once it finishes, frees the transfer slot. + - + - Note that the action is subject to being killed when the transfer + - is canceled or paused. + - + - A PauseTransfer exception is handled by letting the action be killed, + - then pausing the thread until a ResumeTransfer exception is raised, + - then rerunning the action. + -} +runTransferThread :: RunTransferrer -> Maybe (Transfer, TransferInfo, Transferrer -> Assistant ()) -> Assistant () +runTransferThread _ Nothing = flip MSemN.signal 1 <<~ transferSlots +runTransferThread rt (Just (t, info, a)) = do + d <- getAssistant id + mkcheck <- checkNetworkConnections + <$> getAssistant daemonStatusHandle + aio <- asIO1 a + tid <- liftIO $ forkIO $ runTransferThread' mkcheck rt d aio + updateTransferInfo t $ info { transferTid = Just tid } + +runTransferThread' :: MkCheckTransferrer -> RunTransferrer -> AssistantData -> (Transferrer -> IO ()) -> IO () +runTransferThread' mkcheck rt d run = go + where + go = catchPauseResume $ do + p <- runAssistant d $ liftAnnex $ + Annex.getRead Annex.transferrerpool + signalactonsvar <- runAssistant d $ liftAnnex $ + Annex.getRead Annex.signalactions + withTransferrer' True signalactonsvar mkcheck rt p run + pause = catchPauseResume $ + runEvery (Seconds 86400) noop + {- Note: This must use E.try, rather than E.catch. + - When E.catch is used, and has called go in its exception + - handler, Control.Concurrent.throwTo will block sometimes + - when signaling. Using E.try avoids the problem. 
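{- Editor's note: a minimal sketch (hypothetical withSlot helper) of the
 - quantity-semaphore discipline used by inTransferSlot and
 - runTransferThread above: wait 1 before starting work, signal 1 when the
 - thread finishes. Signalling (-1) instead, as inImmediateTransferSlot
 - does, runs immediately by going one slot into debt. The real code
 - signals from the transfer thread rather than with `finally`. -}
import qualified Control.Concurrent.MSemN as MSemN
import Control.Exception (finally)

withSlot :: MSemN.MSemN Int -> IO a -> IO a
withSlot slots action = do
	MSemN.wait slots 1
	action `finally` MSemN.signal slots 1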
-}
+	catchPauseResume a' = do
+		r <- E.try a' :: IO (Either E.SomeException ())
+		case r of
+			Left e -> case E.fromException e of
+				Just PauseTransfer -> pause
+				Just ResumeTransfer -> go
+				_ -> done
+			_ -> done
+	done = runAssistant d $
+		flip MSemN.signal 1 <<~ transferSlots
+
+{- By the time this is called, the daemonstatus's currentTransfers map should
+ - already have been updated to include the transfer. -}
+genTransfer :: Transfer -> TransferInfo -> TransferGenerator
+genTransfer t info = case transferRemote info of
+	Just remote -> ifM (unpluggedremovabledrive remote)
+		( do
+			-- optimisation, since the transfer would fail
+			liftAnnex $ recordFailedTransfer t info
+			void $ removeTransfer t
+			return Nothing
+		, ifM (liftAnnex $ shouldTransfer t info)
+			( do
+				qp <- liftAnnex $ coreQuotePath <$> Annex.getGitConfig
+				debug [ "Transferring:" , describeTransfer qp t info ]
+				notifyTransfer
+				let sd = getStallDetection (transferDirection t) remote
+				return $ Just (t, info, go remote sd)
+			, do
+				qp <- liftAnnex $ coreQuotePath <$> Annex.getGitConfig
+				debug [ "Skipping unnecessary transfer:",
+					describeTransfer qp t info ]
+				void $ removeTransfer t
+				finishedTransfer t (Just info)
+				return Nothing
+			)
+		)
+	_ -> return Nothing
+  where
+	direction = transferDirection t
+	isdownload = direction == Download
+
+	unpluggedremovabledrive remote = Git.repoIsLocalUnknown
+		<$> liftAnnex (Remote.getRepo remote)
+
+	{- Alerts are only shown for successful transfers.
+	 - Transfers can temporarily fail for many reasons,
+	 - so there's no point in bothering the user about
+	 - those. The assistant should recover.
+	 -
+	 - After a successful upload, handle dropping it from
+	 - here, if desired. In this case, the remote it was
+	 - uploaded to is known to have it.
+	 -
+	 - Also, after a successful transfer, the location
+	 - log has changed. Indicate that a commit has been
+	 - made, in order to queue a push of the git-annex
+	 - branch out to remotes that did not participate
+	 - in the transfer.
+	 -
+	 - If the process failed, it could have crashed,
+	 - so remove the transfer from the list of current
+	 - transfers, just in case it didn't stop
+	 - in a way that lets the TransferWatcher do its
+	 - usual cleanup. However, first check if something else is
+	 - running the transfer, to avoid removing active transfers.
+	 -}
+	go remote sd transferrer = ifM (isRight <$> performTransfer sd AssistantLevel liftAnnex (transferRemote info) t info transferrer)
+		( do
+			case associatedFile info of
+				AssociatedFile Nothing -> noop
+				AssociatedFile (Just af) -> void $
+					addAlert $ makeAlertFiller True $
+						transferFileAlert direction True (fromRawFilePath af)
+			unless isdownload $
+				handleDrops
+					("object uploaded to " ++ show remote)
+					True (transferKey t)
+					(associatedFile info)
+					[mkVerifiedCopy RecentlyVerifiedCopy remote]
+			void recordCommit
+		, whenM (liftAnnex $ isNothing <$> checkTransfer t) $
+			void $ removeTransfer t
+		)
+
+{- Called right before a transfer begins, this is a last chance to avoid
+ - unnecessary transfers.
+ -
+ - For downloads, we obviously don't need to download if we already
+ - have the object.
+ -
+ - Similarly, for uploads, check if the remote is known to already have
+ - the object.
+ -
+ - Also, uploads get queued to all remotes, in order of cost.
+ - This may mean, for example, that an object is uploaded over the LAN
+ - to a locally paired client, and once that upload is done, a more
+ - expensive transfer remote no longer wants the object. (Since
+ - all the clients have it already.)
So do one last check if this is still + - preferred content. + - + - We'll also do one last preferred content check for downloads. An + - example of a case where this could be needed is if a download is queued + - for a file that gets moved out of an archive directory -- but before + - that download can happen, the file is put back in the archive. + -} +shouldTransfer :: Transfer -> TransferInfo -> Annex Bool +shouldTransfer t info + | transferDirection t == Download = + (not <$> inAnnex key) <&&> wantGet True (Just key) file + | transferDirection t == Upload = case transferRemote info of + Nothing -> return False + Just r -> notinremote r + <&&> wantGetBy True (Just key) file (Remote.uuid r) + | otherwise = return False + where + key = transferKey t + file = associatedFile info + + {- Trust the location log to check if the remote already has + - the key. This avoids a roundtrip to the remote. -} + notinremote r = notElem (Remote.uuid r) <$> loggedLocations key + +{- Queue uploads of files downloaded to us, spreading them + - out to other reachable remotes. + - + - Downloading a file may have caused a remote to not want it; + - so check for drops from remotes. + - + - Uploading a file may cause the local repo, or some other remote to not + - want it; handle that too. + -} +finishedTransfer :: Transfer -> Maybe TransferInfo -> Assistant () +finishedTransfer t (Just info) + | transferDirection t == Download = + whenM (liftAnnex $ inAnnex $ transferKey t) $ do + dodrops False + void $ queueTransfersMatching (/= transferUUID t) + "newly received object" + Later (transferKey t) (associatedFile info) Upload + | otherwise = dodrops True + where + dodrops fromhere = do + qp <- liftAnnex $ coreQuotePath <$> Annex.getGitConfig + handleDrops + ("drop wanted after " ++ describeTransfer qp t info) + fromhere (transferKey t) (associatedFile info) [] +finishedTransfer _ _ = noop + +{- Pause a running transfer. -} +pauseTransfer :: Transfer -> Assistant () +pauseTransfer = cancelTransfer True + +{- Cancel a running transfer. -} +cancelTransfer :: Bool -> Transfer -> Assistant () +cancelTransfer pause t = do + m <- getCurrentTransfers + unless pause $ + {- remove queued transfer -} + void $ dequeueTransfers $ equivilantTransfer t + {- stop running transfer -} + maybe noop stop (M.lookup t m) + where + stop info = do + {- When there's a thread associated with the + - transfer, it's signaled first, to avoid it + - displaying any alert about the transfer having + - failed when the transfer process is killed. -} + liftIO $ maybe noop signalthread $ transferTid info + liftIO $ maybe noop killproc $ transferPid info + if pause + then void $ alterTransferInfo t $ + \i -> i { transferPaused = True } + else void $ removeTransfer t + signalthread tid + | pause = throwTo tid PauseTransfer + | otherwise = killThread tid + killproc pid = void $ tryIO $ do +#ifndef mingw32_HOST_OS + {- In order to stop helper processes like rsync, + - kill the whole process group of the process + - running the transfer. -} + g <- getProcessGroupIDOf pid + let signal sig = void $ tryIO $ signalProcessGroup sig g + signal sigTERM + threadDelay 50000 -- 0.05 second grace period + signal sigKILL +#else + terminateProcessById pid +#endif + +{- Start or resume a transfer. 
-}
+startTransfer :: Transfer -> Assistant ()
+startTransfer t = do
+	m <- getCurrentTransfers
+	maybe startqueued go (M.lookup t m)
+  where
+	go info = maybe (start info) resume $ transferTid info
+	startqueued = do
+		is <- map snd <$> getMatchingTransfers (== t)
+		maybe noop start $ headMaybe is
+	resume tid = do
+		alterTransferInfo t $ \i -> i { transferPaused = False }
+		liftIO $ throwTo tid ResumeTransfer
+	start info = do
+		rt <- liftAnnex . mkRunTransferrer
+			=<< liftIO getBatchCommandMaker
+		inImmediateTransferSlot rt $
+			genTransfer t info
+
+getCurrentTransfers :: Assistant TransferMap
+getCurrentTransfers = currentTransfers <$> getDaemonStatus
+
+checkNetworkConnections :: DaemonStatusHandle -> MkCheckTransferrer
+checkNetworkConnections dstatushandle = do
+	dstatus <- atomically $ readTVar dstatushandle
+	h <- newNotificationHandle False (networkConnectedNotifier dstatus)
+	return $ not <$> checkNotification h
diff --git a/Assistant/Types/Alert.hs b/Assistant/Types/Alert.hs
new file mode 100644
index 0000000000..e6dcdc9ce2
--- /dev/null
+++ b/Assistant/Types/Alert.hs
@@ -0,0 +1,79 @@
+{- git-annex assistant alert types
+ -
+ - Copyright 2013 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+module Assistant.Types.Alert where
+
+import Utility.Tense
+
+import Data.Text (Text)
+import qualified Data.Map as M
+
+{- Different classes of alerts are displayed differently. -}
+data AlertClass = Success | Message | Activity | Warning | Error
+	deriving (Eq, Ord)
+
+data AlertPriority = Filler | Low | Medium | High | Pinned
+	deriving (Eq, Ord)
+
+{- An alert can have a name, which is used to combine it with other similar
+ - alerts. -}
+data AlertName
+	= FileAlert TenseChunk
+	| SanityCheckFixAlert
+	| WarningAlert String
+	| PairAlert String
+	| ConnectionNeededAlert
+	| RemoteRemovalAlert String
+	| CloudRepoNeededAlert
+	| SyncAlert
+	| NotFsckedAlert
+	| UpgradeAlert
+	| UnusedFilesAlert
+	deriving (Eq)
+
+{- The first alert is the new alert, the second is an old alert.
+ - Should return a modified version of the old alert. -}
+type AlertCombiner = Alert -> Alert -> Maybe Alert
+
+data Alert = Alert
+	{ alertClass :: AlertClass
+	, alertHeader :: Maybe TenseText
+	, alertMessageRender :: Alert -> TenseText
+	, alertData :: [TenseChunk]
+	, alertCounter :: Int
+	, alertBlockDisplay :: Bool
+	, alertClosable :: Bool
+	, alertPriority :: AlertPriority
+	, alertIcon :: Maybe AlertIcon
+	, alertCombiner :: Maybe AlertCombiner
+	, alertName :: Maybe AlertName
+	, alertButtons :: [AlertButton]
+	}
+
+data AlertIcon = ActivityIcon | SyncIcon | SuccessIcon | ErrorIcon | InfoIcon | UpgradeIcon | ConnectionIcon
+
+type AlertMap = M.Map AlertId Alert
+
+{- Higher AlertId indicates a more recent alert. -}
+newtype AlertId = AlertId Integer
+	deriving (Read, Show, Eq, Ord)
+
+firstAlertId :: AlertId
+firstAlertId = AlertId 0
+
+nextAlertId :: AlertId -> AlertId
+nextAlertId (AlertId i) = AlertId $ succ i
+
+{- When clicked, a button always redirects to a URL.
+ - It may also run an IO action in the background, which is useful
+ - to make the button close or otherwise change the alert.
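{- Editor's note: a hypothetical AlertCombiner, to illustrate the hook
 - defined above (it assumes the types of this module are in scope; it is
 - illustrative, not a combiner the assistant actually installs). When a
 - new alert shares a name with an old one, keep the old alert but bump
 - its counter and refresh its data, instead of stacking duplicates. -}
import Data.Maybe (isJust)

countingCombiner :: AlertCombiner
countingCombiner new old
	| isJust (alertName new) && alertName new == alertName old = Just old
		{ alertCounter = alertCounter old + 1
		, alertData = alertData new
		}
	| otherwise = Nothing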
-} +data AlertButton = AlertButton + { buttonLabel :: Text + , buttonUrl :: Text + , buttonAction :: Maybe (AlertId -> IO ()) + , buttonPrimary :: Bool + } diff --git a/Assistant/Types/BranchChange.hs b/Assistant/Types/BranchChange.hs new file mode 100644 index 0000000000..1f4128b754 --- /dev/null +++ b/Assistant/Types/BranchChange.hs @@ -0,0 +1,20 @@ +{- git-annex assistant git-annex branch change tracking + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Types.BranchChange where + +import Control.Concurrent.MSampleVar +import Control.Applicative +import Prelude + +newtype BranchChangeHandle = BranchChangeHandle (MSampleVar ()) + +newBranchChangeHandle :: IO BranchChangeHandle +newBranchChangeHandle = BranchChangeHandle <$> newEmptySV + +fromBranchChangeHandle :: BranchChangeHandle -> MSampleVar () +fromBranchChangeHandle (BranchChangeHandle v) = v diff --git a/Assistant/Types/Changes.hs b/Assistant/Types/Changes.hs new file mode 100644 index 0000000000..a08810ba54 --- /dev/null +++ b/Assistant/Types/Changes.hs @@ -0,0 +1,101 @@ +{- git-annex assistant change tracking + - + - Copyright 2012-2015 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE BangPatterns #-} + +module Assistant.Types.Changes where + +import Types.KeySource +import Types.Key +import Utility.TList +import Utility.FileSystemEncoding +import Annex.Ingest + +import Control.Concurrent.STM +import Data.Time.Clock +import qualified Data.Set as S + +{- An un-ordered pool of Changes that have been noticed and should be + - staged and committed. Changes will typically be in order, but ordering + - may be lost. In any case, order should not matter, as any given Change + - may later be reverted by a later Change (ie, a file is added and then + - deleted). Code that processes the changes needs to deal with such + - scenarios. + -} +type ChangePool = TList Change + +newChangePool :: IO ChangePool +newChangePool = atomically newTList + +data Change + = Change + { changeTime :: UTCTime + , _changeFile :: FilePath + , changeInfo :: ChangeInfo + } + | PendingAddChange + { changeTime ::UTCTime + , _changeFile :: FilePath + } + | InProcessAddChange + { changeTime ::UTCTime + , lockedDown :: LockedDown + } + deriving (Show) + +data ChangeInfo = AddKeyChange Key | AddFileChange | LinkChange (Maybe Key) | RmChange + deriving (Show, Eq, Ord) + +changeInfoKey :: ChangeInfo -> Maybe Key +changeInfoKey (AddKeyChange k) = Just k +changeInfoKey (LinkChange (Just k)) = Just k +changeInfoKey _ = Nothing + +changeFile :: Change -> FilePath +changeFile (Change _ f _) = f +changeFile (PendingAddChange _ f) = f +changeFile (InProcessAddChange _ ld) = fromRawFilePath $ keyFilename $ keySource ld + +isPendingAddChange :: Change -> Bool +isPendingAddChange (PendingAddChange {}) = True +isPendingAddChange _ = False + +isInProcessAddChange :: Change -> Bool +isInProcessAddChange (InProcessAddChange {}) = True +isInProcessAddChange _ = False + +retryChange :: Change -> Change +retryChange c@(InProcessAddChange time _) = + PendingAddChange time $ changeFile c +retryChange c = c + +finishedChange :: Change -> Key -> Change +finishedChange c@(InProcessAddChange {}) k = Change + { changeTime = changeTime c + , _changeFile = changeFile c + , changeInfo = AddKeyChange k + } +finishedChange c _ = c + +{- Combine PendingAddChanges that are for the same file. + - Multiple such often get noticed when eg, a file is opened and then + - closed in quick succession. 
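{- Editor's note: a sketch of the MSampleVar idiom behind BranchChangeHandle
 - above (helper names are hypothetical). writeSV overwrites any pending
 - sample, so a burst of branch-change signals collapses into a single
 - wakeup; readSV blocks until the next sample arrives. -}
import Control.Concurrent.MSampleVar

signalChange :: MSampleVar () -> IO ()
signalChange v = writeSV v ()

waitNextChange :: MSampleVar () -> IO ()
waitNextChange = readSV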
-} +simplifyChanges :: [Change] -> [Change] +simplifyChanges [c] = [c] +simplifyChanges cl = go cl S.empty [] + where + go [] _ l = reverse l + go (c:cs) seen l + | isPendingAddChange c = + if S.member f seen + then go cs seen l + else + let !seen' = S.insert f seen + in go cs seen' (c:l) + | otherwise = go cs seen (c:l) + where + f = changeFile c diff --git a/Assistant/Types/Commits.hs b/Assistant/Types/Commits.hs new file mode 100644 index 0000000000..d7c13b027e --- /dev/null +++ b/Assistant/Types/Commits.hs @@ -0,0 +1,19 @@ +{- git-annex assistant commit tracking + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Types.Commits where + +import Utility.TList + +import Control.Concurrent.STM + +type CommitChan = TList Commit + +data Commit = Commit + +newCommitChan :: IO CommitChan +newCommitChan = atomically newTList diff --git a/Assistant/Types/CredPairCache.hs b/Assistant/Types/CredPairCache.hs new file mode 100644 index 0000000000..d70c47b943 --- /dev/null +++ b/Assistant/Types/CredPairCache.hs @@ -0,0 +1,18 @@ +{- git-annex assistant CredPair cache. + - + - Copyright 2014 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Types.CredPairCache where + +import Types.Creds + +import Control.Concurrent +import qualified Data.Map as M + +type CredPairCache = MVar (M.Map Login Password) + +newCredPairCache :: IO CredPairCache +newCredPairCache = newMVar M.empty diff --git a/Assistant/Types/DaemonStatus.hs b/Assistant/Types/DaemonStatus.hs new file mode 100644 index 0000000000..9c547aa10a --- /dev/null +++ b/Assistant/Types/DaemonStatus.hs @@ -0,0 +1,119 @@ +{- git-annex assistant daemon status + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Types.DaemonStatus where + +import Annex.Common +import Assistant.Pairing +import Utility.NotificationBroadcaster +import Types.Transfer +import Assistant.Types.ThreadName +import Assistant.Types.Alert +import Utility.Url + +import Control.Concurrent.STM +import Control.Concurrent.MVar +import Control.Concurrent.Async +import Data.Time.Clock.POSIX +import qualified Data.Map as M +import qualified Data.Set as S + +data DaemonStatus = DaemonStatus + -- All the named threads that comprise the daemon, + -- and actions to run to restart them. + { startedThreads :: M.Map ThreadName (Async (), IO ()) + -- False when the daemon is performing its startup scan + , scanComplete :: Bool + -- True when all files should be restaged. + , forceRestage :: Bool + -- Time when a previous process of the daemon was running ok + , lastRunning :: Maybe POSIXTime + -- True when the daily sanity checker is running + , sanityCheckRunning :: Bool + -- Last time the daily sanity checker ran + , lastSanityCheck :: Maybe POSIXTime + -- True when a scan for file transfers is running + , transferScanRunning :: Bool + -- Currently running file content transfers + , currentTransfers :: TransferMap + -- Messages to display to the user. 
+ , alertMap :: AlertMap + , lastAlertId :: AlertId + -- Ordered list of all remotes that can be synced with + , syncRemotes :: [Remote] + -- Ordered list of remotes to sync git with + , syncGitRemotes :: [Remote] + -- Ordered list of remotes to sync data with + , syncDataRemotes :: [Remote] + -- Ordered list of remotes to export to + , exportRemotes :: [Remote] + -- Ordered list of remotes that data can be downloaded from + , downloadRemotes :: [Remote] + -- Are we syncing to any cloud remotes? + , syncingToCloudRemote :: Bool + -- Set of uuids of remotes that are currently connected. + , currentlyConnectedRemotes :: S.Set UUID + -- Pairing request that is in progress. + , pairingInProgress :: Maybe PairingInProgress + -- Broadcasts notifications about all changes to the DaemonStatus. + , changeNotifier :: NotificationBroadcaster + -- Broadcasts notifications when queued or current transfers change. + , transferNotifier :: NotificationBroadcaster + -- Broadcasts notifications when there's a change to the alerts. + , alertNotifier :: NotificationBroadcaster + -- Broadcasts notifications when the syncRemotes change. + , syncRemotesNotifier :: NotificationBroadcaster + -- Broadcasts notifications when the scheduleLog changes. + , scheduleLogNotifier :: NotificationBroadcaster + -- Broadcasts a notification once the startup sanity check has run. + , startupSanityCheckNotifier :: NotificationBroadcaster + -- Broadcasts notifications when the network is connected. + , networkConnectedNotifier :: NotificationBroadcaster + -- Broadcasts notifications when a global redirect is needed. + , globalRedirNotifier :: NotificationBroadcaster + , globalRedirUrl :: Maybe URLString + -- Actions to run after a Key is transferred. + , transferHook :: M.Map Key (Transfer -> IO ()) + -- MVars to signal when a remote gets connected. + , connectRemoteNotifiers :: M.Map UUID [MVar ()] + } + +type TransferMap = M.Map Transfer TransferInfo + +type DaemonStatusHandle = TVar DaemonStatus + +newDaemonStatus :: IO DaemonStatus +newDaemonStatus = DaemonStatus + <$> pure M.empty + <*> pure False + <*> pure False + <*> pure Nothing + <*> pure False + <*> pure Nothing + <*> pure False + <*> pure M.empty + <*> pure M.empty + <*> pure firstAlertId + <*> pure [] + <*> pure [] + <*> pure [] + <*> pure [] + <*> pure [] + <*> pure False + <*> pure S.empty + <*> pure Nothing + <*> newNotificationBroadcaster + <*> newNotificationBroadcaster + <*> newNotificationBroadcaster + <*> newNotificationBroadcaster + <*> newNotificationBroadcaster + <*> newNotificationBroadcaster + <*> newNotificationBroadcaster + <*> newNotificationBroadcaster + <*> pure Nothing + <*> pure M.empty + <*> pure M.empty diff --git a/Assistant/Types/NamedThread.hs b/Assistant/Types/NamedThread.hs new file mode 100644 index 0000000000..866affc90c --- /dev/null +++ b/Assistant/Types/NamedThread.hs @@ -0,0 +1,21 @@ +{- named threads + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Types.NamedThread where + +import Assistant.Monad +import Assistant.Types.ThreadName + +{- Information about a named thread that can be run. -} +data NamedThread = NamedThread Bool ThreadName (Assistant ()) + +namedThread :: String -> Assistant () -> NamedThread +namedThread = NamedThread True . ThreadName + +{- A named thread that can start running before the startup sanity check. -} +namedThreadUnchecked :: String -> Assistant () -> NamedThread +namedThreadUnchecked = NamedThread False . 
ThreadName diff --git a/Assistant/Types/Pushes.hs b/Assistant/Types/Pushes.hs new file mode 100644 index 0000000000..57cafb4fae --- /dev/null +++ b/Assistant/Types/Pushes.hs @@ -0,0 +1,24 @@ +{- git-annex assistant push tracking + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Types.Pushes where + +import Annex.Common + +import Control.Concurrent.STM +import Data.Time.Clock +import qualified Data.Map as M + +{- Track the most recent push failure for each remote. -} +type PushMap = M.Map Remote UTCTime +type FailedPushMap = TMVar PushMap + +{- The TMVar starts empty, and is left empty when there are no + - failed pushes. This way we can block until there are some failed pushes. + -} +newFailedPushMap :: IO FailedPushMap +newFailedPushMap = atomically newEmptyTMVar diff --git a/Assistant/Types/RemoteControl.hs b/Assistant/Types/RemoteControl.hs new file mode 100644 index 0000000000..433a37bd9b --- /dev/null +++ b/Assistant/Types/RemoteControl.hs @@ -0,0 +1,16 @@ +{- git-annex assistant RemoteDaemon control + - + - Copyright 2014 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Types.RemoteControl where + +import qualified RemoteDaemon.Types as RemoteDaemon +import Control.Concurrent + +type RemoteControl = Chan RemoteDaemon.Consumed + +newRemoteControl :: IO RemoteControl +newRemoteControl = newChan diff --git a/Assistant/Types/RepoProblem.hs b/Assistant/Types/RepoProblem.hs new file mode 100644 index 0000000000..afc7e4b2f3 --- /dev/null +++ b/Assistant/Types/RepoProblem.hs @@ -0,0 +1,28 @@ +{- git-annex assistant repository problem tracking + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Types.RepoProblem where + +import Types +import Utility.TList + +import Control.Concurrent.STM +import Data.Function + +data RepoProblem = RepoProblem + { problemUUID :: UUID + , afterFix :: IO () + } + +{- The afterFix actions are assumed to all be equivalent. -} +sameRepoProblem :: RepoProblem -> RepoProblem -> Bool +sameRepoProblem = (==) `on` problemUUID + +type RepoProblemChan = TList RepoProblem + +newRepoProblemChan :: IO RepoProblemChan +newRepoProblemChan = atomically newTList diff --git a/Assistant/Types/ScanRemotes.hs b/Assistant/Types/ScanRemotes.hs new file mode 100644 index 0000000000..09fa75a877 --- /dev/null +++ b/Assistant/Types/ScanRemotes.hs @@ -0,0 +1,25 @@ +{- git-annex assistant remotes needing scanning + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Types.ScanRemotes where + +import Annex.Common + +import Control.Concurrent.STM +import qualified Data.Map as M + +data ScanInfo = ScanInfo + { scanPriority :: Float + , fullScan :: Bool + } + +type ScanRemoteMap = TMVar (M.Map Remote ScanInfo) + +{- The TMVar starts empty, and is left empty when there are no remotes + - to scan. -} +newScanRemoteMap :: IO ScanRemoteMap +newScanRemoteMap = atomically newEmptyTMVar diff --git a/Assistant/Types/ThreadName.hs b/Assistant/Types/ThreadName.hs new file mode 100644 index 0000000000..06435f474f --- /dev/null +++ b/Assistant/Types/ThreadName.hs @@ -0,0 +1,14 @@ +{- name of a thread + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
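{- Editor's note: FailedPushMap and ScanRemoteMap above lean on the same
 - trick: keep the TMVar empty while the map is empty, so takeTMVar blocks
 - until there is something to do. A minimal sketch with hypothetical
 - names: -}
import Control.Concurrent.STM
import Data.Maybe (fromMaybe)
import qualified Data.Map as M

-- Blocks until the map is non-empty, and takes it, emptying the TMVar.
waitForWork :: TMVar (M.Map k v) -> STM (M.Map k v)
waitForWork = takeTMVar

-- Inserts into the map, filling the TMVar if it was empty.
addWork :: Ord k => TMVar (M.Map k v) -> k -> v -> STM ()
addWork var k v = do
	m <- fromMaybe M.empty <$> tryTakeTMVar var
	putTMVar var (M.insert k v m)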
+ -} + +module Assistant.Types.ThreadName where + +newtype ThreadName = ThreadName String + deriving (Eq, Read, Show, Ord) + +fromThreadName :: ThreadName -> String +fromThreadName (ThreadName n) = n diff --git a/Assistant/Types/ThreadedMonad.hs b/Assistant/Types/ThreadedMonad.hs new file mode 100644 index 0000000000..4c07317807 --- /dev/null +++ b/Assistant/Types/ThreadedMonad.hs @@ -0,0 +1,40 @@ +{- making the Annex monad available across threads + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Types.ThreadedMonad where + +import Annex.Common +import qualified Annex + +import Control.Concurrent +import Data.Tuple + +{- The Annex state is stored in a MVar, so that threaded actions can access + - it. -} +type ThreadState = MVar (Annex.AnnexState, Annex.AnnexRead) + +{- Stores the Annex state in a MVar. + - + - Once the action is finished, retrieves the state from the MVar. + -} +withThreadState :: (ThreadState -> Annex a) -> Annex a +withThreadState a = do + state <- Annex.getState id + rd <- Annex.getRead id + mvar <- liftIO $ newMVar (state, rd) + r <- a mvar + newstate <- liftIO $ fst <$> takeMVar mvar + Annex.changeState (const newstate) + return r + +{- Runs an Annex action, using the state from the MVar. + - + - This serializes calls by threads; only one thread can run in Annex at a + - time. -} +runThreadState :: ThreadState -> Annex a -> IO a +runThreadState mvar a = modifyMVar mvar $ \v -> swap <$> Annex.run v a + diff --git a/Assistant/Types/TransferQueue.hs b/Assistant/Types/TransferQueue.hs new file mode 100644 index 0000000000..f15a43200c --- /dev/null +++ b/Assistant/Types/TransferQueue.hs @@ -0,0 +1,29 @@ +{- git-annex assistant pending transfer queue + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Assistant.Types.TransferQueue where + +import Annex.Common +import Types.Transfer + +import Control.Concurrent.STM +import Utility.TList + +data TransferQueue = TransferQueue + { queuesize :: TVar Int + , queuelist :: TList (Transfer, TransferInfo) + , deferreddownloads :: TList (Key, AssociatedFile) + } + +data Schedule = Next | Later + deriving (Eq) + +newTransferQueue :: IO TransferQueue +newTransferQueue = atomically $ TransferQueue + <$> newTVar 0 + <*> newTList + <*> newTList diff --git a/Assistant/Types/TransferSlots.hs b/Assistant/Types/TransferSlots.hs new file mode 100644 index 0000000000..bbcc84fb4b --- /dev/null +++ b/Assistant/Types/TransferSlots.hs @@ -0,0 +1,34 @@ +{- git-annex assistant transfer slots + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE DeriveDataTypeable #-} + +module Assistant.Types.TransferSlots where + +import qualified Control.Exception as E +import qualified Control.Concurrent.MSemN as MSemN +import Data.Typeable + +type TransferSlots = MSemN.MSemN Int + +{- A special exception that can be thrown to pause or resume a transfer, while + - keeping its slot in use. -} +data TransferException = PauseTransfer | ResumeTransfer + deriving (Show, Eq, Typeable) + +instance E.Exception TransferException + +{- Number of concurrent transfers allowed to be run from the assistant. + - + - Transfers launched by other means, including by remote assistants, + - do not currently take up slots. 
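{- Editor's note: a stripped-down sketch of what runThreadState above does
 - (hypothetical names, a plain state type instead of AnnexState): the
 - state lives in an MVar, and modifyMVar takes it, runs the action, and
 - puts the updated state back, so concurrent threads are serialized and
 - never observe intermediate state. The tuple flip mirrors the `swap`
 - over Annex.run's (result, state) pair. -}
import Control.Concurrent.MVar

runSerialized :: MVar s -> (s -> IO (a, s)) -> IO a
runSerialized mv run = modifyMVar mv $ \s -> do
	(a, s') <- run s
	return (s', a)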
+ -} +numSlots :: Int +numSlots = 1 + +newTransferSlots :: IO TransferSlots +newTransferSlots = MSemN.new numSlots diff --git a/Assistant/Types/UrlRenderer.hs b/Assistant/Types/UrlRenderer.hs new file mode 100644 index 0000000000..f0ffb10acd --- /dev/null +++ b/Assistant/Types/UrlRenderer.hs @@ -0,0 +1,26 @@ +{- webapp url renderer access from the assistant + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Assistant.Types.UrlRenderer ( + UrlRenderer, + newUrlRenderer +) where + +#ifdef WITH_WEBAPP + +import Assistant.WebApp (UrlRenderer, newUrlRenderer) + +#else + +data UrlRenderer = UrlRenderer -- dummy type + +newUrlRenderer :: IO UrlRenderer +newUrlRenderer = return UrlRenderer + +#endif diff --git a/Assistant/Unused.hs b/Assistant/Unused.hs new file mode 100644 index 0000000000..8af61527d2 --- /dev/null +++ b/Assistant/Unused.hs @@ -0,0 +1,85 @@ +{- git-annex assistant unused files + - + - Copyright 2014 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Assistant.Unused where + +import qualified Data.Map as M + +import Assistant.Common +import qualified Git +import Logs.Unused +import Logs.Location +import Annex.Content +import Utility.DataUnits +import Utility.DiskFree +import Utility.HumanTime +import Utility.Tense + +import Data.Time.Clock.POSIX +import qualified Data.Text as T + +describeUnused :: Assistant (Maybe TenseText) +describeUnused = describeUnused' False + +describeUnusedWhenBig :: Assistant (Maybe TenseText) +describeUnusedWhenBig = describeUnused' True + +{- This uses heuristics: 1000 unused keys, or more unused keys + - than the remaining free disk space, or more than 1/10th the total + - disk space being unused keys all suggest a problem. -} +describeUnused' :: Bool -> Assistant (Maybe TenseText) +describeUnused' whenbig = liftAnnex $ go =<< readUnusedLog "" + where + go m = do + let num = M.size m + let diskused = foldl' sumkeysize 0 (M.keys m) + df <- forpath getDiskFree + disksize <- forpath getDiskSize + return $ if num == 0 + then Nothing + else if not whenbig || moreused df diskused || tenthused disksize diskused + then Just $ tenseWords + [ UnTensed $ T.pack $ roughSize storageUnits False diskused + , Tensed "are" "were" + , "taken up by unused files" + ] + else if num > 1000 + then Just $ tenseWords + [ UnTensed $ T.pack $ show num ++ " unused files" + , Tensed "exist" "existed" + ] + else Nothing + + moreused Nothing _ = False + moreused (Just df) used = df <= used + + tenthused Nothing _ = False + tenthused (Just disksize) used = used >= disksize `div` 10 + + sumkeysize s k = s + fromMaybe 0 (fromKey keySize k) + + forpath a = inRepo $ liftIO . a . fromRawFilePath . Git.repoPath + +{- With a duration, expires all unused files that are older. + - With Nothing, expires *all* unused files. 
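{- Editor's note: the three thresholds used by describeUnused' above,
 - restated as one standalone predicate (hypothetical helper, same
 - arithmetic): unused data is worth warning about when it exceeds the
 - free space left on the disk, or a tenth of the disk's total size, or
 - when there are over 1000 unused keys. -}
unusedProblem :: Int -> Maybe Integer -> Maybe Integer -> Integer -> Bool
unusedProblem numkeys free total used =
	numkeys > 1000
		|| maybe False (<= used) free
		|| maybe False (\t -> used >= t `div` 10) total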
-} +expireUnused :: Maybe Duration -> Assistant () +expireUnused duration = do + m <- liftAnnex $ readUnusedLog "" + now <- liftIO getPOSIXTime + let oldkeys = M.keys $ M.filter (tooold now) m + forM_ oldkeys $ \k -> do + debug ["removing old unused key", serializeKey k] + liftAnnex $ tryNonAsync $ do + lockContentForRemoval k noop removeAnnex + logStatus k InfoMissing + where + boundary = durationToPOSIXTime <$> duration + tooold now (_, mt) = case boundary of + Nothing -> True + Just b -> maybe False (\t -> now - t >= b) mt diff --git a/Assistant/Upgrade.hs b/Assistant/Upgrade.hs new file mode 100644 index 0000000000..fde0b4c8b9 --- /dev/null +++ b/Assistant/Upgrade.hs @@ -0,0 +1,376 @@ +{- git-annex assistant upgrading + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} + +module Assistant.Upgrade where + +import Assistant.Common +import Assistant.Restart +import qualified Annex +import Assistant.Alert +import Assistant.DaemonStatus +import Utility.Env +import Utility.Env.Set +import Types.Distribution +import Types.Transfer +import Logs.Web +import Logs.Location +import Annex.Content +import Annex.UUID +import qualified Backend +import qualified Types.Backend +import Assistant.TransferQueue +import Assistant.TransferSlots +import Remote (remoteFromUUID) +import Annex.Path +import Config.Files +import Utility.ThreadScheduler +import Utility.Tmp.Dir +import Utility.UserInfo +import Utility.Gpg +import Utility.FileMode +import Utility.Metered +import qualified Utility.Lsof as Lsof +import qualified BuildInfo +import qualified Utility.Url as Url +import qualified Annex.Url as Url hiding (download) +import Utility.Tuple +import qualified Utility.RawFilePath as R + +import Data.Either +import qualified Data.Map as M + +{- Upgrade without interaction in the webapp. -} +unattendedUpgrade :: Assistant () +unattendedUpgrade = do + prepUpgrade + url <- runRestart + postUpgrade url + +prepUpgrade :: Assistant () +prepUpgrade = do + void $ addAlert upgradingAlert + liftIO $ setEnv upgradedEnv "1" True + prepRestart + +postUpgrade :: URLString -> Assistant () +postUpgrade = postRestart + +autoUpgradeEnabled :: Assistant Bool +autoUpgradeEnabled = liftAnnex $ (==) AutoUpgrade . annexAutoUpgrade <$> Annex.getGitConfig + +checkSuccessfulUpgrade :: IO Bool +checkSuccessfulUpgrade = isJust <$> getEnv upgradedEnv + +upgradedEnv :: String +upgradedEnv = "GIT_ANNEX_UPGRADED" + +{- Start downloading the distribution key from the web. + - Install a hook that will be run once the download is complete, + - and finishes the upgrade. + - + - Creates the destination directory where the upgrade will be installed + - early, in order to check if another upgrade has happened (or is + - happening). On failure, the directory is removed. + -} +startDistributionDownload :: GitAnnexDistribution -> Assistant () +startDistributionDownload d = go =<< liftIO . 
newVersionLocation d =<< liftIO oldVersionLocation + where + go Nothing = debug ["Skipping redundant upgrade"] + go (Just dest) = do + liftAnnex $ setUrlPresent k u + hook <- asIO1 $ distributionDownloadComplete d dest cleanup + modifyDaemonStatus_ $ \s -> s + { transferHook = M.insert k hook (transferHook s) } + maybe noop (queueTransfer "upgrade" Next (AssociatedFile (Just (toRawFilePath f))) t) + =<< liftAnnex (remoteFromUUID webUUID) + startTransfer t + k = mkKey $ const $ distributionKey d + u = distributionUrl d + f = takeFileName u ++ " (for upgrade)" + t = Transfer + { transferDirection = Download + , transferUUID = webUUID + , transferKeyData = fromKey id k + } + cleanup = liftAnnex $ do + lockContentForRemoval k noop removeAnnex + setUrlMissing k u + logStatus k InfoMissing + +{- Called once the download is done. + - Passed an action that can be used to clean up the downloaded file. + - + - Verifies the content of the downloaded key. + -} +distributionDownloadComplete :: GitAnnexDistribution -> FilePath -> Assistant () -> Transfer -> Assistant () +distributionDownloadComplete d dest cleanup t + | transferDirection t == Download = do + debug ["finished downloading git-annex distribution"] + maybe (failedupgrade "bad download") go + =<< liftAnnex (withObjectLoc k fsckit) + | otherwise = cleanup + where + k = mkKey $ const $ distributionKey d + fsckit f = Backend.maybeLookupBackendVariety (fromKey keyVariety k) >>= \case + Nothing -> return $ Just (fromRawFilePath f) + Just b -> case Types.Backend.verifyKeyContent b of + Nothing -> return $ Just (fromRawFilePath f) + Just verifier -> ifM (verifier k f) + ( return $ Just (fromRawFilePath f) + , return Nothing + ) + go f = do + ua <- asIO $ upgradeToDistribution dest cleanup f + fa <- asIO1 failedupgrade + liftIO $ ua `catchNonAsync` (fa . show) + failedupgrade msg = do + void $ addAlert $ upgradeFailedAlert msg + cleanup + liftIO $ void $ tryIO $ removeDirectoryRecursive dest + +{- The upgrade method varies by OS. + - + - In general, find where the distribution was installed before, + - and unpack the new distribution next to it (in a versioned directory). + - Then update the programFile to point to the new version. + -} +upgradeToDistribution :: FilePath -> Assistant () -> FilePath -> Assistant () +upgradeToDistribution newdir cleanup distributionfile = do + liftIO $ createDirectoryIfMissing True newdir + (program, deleteold) <- unpack + changeprogram program + cleanup + prepUpgrade + url <- runRestart + {- At this point, the new assistant is fully running, so + - it's safe to delete the old version. -} + liftIO $ void $ tryIO deleteold + postUpgrade url + where + changeprogram program = liftIO $ do + unlessM (boolSystem program [Param "version"]) $ + giveup "New git-annex program failed to run! Not using." + pf <- programFile + liftIO $ writeFile pf program + +#ifdef darwin_HOST_OS + {- OS X uses a dmg, so mount it, and copy the contents into place. 
-}
+	unpack = liftIO $ do
+		olddir <- oldVersionLocation
+		withTmpDirIn (fromRawFilePath (parentDir (toRawFilePath newdir))) "git-annex.upgrade" $ \tmpdir -> do
+			void $ boolSystem "hdiutil"
+				[ Param "attach", File distributionfile
+				, Param "-mountpoint", File tmpdir
+				]
+			void $ boolSystem "cp"
+				[ Param "-R"
+				, File $ tmpdir </> installBase </> "Contents"
+				, File $ newdir
+				]
+			void $ boolSystem "hdiutil"
+				[ Param "eject"
+				, File tmpdir
+				]
+			sanitycheck newdir
+		let deleteold = do
+			deleteFromManifest $ olddir </> "Contents" </> "MacOS"
+			makeorigsymlink olddir
+		return (newdir </> "Contents" </> "MacOS" </> "git-annex", deleteold)
+#else
+	{- Linux uses a tarball (so could other POSIX systems), so
+	 - untar it (into a temp directory) and move the directory
+	 - into place. -}
+	unpack = liftIO $ do
+		olddir <- oldVersionLocation
+		withTmpDirIn (fromRawFilePath $ parentDir $ toRawFilePath newdir) "git-annex.upgrade" $ \tmpdir -> do
+			let tarball = tmpdir </> "tar"
+			-- Cannot rely on filename extension, and this also
+			-- avoids problems if tar doesn't support transparent
+			-- decompression.
+			void $ boolSystem "sh"
+				[ Param "-c"
+				, Param $ "zcat < " ++ shellEscape distributionfile ++
+					" > " ++ shellEscape tarball
+				]
+			tarok <- boolSystem "tar"
+				[ Param "xf"
+				, Param tarball
+				, Param "--directory", File tmpdir
+				]
+			unless tarok $
+				giveup $ "failed to untar " ++ distributionfile
+			sanitycheck $ tmpdir </> installBase
+			installby R.rename newdir (tmpdir </> installBase)
+		let deleteold = do
+			deleteFromManifest olddir
+			makeorigsymlink olddir
+		return (newdir </> "git-annex", deleteold)
+	installby a dstdir srcdir =
+		mapM_ (\x -> a (toRawFilePath x) (toRawFilePath (dstdir </> takeFileName x)))
+			=<< dirContents srcdir
+#endif
+	sanitycheck dir =
+		unlessM (doesDirectoryExist dir) $
+			giveup $ "did not find " ++ dir ++ " in " ++ distributionfile
+	makeorigsymlink olddir = do
+		let origdir = fromRawFilePath (parentDir (toRawFilePath olddir)) </> installBase
+		removeWhenExistsWith R.removeLink (toRawFilePath origdir)
+		R.createSymbolicLink (toRawFilePath newdir) (toRawFilePath origdir)
+
+{- Finds where the old version was installed. -}
+oldVersionLocation :: IO FilePath
+oldVersionLocation = readProgramFile >>= \case
+	Nothing -> giveup "Cannot find old distribution bundle; not upgrading."
+	Just pf -> do
+		let pdir = fromRawFilePath $ parentDir $ toRawFilePath pf
+#ifdef darwin_HOST_OS
+		let dirs = splitDirectories pdir
+		{- It will probably be deep inside a git-annex.app directory. -}
+		let olddir = case findIndex ("git-annex.app" `isPrefixOf`) dirs of
+			Nothing -> pdir
+			Just i -> joinPath (take (i + 1) dirs)
+#else
+		let olddir = pdir
+#endif
+		when (null olddir) $
+			giveup $ "Cannot find old distribution bundle; not upgrading. (Looked in " ++ pdir ++ ")"
+		return olddir
+
+{- Finds a place to install the new version.
+ - Generally, put it in the parent directory of where the old version was
+ - installed, and use a version number in the directory name.
+ - If unable to write to there, instead put it in the home directory.
+ -
+ - The directory is created. If it already exists, returns Nothing.
+ -}
+newVersionLocation :: GitAnnexDistribution -> FilePath -> IO (Maybe FilePath)
+newVersionLocation d olddir =
+	trymkdir newloc $ do
+		home <- myHomeDir
+		trymkdir (home </> s) $
+			return Nothing
+  where
+	s = installBase ++ "." ++ distributionVersion d
+	topdir = fromRawFilePath $ parentDir $ toRawFilePath olddir
+	newloc = topdir </> s
+	trymkdir dir fallback =
+		(createDirectory dir >> return (Just dir))
+			`catchIO` const fallback
+
+installBase :: String
+installBase = "git-annex." ++
+#ifdef linux_HOST_OS
+	"linux"
+#else
+#ifdef darwin_HOST_OS
+	"app"
+#else
+	"dir"
+#endif
+#endif
+
+deleteFromManifest :: FilePath -> IO ()
+deleteFromManifest dir = do
+	fs <- map (dir </>) . lines <$> catchDefaultIO "" (readFile manifest)
+	mapM_ (removeWhenExistsWith R.removeLink . toRawFilePath) fs
+	removeWhenExistsWith R.removeLink (toRawFilePath manifest)
+	removeEmptyRecursive dir
+  where
+	manifest = dir </> "git-annex.MANIFEST"
+
+removeEmptyRecursive :: FilePath -> IO ()
+removeEmptyRecursive dir = do
+	mapM_ removeEmptyRecursive =<< dirContents dir
+	void $ tryIO $ removeDirectory dir
+
+{- This is a file that the UpgradeWatcher can watch for modifications to
+ - detect when git-annex has been upgraded.
+ -}
+upgradeFlagFile :: IO FilePath
+upgradeFlagFile = programPath
+
+{- Sanity check to see if an upgrade is complete and the program is ready
+ - to be run. -}
+upgradeSanityCheck :: IO Bool
+upgradeSanityCheck = ifM usingDistribution
+	( doesFileExist =<< programFile
+	, do
+		-- Ensure that the program is present, and has no writers,
+		-- and can be run. This should handle distribution
+		-- upgrades, manual upgrades, etc.
+		program <- programPath
+		untilM (doesFileExist program <&&> nowriter program) $
+			threadDelaySeconds (Seconds 60)
+		boolSystem program [Param "version"]
+	)
+  where
+	nowriter f = null
+		. filter (`elem` [Lsof.OpenReadWrite, Lsof.OpenWriteOnly])
+		. map snd3
+		<$> Lsof.query [f]
+
+usingDistribution :: IO Bool
+usingDistribution = isJust <$> getEnv "GIT_ANNEX_STANDLONE_ENV"
+
+downloadDistributionInfo :: Assistant (Maybe GitAnnexDistribution)
+downloadDistributionInfo = do
+	uo <- liftAnnex Url.getUrlOptions
+	gpgcmd <- liftAnnex $ gpgCmd <$> Annex.getGitConfig
+	liftIO $ withTmpDir "git-annex.tmp" $ \tmpdir -> do
+		let infof = tmpdir </> "info"
+		let sigf = infof ++ ".sig"
+		ifM (isRight <$> Url.download nullMeterUpdate Nothing distributionInfoUrl infof uo
+			<&&> (isRight <$> Url.download nullMeterUpdate Nothing distributionInfoSigUrl sigf uo)
+			<&&> verifyDistributionSig gpgcmd sigf)
+			( parseInfoFile <$> readFileStrict infof
+			, return Nothing
+			)
+
+distributionInfoUrl :: String
+distributionInfoUrl = fromJust BuildInfo.upgradelocation ++ ".info"
+
+distributionInfoSigUrl :: String
+distributionInfoSigUrl = distributionInfoUrl ++ ".sig"
+
+{- Upgrade only supported on linux and OSX. -}
+upgradeSupported :: Bool
+#ifdef linux_HOST_OS
+upgradeSupported = isJust BuildInfo.upgradelocation
+#else
+#ifdef darwin_HOST_OS
+upgradeSupported = isJust BuildInfo.upgradelocation
+#else
+upgradeSupported = False
+#endif
+#endif
+
+{- Verifies that a file from the git-annex distribution has a valid
+ - signature. Pass the detached .sig file; the file to be verified should
+ - be located next to it.
+ -
+ - The gpg keyring used to verify the signature is located in
+ - trustedkeys.gpg, next to the git-annex program.
+ -}
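{- Editor's note: a side note on newVersionLocation above. It relies on
 - createDirectory failing when the target already exists, which turns
 - directory creation into a crude lock: the first upgrade process to
 - create the versioned directory proceeds, and any concurrent or repeated
 - attempt gets Nothing and skips. A minimal self-contained sketch of the
 - idiom, with a hypothetical name: -}
import System.Directory (createDirectory)
import Control.Exception (try)

claimDir :: FilePath -> IO (Maybe FilePath)
claimDir dir = do
	r <- try (createDirectory dir) :: IO (Either IOError ())
	return $ either (const Nothing) (const (Just dir)) r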
+verifyDistributionSig :: GpgCmd -> FilePath -> IO Bool
+verifyDistributionSig gpgcmd sig = readProgramFile >>= \case
+	Just p | isAbsolute p ->
+		withUmask 0o0077 $ withTmpDir "git-annex-gpg.tmp" $ \gpgtmp -> do
+			let trustedkeys = takeDirectory p </> "trustedkeys.gpg"
+			boolGpgCmd gpgcmd
+				[ Param "--no-default-keyring"
+				, Param "--no-auto-check-trustdb"
+				, Param "--no-options"
+				, Param "--homedir"
+				, File gpgtmp
+				, Param "--keyring"
+				, File trustedkeys
+				, Param "--verify"
+				, File sig
+				]
+	_ -> return False
diff --git a/Assistant/WebApp.hs b/Assistant/WebApp.hs
new file mode 100644
index 0000000000..3d0b632717
--- /dev/null
+++ b/Assistant/WebApp.hs
@@ -0,0 +1,74 @@
+{- git-annex assistant webapp core
+ -
+ - Copyright 2012, 2013 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE TypeFamilies, QuasiQuotes, MultiParamTypeClasses #-}
+{-# LANGUAGE TemplateHaskell, OverloadedStrings, RankNTypes #-}
+
+module Assistant.WebApp where
+
+import Assistant.WebApp.Types
+import Assistant.Common
+import Utility.NotificationBroadcaster
+import Utility.Yesod
+import Utility.AuthToken
+
+import Data.Text (Text)
+import Control.Concurrent
+import qualified Network.Wai as W
+import qualified Data.ByteString.Char8 as S8
+import qualified Data.Text as T
+
+waitNotifier :: Assistant NotificationBroadcaster -> NotificationId -> Handler ()
+waitNotifier getbroadcaster nid = liftAssistant $ do
+	b <- getbroadcaster
+	liftIO $ waitNotification $ notificationHandleFromId b nid
+
+newNotifier :: Assistant NotificationBroadcaster -> Handler NotificationId
+newNotifier getbroadcaster = liftAssistant $ do
+	b <- getbroadcaster
+	liftIO $ notificationHandleToId <$> newNotificationHandle True b
+
+{- Adds the auth parameter as a hidden field on a form. Must be put into
+ - every form. -}
+webAppFormAuthToken :: Widget
+webAppFormAuthToken = do
+	webapp <- liftH getYesod
+	[whamlet|<input type=hidden name=auth value="#{fromAuthToken (authToken webapp)}">|]
+
+{- A button with an icon, and maybe label or tooltip, that can be
+ - clicked to perform some action.
+ - With javascript, clicking it POSTs the Route, and remains on the same
+ - page.
+ - With noscript, clicking it GETs the Route. -}
+actionButton :: Route WebApp -> (Maybe String) -> (Maybe String) -> String -> String -> Widget
+actionButton route label tooltip buttonclass iconclass = $(widgetFile "actionbutton")
+
+type UrlRenderFunc = Route WebApp -> [(Text, Text)] -> Text
+type UrlRenderer = MVar (UrlRenderFunc)
+
+newUrlRenderer :: IO UrlRenderer
+newUrlRenderer = newEmptyMVar
+
+setUrlRenderer :: UrlRenderer -> (UrlRenderFunc) -> IO ()
+setUrlRenderer = putMVar
+
+inFirstRun :: Handler Bool
+inFirstRun = isNothing . relDir <$> getYesod
+
+{- Blocks until the webapp is running and has called setUrlRenderer. -}
+renderUrl :: UrlRenderer -> Route WebApp -> [(Text, Text)] -> IO Text
+renderUrl urlrenderer route params = do
+	r <- readMVar urlrenderer
+	return $ r route params
+
+{- Redirects back to the referring page, or if there's none, DashboardR -}
+redirectBack :: Handler ()
+redirectBack = do
+	mr <- lookup "referer" . W.requestHeaders <$> waiRequest
+	case mr of
+		Nothing -> redirect DashboardR
+		Just r -> redirect $ T.pack $ S8.unpack r
diff --git a/Assistant/WebApp/Common.hs b/Assistant/WebApp/Common.hs
new file mode 100644
index 0000000000..a148dcabdb
--- /dev/null
+++ b/Assistant/WebApp/Common.hs
@@ -0,0 +1,17 @@
+{- git-annex assistant webapp, common imports
+ -
+ - Copyright 2012 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
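{- Editor's note: UrlRenderer above is a write-once handoff cell. The MVar
 - starts empty; renderUrl's readMVar blocks every caller until the webapp
 - thread calls setUrlRenderer (putMVar) exactly once, after which all
 - reads return immediately without emptying the MVar. A runnable,
 - self-contained sketch with hypothetical names: -}
import Control.Concurrent

main :: IO ()
main = do
	cell <- newEmptyMVar
	_ <- forkIO $ do
		threadDelay 100000 -- webapp startup happening...
		putMVar cell (\route -> "http://localhost/" ++ route)
	render <- readMVar cell -- blocks until the renderer is provided
	putStrLn (render "dashboard")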
+ -} + +module Assistant.WebApp.Common (module X) where + +import Assistant.Common as X +import Assistant.WebApp as X +import Assistant.WebApp.Page as X +import Assistant.WebApp.Form as X +import Assistant.WebApp.Types as X +import Assistant.WebApp.RepoId as X +import Utility.Yesod as X hiding (textField, passwordField, insertBy, replace, joinPath, deleteBy, delete, insert, Key, Option, PermissionDenied) +import Data.Text as X (Text) diff --git a/Assistant/WebApp/Configurators.hs b/Assistant/WebApp/Configurators.hs new file mode 100644 index 0000000000..0042638b15 --- /dev/null +++ b/Assistant/WebApp/Configurators.hs @@ -0,0 +1,44 @@ +{- git-annex assistant webapp configurators + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE QuasiQuotes, TemplateHaskell, OverloadedStrings #-} + +module Assistant.WebApp.Configurators where + +import Assistant.WebApp.Common +import Assistant.WebApp.RepoList + +{- The main configuration screen. -} +getConfigurationR :: Handler Html +getConfigurationR = ifM inFirstRun + ( redirect FirstRepositoryR + , page "Configuration" (Just Configuration) $ do + $(widgetFile "configurators/main") + ) + +getAddRepositoryR :: Handler Html +getAddRepositoryR = page "Add Repository" (Just Configuration) $ do + let repolist = repoListDisplay mainRepoSelector + $(widgetFile "configurators/addrepository") + +makeMiscRepositories :: Widget +makeMiscRepositories = $(widgetFile "configurators/addrepository/misc") + +makeCloudRepositories :: Widget +makeCloudRepositories = $(widgetFile "configurators/addrepository/cloud") + +makeWormholePairing :: Widget +makeWormholePairing = $(widgetFile "configurators/addrepository/wormholepairing") + +makeSshRepository :: Widget +makeSshRepository = $(widgetFile "configurators/addrepository/ssh") + +makeConnectionRepositories :: Widget +makeConnectionRepositories = $(widgetFile "configurators/addrepository/connection") + +makeArchiveRepositories :: Widget +makeArchiveRepositories = $(widgetFile "configurators/addrepository/archive") diff --git a/Assistant/WebApp/Configurators/AWS.hs b/Assistant/WebApp/Configurators/AWS.hs new file mode 100644 index 0000000000..fb9f92b0dd --- /dev/null +++ b/Assistant/WebApp/Configurators/AWS.hs @@ -0,0 +1,213 @@ +{- git-annex assistant webapp configurators for Amazon AWS services + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
diff --git a/Assistant/WebApp/Configurators/AWS.hs b/Assistant/WebApp/Configurators/AWS.hs
new file mode 100644
index 0000000000..fb9f92b0dd
--- /dev/null
+++ b/Assistant/WebApp/Configurators/AWS.hs
@@ -0,0 +1,213 @@
+{- git-annex assistant webapp configurators for Amazon AWS services
+ -
+ - Copyright 2012 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE QuasiQuotes, TemplateHaskell, OverloadedStrings #-}
+
+module Assistant.WebApp.Configurators.AWS where
+
+import Assistant.WebApp.Common
+import Assistant.WebApp.MakeRemote
+import qualified Remote.S3 as S3
+import Logs.Remote
+import qualified Remote
+import qualified Types.Remote as Remote
+import qualified Remote.Glacier as Glacier
+import qualified Remote.Helper.AWS as AWS
+import Types.Remote (RemoteConfig)
+import Types.StandardGroups
+import Creds
+import Assistant.Gpg
+import Git.Types (RemoteName)
+import Annex.SpecialRemote.Config
+import Types.ProposedAccepted
+
+import qualified Data.Text as T
+import qualified Data.Map as M
+import Data.Char
+
+awsConfigurator :: Widget -> Handler Html
+awsConfigurator = page "Add an Amazon repository" (Just Configuration)
+
+glacierConfigurator :: Widget -> Handler Html
+glacierConfigurator a = do
+    ifM (liftIO $ inSearchPath "glacier")
+        ( awsConfigurator a
+        , awsConfigurator needglaciercli
+        )
+  where
+    needglaciercli = $(widgetFile "configurators/needglaciercli")
+
+data StorageClass
+    = StandardRedundancy
+    | StandardInfrequentAccess
+    deriving (Eq, Enum, Bounded)
+
+instance Show StorageClass where
+    show StandardRedundancy = "STANDARD"
+    show StandardInfrequentAccess = "STANDARD_IA"
+
+data AWSInput = AWSInput
+    { accessKeyID :: Text
+    , secretAccessKey :: Text
+    , datacenter :: Text
+    -- Only used for S3, not Glacier.
+    , storageClass :: StorageClass
+    , repoName :: Text
+    , enableEncryption :: EnableEncryption
+    }
+
+data AWSCreds = AWSCreds Text Text
+
+extractCreds :: AWSInput -> AWSCreds
+extractCreds i = AWSCreds (accessKeyID i) (secretAccessKey i)
+
+s3InputAForm :: Maybe CredPair -> MkAForm AWSInput
+s3InputAForm defcreds = AWSInput
+    <$> accessKeyIDFieldWithHelp (T.pack . fst <$> defcreds)
+    <*> secretAccessKeyField (T.pack . snd <$> defcreds)
+    <*> datacenterField AWS.S3
+    <*> areq (selectFieldList storageclasses) (bfs "Storage class") (Just StandardRedundancy)
+    <*> areq textField (bfs "Repository name") (Just "S3")
+    <*> enableEncryptionField
+  where
+    storageclasses :: [(Text, StorageClass)]
+    storageclasses =
+        [ ("Standard redundancy", StandardRedundancy)
+        , ("Infrequent access (cheaper for backups and archives)", StandardInfrequentAccess)
+        ]
+
+glacierInputAForm :: Maybe CredPair -> MkAForm AWSInput
+glacierInputAForm defcreds = AWSInput
+    <$> accessKeyIDFieldWithHelp (T.pack . fst <$> defcreds)
+    <*> secretAccessKeyField (T.pack . snd <$> defcreds)
+    <*> datacenterField AWS.Glacier
+    <*> pure StandardRedundancy
+    <*> areq textField (bfs "Repository name") (Just "glacier")
+    <*> enableEncryptionField
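s3InputAForm and glacierInputAForm assemble an AWSInput purely applicatively: each <*> contributes one field, and `pure StandardRedundancy` fills the storage-class slot that the Glacier form does not expose. The same shape can be shown without Yesod by validating with Either (a toy stand-in; names are illustrative):

    -- Each "field" either accepts its input or fails with a message.
    data Input = Input
        { keyId :: String
        , secret :: String
        , region :: String
        } deriving Show

    field :: String -> String -> Either String String
    field name "" = Left (name ++ " is required")
    field _ v = Right v

    parseInput :: String -> String -> String -> Either String Input
    parseInput k s r = Input
        <$> field "Access Key ID" k
        <*> field "Secret Access Key" s
        <*> field "Datacenter" r

    main :: IO ()
    main = do
        print (parseInput "AKIA..." "s3kr1t" "us-east-1")
        print (parseInput "" "s3kr1t" "us-east-1")

Unlike a real form backend, Either stops at the first failing field; form libraries normally accumulate every error before re-rendering.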
+
+awsCredsAForm :: Maybe CredPair -> MkAForm AWSCreds
+awsCredsAForm defcreds = AWSCreds
+    <$> accessKeyIDFieldWithHelp (T.pack . fst <$> defcreds)
+    <*> secretAccessKeyField (T.pack . snd <$> defcreds)
+
+accessKeyIDField :: Widget -> Maybe Text -> MkAForm Text
+accessKeyIDField help = areq (textField `withNote` help) (bfs "Access Key ID")
+
+accessKeyIDFieldWithHelp :: Maybe Text -> MkAForm Text
+accessKeyIDFieldWithHelp = accessKeyIDField help
+  where
+    help = [whamlet|
+
+        Get Amazon access keys
+|]
+
+secretAccessKeyField :: Maybe Text -> MkAForm Text
+secretAccessKeyField = areq passwordField (bfs "Secret Access Key")
+
+datacenterField :: AWS.Service -> MkAForm Text
+datacenterField service = areq (selectFieldList list) (bfs "Datacenter") defregion
+  where
+    list = M.toList $ AWS.regionMap service
+    defregion = Just $ AWS.defaultRegion service
+
+getAddS3R :: Handler Html
+getAddS3R = postAddS3R
+
+postAddS3R :: Handler Html
+postAddS3R = awsConfigurator $ do
+    defcreds <- liftAnnex previouslyUsedAWSCreds
+    ((result, form), enctype) <- liftH $
+        runFormPostNoToken $ renderBootstrap3 bootstrapFormLayout $ s3InputAForm defcreds
+    case result of
+        FormSuccess input -> liftH $ do
+            let name = T.unpack $ repoName input
+            makeAWSRemote initSpecialRemote S3.remote TransferGroup (extractCreds input) name $ M.fromList
+                [ configureEncryption $ enableEncryption input
+                , (typeField, Proposed "S3")
+                , (Proposed "datacenter", Proposed $ T.unpack $ datacenter input)
+                , (Proposed "storageclass", Proposed $ show $ storageClass input)
+                , (Proposed "chunk", Proposed "1MiB")
+                ]
+        _ -> $(widgetFile "configurators/adds3")
+
+getAddGlacierR :: Handler Html
+getAddGlacierR = postAddGlacierR
+
+postAddGlacierR :: Handler Html
+postAddGlacierR = glacierConfigurator $ do
+    defcreds <- liftAnnex previouslyUsedAWSCreds
+    ((result, form), enctype) <- liftH $
+        runFormPostNoToken $ renderBootstrap3 bootstrapFormLayout $ glacierInputAForm defcreds
+    case result of
+        FormSuccess input -> liftH $ do
+            let name = T.unpack $ repoName input
+            makeAWSRemote initSpecialRemote Glacier.remote SmallArchiveGroup (extractCreds input) name $ M.fromList
+                [ configureEncryption $ enableEncryption input
+                , (typeField, Proposed "glacier")
+                , (Proposed "datacenter", Proposed $ T.unpack $ datacenter input)
+                ]
+        _ -> $(widgetFile "configurators/addglacier")
+
+getEnableS3R :: UUID -> Handler Html
+getEnableS3R uuid = do
+    m <- liftAnnex remoteConfigMap
+    isia <- case M.lookup uuid m of
+        Just c -> liftAnnex $ do
+            pc <- parsedRemoteConfig S3.remote c
+            return $ S3.configIA pc
+        Nothing -> return False
+    if isia
+        then redirect $ EnableIAR uuid
+        else postEnableS3R uuid
+
+postEnableS3R :: UUID -> Handler Html
+postEnableS3R uuid = awsConfigurator $ enableAWSRemote S3.remote uuid
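The `Proposed "storageclass"` value passed to initremote above is just `show (storageClass input)`, so the UI enum and the string S3 expects cannot drift apart. A standalone sketch of the pattern; the two strings match the instance in the patch, while toConfigPair is a hypothetical helper:

    data StorageClass
        = StandardRedundancy
        | StandardInfrequentAccess
        deriving (Eq, Enum, Bounded)

    -- The Show instance doubles as the wire format.
    instance Show StorageClass where
        show StandardRedundancy = "STANDARD"
        show StandardInfrequentAccess = "STANDARD_IA"

    -- Hypothetical helper: one config key-value pair per selection.
    toConfigPair :: StorageClass -> (String, String)
    toConfigPair sc = ("storageclass", show sc)

    main :: IO ()
    main = mapM_ (print . toConfigPair) [minBound .. maxBound]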
+
+getEnableGlacierR :: UUID -> Handler Html
+getEnableGlacierR = postEnableGlacierR
+
+postEnableGlacierR :: UUID -> Handler Html
+postEnableGlacierR = glacierConfigurator . enableAWSRemote Glacier.remote
+
+enableAWSRemote :: RemoteType -> UUID -> Widget
+enableAWSRemote remotetype uuid = do
+    defcreds <- liftAnnex previouslyUsedAWSCreds
+    ((result, form), enctype) <- liftH $
+        runFormPostNoToken $ renderBootstrap3 bootstrapFormLayout $ awsCredsAForm defcreds
+    case result of
+        FormSuccess creds -> liftH $ do
+            m <- liftAnnex remoteConfigMap
+            let name = fromJust $ lookupName $
+                fromJust $ M.lookup uuid m
+            makeAWSRemote enableSpecialRemote remotetype SmallArchiveGroup creds name M.empty
+        _ -> do
+            description <- liftAnnex $
+                T.pack <$> Remote.prettyUUID uuid
+            $(widgetFile "configurators/enableaws")
+
+makeAWSRemote :: SpecialRemoteMaker -> RemoteType -> StandardGroup -> AWSCreds -> RemoteName -> RemoteConfig -> Handler ()
+makeAWSRemote maker remotetype defaultgroup (AWSCreds ak sk) name config =
+    setupCloudRemote defaultgroup Nothing $
+        maker hostname remotetype (Just creds) config
+  where
+    creds = (T.unpack ak, T.unpack sk)
+    {- AWS services use the remote name as the basis for a host
+     - name, so filter it to contain valid characters. -}
+    hostname = case filter isAlphaNum name of
+        [] -> "aws"
+        n -> n
+
+getRepoInfo :: RemoteConfig -> Widget
+getRepoInfo c = [whamlet|S3 remote using bucket: #{bucket}|]
+  where
+    bucket = maybe "" fromProposedAccepted $ M.lookup (Accepted "bucket") c
+
+previouslyUsedAWSCreds :: Annex (Maybe CredPair)
+previouslyUsedAWSCreds = getM gettype [S3.remote, Glacier.remote]
+  where
+    gettype t = previouslyUsedCredPair AWS.creds t $
+        not . S3.configIA . Remote.config
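previouslyUsedAWSCreds leans on getM from git-annex's utility layer: run a Maybe-producing action over a list and stop at the first hit, which is why S3 credentials take precedence over Glacier ones here. A plausible standalone definition, consistent with this call site but written from scratch as an assumption:

    -- Try each input in order; return the first Just and skip the rest.
    getM :: Monad m => (a -> m (Maybe b)) -> [a] -> m (Maybe b)
    getM _ [] = return Nothing
    getM f (x:xs) = f x >>= maybe (getM f xs) (return . Just)

    main :: IO ()
    main = do
        -- Only even numbers "have credentials" in this toy lookup.
        r <- getM (\n -> return (if even n then Just n else Nothing)) [1, 3, 4, 5 :: Int]
        print r -- Just 4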
diff --git a/Assistant/WebApp/Configurators/Delete.hs b/Assistant/WebApp/Configurators/Delete.hs
new file mode 100644
index 0000000000..333e13656a
--- /dev/null
+++ b/Assistant/WebApp/Configurators/Delete.hs
@@ -0,0 +1,121 @@
+{- git-annex assistant webapp repository deletion
+ -
+ - Copyright 2013 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE QuasiQuotes, TemplateHaskell, OverloadedStrings #-}
+
+module Assistant.WebApp.Configurators.Delete where
+
+import Assistant.WebApp.Common
+import Assistant.DeleteRemote
+import Assistant.DaemonStatus
+import Assistant.ScanRemotes
+import Assistant.Sync
+import qualified Remote
+import qualified Git
+import Config.Files.AutoStart
+import Logs.Trust
+import Logs.Remote
+import Logs.PreferredContent
+import Types.StandardGroups
+import Annex.UUID
+import Command.Uninit (prepareRemoveAnnexDir)
+
+import qualified Data.Text as T
+import qualified Data.Map as M
+
+notCurrentRepo :: UUID -> Handler Html -> Handler Html
+notCurrentRepo uuid a = do
+    u <- liftAnnex getUUID
+    if u == uuid
+        then redirect DeleteCurrentRepositoryR
+        else go =<< liftAnnex (Remote.remoteFromUUID uuid)
+  where
+    go Nothing = giveup "Unknown UUID"
+    go (Just _) = a
+
+getDeleteRepositoryR :: UUID -> Handler Html
+getDeleteRepositoryR uuid = notCurrentRepo uuid $ do
+    deletionPage $ do
+        reponame <- liftAnnex $ Remote.prettyUUID uuid
+        $(widgetFile "configurators/delete/start")
+
+getStartDeleteRepositoryR :: UUID -> Handler Html
+getStartDeleteRepositoryR uuid = do
+    remote <- fromMaybe (giveup "unknown remote")
+        <$> liftAnnex (Remote.remoteFromUUID uuid)
+    liftAnnex $ do
+        trustSet uuid UnTrusted
+        setStandardGroup uuid UnwantedGroup
+    liftAssistant $ addScanRemotes True [remote]
+    redirect DashboardR
+
+getFinishDeleteRepositoryR :: UUID -> Handler Html
+getFinishDeleteRepositoryR uuid = deletionPage $ do
+    void $ liftAssistant $ removeRemote uuid
+
+    reponame <- liftAnnex $ Remote.prettyUUID uuid
+    {- If it's not listed in the remote log, it must be a git repo. -}
+    gitrepo <- liftAnnex $ M.notMember uuid <$> remoteConfigMap
+    $(widgetFile "configurators/delete/finished")
+
+getDeleteCurrentRepositoryR :: Handler Html
+getDeleteCurrentRepositoryR = deleteCurrentRepository
+
+postDeleteCurrentRepositoryR :: Handler Html
+postDeleteCurrentRepositoryR = deleteCurrentRepository
+
+deleteCurrentRepository :: Handler Html
+deleteCurrentRepository = dangerPage $ do
+    reldir <- fromJust . relDir <$> liftH getYesod
+    havegitremotes <- haveremotes syncGitRemotes
+    havedataremotes <- haveremotes downloadRemotes
+    ((result, form), enctype) <- liftH $
+        runFormPostNoToken $ renderBootstrap3 bootstrapFormLayout $
+            sanityVerifierAForm $ SanityVerifier magicphrase
+    case result of
+        FormSuccess _ -> liftH $ do
+            dir <- liftAnnex $ fromRawFilePath <$> fromRepo Git.repoPath
+            liftIO $ removeAutoStartFile dir
+
+            {- Disable syncing to this repository, and all
+             - remotes. This stops all transfers, and all
+             - file watching. -}
+            liftAssistant $ do
+                changeSyncable Nothing False
+                rs <- syncRemotes <$> getDaemonStatus
+                mapM_ (\r -> changeSyncable (Just r) False) rs
+
+            liftAnnex $ prepareRemoveAnnexDir dir
+            liftIO $ removeDirectoryRecursive . fromRawFilePath
+                =<< absPath (toRawFilePath dir)
+
+            redirect ShutdownConfirmedR
+        _ -> $(widgetFile "configurators/delete/currentrepository")
+  where
+    haveremotes selector = not . null . selector
+        <$> liftAssistant getDaemonStatus
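The SanityVerifier form below gates deletion on retyping magicphrase exactly; checkBool wraps that comparison around a plain textField. The comparison itself is pure and can be stated on its own (checkSanity is an illustrative name, not the patch's):

    {-# LANGUAGE OverloadedStrings #-}

    import Data.Text (Text)

    magicphrase :: Text
    magicphrase = "Yes, please do as I say!"

    -- Mirror of the checkBool logic: pass the input through only when
    -- it matches the expected phrase, otherwise report the error.
    checkSanity :: Text -> Either Text Text
    checkSanity input
        | input == magicphrase = Right input
        | otherwise = Left "Maybe this is not a good idea..."

    main :: IO ()
    main = do
        print (checkSanity "yes")
        print (checkSanity magicphrase)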
+
+data SanityVerifier = SanityVerifier T.Text
+    deriving (Eq)
+
+sanityVerifierAForm :: SanityVerifier -> MkAForm SanityVerifier
+sanityVerifierAForm template = SanityVerifier
+    <$> areq checksanity (bfs "Confirm deletion?") Nothing
+  where
+    checksanity = checkBool (\input -> SanityVerifier input == template)
+        insane textField
+
+    insane = "Maybe this is not a good idea..." :: Text
+
+deletionPage :: Widget -> Handler Html
+deletionPage = page "Delete repository" (Just Configuration)
+
+dangerPage :: Widget -> Handler Html
+dangerPage = page "Danger danger danger" (Just Configuration)
+
+magicphrase :: Text
+magicphrase = "Yes, please do as I say!"
diff --git a/Assistant/WebApp/Configurators/Edit.hs b/Assistant/WebApp/Configurators/Edit.hs
new file mode 100644
index 0000000000..65da2d588e
--- /dev/null
+++ b/Assistant/WebApp/Configurators/Edit.hs
@@ -0,0 +1,329 @@
+{- git-annex assistant webapp configurator for editing existing repos
+ -
+ - Copyright 2012 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE QuasiQuotes, TemplateHaskell, OverloadedStrings #-}
+{-# LANGUAGE FlexibleContexts #-}
+
+module Assistant.WebApp.Configurators.Edit where
+
+import Assistant.WebApp.Common
+import Assistant.WebApp.Gpg
+import Assistant.WebApp.Configurators
+import Assistant.DaemonStatus
+import Assistant.WebApp.MakeRemote (uniqueRemoteName)
+import Assistant.ScanRemotes
+import Assistant.Sync
+import Assistant.Alert
+import qualified Assistant.WebApp.Configurators.AWS as AWS
+import qualified Assistant.WebApp.Configurators.IA as IA
+import qualified Remote.S3 as S3
+import qualified Remote
+import qualified Types.Remote as Remote
+import Remote.List.Util
+import Logs.UUID
+import Logs.Group
+import Logs.PreferredContent
+import Logs.Remote
+import Types.StandardGroups
+import qualified Git
+import qualified Git.Types as Git
+import qualified Git.Command
+import qualified Git.Config
+import qualified Annex
+import Git.Remote
+import Remote.Helper.Encryptable (extractCipher, parseEncryptionConfig)
+import Types.Crypto
+import Utility.Gpg
+import Annex.UUID
+import Annex.Perms
+import Assistant.Ssh
+import Config
+import Config.GitConfig
+import Config.DynamicConfig
+import Types.Group
+import Types.ProposedAccepted
+import Annex.SpecialRemote.Config
+
+import qualified Data.Text as T
+import qualified Data.Map as M
+import qualified Data.Set as S
+
+data RepoGroup = RepoGroupCustom String | RepoGroupStandard StandardGroup
+    deriving (Show, Eq)
+
+data RepoConfig = RepoConfig
+    { repoName :: Text
+    , repoDescription :: Maybe Text
+    , repoGroup :: RepoGroup
+    , repoAssociatedDirectory :: Maybe Text
+    , repoSyncable :: Bool
+    }
+    deriving (Show)
+
+getRepoConfig :: UUID -> Maybe Remote -> Annex RepoConfig
+getRepoConfig uuid mremote = do
+    -- Ensure we're editing current data by discarding caches.
+    void groupMapLoad
+    void uuidDescMapLoad
+
+    groups <- lookupGroups uuid
+    remoteconfig <- M.lookup uuid <$> remoteConfigMap
+    let (repogroup, associateddirectory) = case getStandardGroup groups of
+            Nothing -> (RepoGroupCustom $ unwords $ map fromGroup $ S.toList groups, Nothing)
+            Just g -> (RepoGroupStandard g, associatedDirectory remoteconfig g)
+
+    description <- fmap (T.pack . fromUUIDDesc) . M.lookup uuid <$> uuidDescMap
+
+    syncable <- case mremote of
+        Just r -> liftIO $ getDynamicConfig $ remoteAnnexSync $ Remote.gitconfig r
+        Nothing -> getGitConfigVal annexAutoCommit
+
+    return $ RepoConfig
+        (T.pack $ maybe "here" Remote.name mremote)
+        description
+        repogroup
+        (T.pack <$> associateddirectory)
+        syncable
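setRepoConfig below computes one ...Changed flag per field by comparing the old and new RepoConfig, then applies only the settings that differ, so a no-op form submission touches nothing. Every flag follows the same shape, which a small helper could capture (hypothetical; the patch spells each comparison out):

    -- True when a field, projected out of two versions of a record, differs.
    changed :: Eq a => (c -> a) -> c -> c -> Bool
    changed field oldc newc = field oldc /= field newc

    data Config = Config { name :: String, syncable :: Bool }

    main :: IO ()
    main = do
        let oldc = Config "origin" True
            newc = Config "backup" True
        print (changed name oldc newc)     -- True: apply the rename
        print (changed syncable oldc newc) -- False: leave syncing alone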
+
+setRepoConfig :: UUID -> Maybe Remote -> RepoConfig -> RepoConfig -> Handler ()
+setRepoConfig uuid mremote oldc newc = do
+    when descriptionChanged $ liftAnnex $ do
+        maybe noop (describeUUID uuid . toUUIDDesc . T.unpack) (repoDescription newc)
+        void uuidDescMapLoad
+    when nameChanged $ do
+        liftAnnex $ do
+            name <- uniqueRemoteName (legalName newc) 0 <$> Annex.getGitRemotes
+            {- git remote rename expects there to be a
+             - remote.<name>.fetch, and exits nonzero if
+             - there's not. Special remotes don't normally
+             - have that, and don't use it. Temporarily add
+             - it if it's missing. -}
+            let remotefetch = Git.ConfigKey $ encodeBS $
+                "remote." ++ T.unpack (repoName oldc) ++ ".fetch"
+            needfetch <- isNothing <$> fromRepo (Git.Config.getMaybe remotefetch)
+            when needfetch $
+                inRepo $ Git.Command.run
+                    [Param "config", Param (Git.fromConfigKey remotefetch), Param ""]
+            inRepo $ Git.Command.run
+                [ Param "remote"
+                , Param "rename"
+                , Param $ T.unpack $ repoName oldc
+                , Param name
+                ]
+            remotesChanged
+        liftAssistant updateSyncRemotes
+    when associatedDirectoryChanged $ case repoAssociatedDirectory newc of
+        Nothing -> noop
+        Just t
+            | T.null t -> noop
+            | otherwise -> liftAnnex $ do
+                let dir = takeBaseName $ T.unpack t
+                m <- remoteConfigMap
+                case M.lookup uuid m of
+                    Nothing -> noop
+                    Just remoteconfig -> configSet uuid $
+                        M.insert (Proposed "preferreddir") (Proposed dir) remoteconfig
+    when groupChanged $ do
+        liftAnnex $ case repoGroup newc of
+            RepoGroupStandard g -> setStandardGroup uuid g
+            RepoGroupCustom s -> groupSet uuid $ S.fromList $ map toGroup $ words s
+    {- Enabling syncing will cause a scan,
+     - so avoid queueing a duplicate scan. -}
+    when (repoSyncable newc && not syncableChanged) $ liftAssistant $
+        case mremote of
+            Just remote -> addScanRemotes True [remote]
+            Nothing -> addScanRemotes True
+                =<< syncDataRemotes <$> getDaemonStatus
+    when syncableChanged $
+        liftAssistant $ changeSyncable mremote (repoSyncable newc)
+  where
+    syncableChanged = repoSyncable oldc /= repoSyncable newc
+    associatedDirectoryChanged = repoAssociatedDirectory oldc /= repoAssociatedDirectory newc
+    groupChanged = repoGroup oldc /= repoGroup newc
+    nameChanged = isJust mremote && legalName oldc /= legalName newc
+    descriptionChanged = repoDescription oldc /= repoDescription newc
+
+    legalName = makeLegalName . T.unpack . repoName
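The rename path above funnels the requested name through uniqueRemoteName (imported from Assistant.WebApp.MakeRemote) so it cannot collide with an existing remote. A from-scratch sketch of that kind of disambiguation, appending a numeric suffix until the candidate is free; this illustrates the idea and is not the patch's definition:

    -- Append an increasing suffix until the name collides with nothing.
    uniqueName :: [String] -> String -> String
    uniqueName taken base = head (filter (`notElem` taken) candidates)
      where
        candidates = base : [base ++ show n | n <- [(2 :: Int) ..]]

    main :: IO ()
    main = do
        putStrLn (uniqueName ["origin", "backup"] "cloud") -- cloud
        putStrLn (uniqueName ["cloud", "cloud2"] "cloud")  -- cloud3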
+
+editRepositoryAForm :: Maybe Git.Repo -> Maybe Remote -> RepoConfig -> MkAForm RepoConfig
+editRepositoryAForm mrepo mremote d = RepoConfig
+    <$> areq (if ishere then readonlyTextField else textField)
+        (bfs "Name") (Just $ repoName d)
+    <*> aopt textField (bfs "Description") (Just $ repoDescription d)
+    <*> areq (selectFieldList groups `withNote` help) (bfs "Repository group") (Just $ repoGroup d)
+    <*> associateddirectory
+    <*> areq checkBoxField "Syncing enabled" (Just $ repoSyncable d)
+  where
+    ishere = isNothing mremote
+    isspecial = maybe False ((== Git.Unknown) . Git.location) mrepo
+    groups = customgroups ++ standardgroups
+    standardgroups :: [(Text, RepoGroup)]
+    standardgroups = map (\g -> (T.pack $ descStandardGroup g, RepoGroupStandard g)) $
+        filter sanegroup [minBound..maxBound]
+    sanegroup
+        | isspecial = const True
+        | otherwise = not . specialRemoteOnly
+    customgroups :: [(Text, RepoGroup)]
+    customgroups = case repoGroup d of
+        RepoGroupCustom s -> [(T.pack s, RepoGroupCustom s)]
+        _ -> []
+    help = [whamlet|What's this?|]
+
+    associateddirectory = case repoAssociatedDirectory d of
+        Nothing -> aopt hiddenField "" Nothing
+        Just dir -> aopt textField (bfs "Associated directory") (Just $ Just dir)
+
+getEditRepositoryR :: RepoId -> Handler Html
+getEditRepositoryR = postEditRepositoryR
+
+postEditRepositoryR :: RepoId -> Handler Html
+postEditRepositoryR = editForm False
+
+getEditNewRepositoryR :: UUID -> Handler Html
+getEditNewRepositoryR = postEditNewRepositoryR
+
+postEditNewRepositoryR :: UUID -> Handler Html
+postEditNewRepositoryR = editForm True . RepoUUID
+
+getEditNewCloudRepositoryR :: UUID -> Handler Html
+getEditNewCloudRepositoryR = postEditNewCloudRepositoryR
+
+postEditNewCloudRepositoryR :: UUID -> Handler Html
+postEditNewCloudRepositoryR uuid = connectionNeeded >> editForm True (RepoUUID uuid)
+
+editForm :: Bool -> RepoId -> Handler Html
+editForm new (RepoUUID uuid)
+    | uuid == webUUID || uuid == bitTorrentUUID = page "The web" (Just Configuration) $ do
+        $(widgetFile "configurators/edit/webrepository")
+    | otherwise = page "Edit repository" (Just Configuration) $ do
+        mremote <- liftAnnex $ Remote.remoteFromUUID uuid
+        when (mremote == Nothing) $
+            whenM ((/=) uuid <$> liftAnnex getUUID) $
+                giveup "unknown remote"
+        curr <- liftAnnex $ getRepoConfig uuid mremote
+        liftAnnex $ checkAssociatedDirectory curr mremote
+        mrepo <- liftAnnex $
+            maybe (pure Nothing) (Just <$$> Remote.getRepo) mremote
+        ((result, form), enctype) <- liftH $
+            runFormPostNoToken $ renderBootstrap3 bootstrapFormLayout $
+                editRepositoryAForm mrepo mremote curr
+        case result of
+            FormSuccess input -> liftH $ do
+                setRepoConfig uuid mremote curr input
+                liftAnnex $ checkAssociatedDirectory input mremote
+                redirect DashboardR
+            _ -> do
+                let istransfer = repoGroup curr == RepoGroupStandard TransferGroup
+                config <- liftAnnex $ fromMaybe mempty
+                    . M.lookup uuid
+                    <$> remoteConfigMap
+                let repoInfo = getRepoInfo mremote config
+                let repoEncryption = getRepoEncryption mremote (Just config)
+                $(widgetFile "configurators/edit/repository")
+editForm _new r@(RepoName _) = page "Edit repository" (Just Configuration) $ do
+    mr <- liftAnnex (repoIdRemote r)
+    let repoInfo = case mr of
+            Just rmt -> do
+                config <- liftAnnex $ fromMaybe mempty
+                    . M.lookup (Remote.uuid rmt)
+                    <$> remoteConfigMap
+                getRepoInfo mr config
+            Nothing -> getRepoInfo Nothing mempty
+    g <- liftAnnex gitRepo
+    mrepo <- liftAnnex $ maybe (pure Nothing) (Just <$$> Remote.getRepo) mr
+    let sshrepo = maybe False (\repo -> remoteLocationIsSshUrl (parseRemoteLocation (Git.repoLocation repo) False g)) mrepo
+    $(widgetFile "configurators/edit/nonannexremote")
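editForm uses `Just <$$> Remote.getRepo` to lift a `Remote -> Annex Git.Repo` action into one that produces a Maybe. The <$$> operator comes from git-annex's utility modules; a definition consistent with this use, stated here as an assumption, maps over the result of a one-argument functorial function:

    -- Map a pure function over the result of a one-argument
    -- functorial function.
    (<$$>) :: Functor f => (a -> b) -> (c -> f a) -> c -> f b
    f <$$> v = fmap f . v

    lookupAge :: String -> Maybe Int
    lookupAge "alice" = Just 42
    lookupAge _ = Nothing

    main :: IO ()
    main = do
        print ((show <$$> lookupAge) "alice") -- Just "42"
        print ((show <$$> lookupAge) "bob")   -- Nothing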
+
+{- Makes any directory associated with the repository. -}
+checkAssociatedDirectory :: RepoConfig -> Maybe Remote -> Annex ()
+checkAssociatedDirectory _ Nothing = noop
+checkAssociatedDirectory cfg (Just r) = do
+    repoconfig <- M.lookup (Remote.uuid r) <$> remoteConfigMap
+    case repoGroup cfg of
+        RepoGroupStandard gr -> case associatedDirectory repoconfig gr of
+            Just d -> do
+                top <- fromRawFilePath <$> fromRepo Git.repoPath
+                createWorkTreeDirectory (toRawFilePath (top </> d))
+            Nothing -> noop
+        _ -> noop
+
+getRepoInfo :: Maybe Remote.Remote -> Remote.RemoteConfig -> Widget
+getRepoInfo (Just r) c = case fromProposedAccepted <$> M.lookup typeField c of
+    Just "S3" -> do
+        pc <- liftAnnex $ parsedRemoteConfig S3.remote c
+        if S3.configIA pc
+            then IA.getRepoInfo c
+            else AWS.getRepoInfo c
+    Just t
+        | t /= "git" -> [whamlet|#{t} remote|]
+    _ -> getGitRepoInfo =<< liftAnnex (Remote.getRepo r)
+getRepoInfo _ _ = [whamlet|git repository|]
+
+getGitRepoInfo :: Git.Repo -> Widget
+getGitRepoInfo r = do
+    let loc = Git.repoLocation r
+    [whamlet|git repository located at #{loc}|]
+
+getRepoEncryption :: Maybe Remote.Remote -> Maybe Remote.RemoteConfig -> Widget
+getRepoEncryption (Just _) (Just c) = case extractCipher pc of
+    Nothing ->
+        [whamlet|not encrypted|]
+    (Just (SharedCipher _)) ->
+        [whamlet|encrypted: encryption key stored in git repository|]
+    (Just (EncryptedCipher _ _ ks)) -> desckeys ks
+    (Just (SharedPubKeyCipher _ ks)) -> desckeys ks
+  where
+    pc = either (const (Remote.ParsedRemoteConfig mempty mempty)) id $
+        parseEncryptionConfig c
+    desckeys (KeyIds { keyIds = ks }) = do
+        cmd <- liftAnnex $ gpgCmd <$> Annex.getGitConfig
+        knownkeys <- liftIO (secretKeys cmd)
+        [whamlet|
+encrypted using gpg key: