Compare commits

...

No commits in common. "ci" and "balanced" have entirely different histories.
ci ... balanced

15034 changed files with 465993 additions and 153 deletions

141
.appveyor.yml Normal file
View file

@ -0,0 +1,141 @@
# This CI setup provides a largely homogeneous configuration across all
# major platforms (Windows, MacOS, and Linux). The aim of this test setup is
# to create a "native" platform experience, using as few cross-platform
# helper tools as possible.
#
# All workers support remote login. Login details are shown at the top of each
# CI run log.
#
# - Linux/Mac workers (via SSH):
#
# - A permitted SSH key must be defined in an APPVEYOR_SSH_KEY environment
# variable (via the appveyor project settings)
#
# - SSH login info is given in the form of: 'appveyor@67.225.164.xx -p 22xxx'
#
# - Login with:
#
# ssh -o StrictHostKeyChecking=no <LOGIN>
#
# - to prevent the CI run from exiting, `touch` a file named `BLOCK` in the
# user HOME directory (current directory directly after login). The session
# will run until the file is removed (or 60 min have passed)
#
# - Windows workers (via RDP):
#
# - An RDP password should be defined in an APPVEYOR_RDP_PASSWORD environment
# variable (via the appveyor project settings), or a random password is used
# every time
#
# - RDP login info is given in the form of IP:PORT
#
# - Login with:
#
# xfreerdp /cert:ignore /dynamic-resolution /u:appveyor /p:<PASSWORD> /v:<LOGIN>
#
# - to prevent the CI run from exiting, create a textfile named
# `BLOCK.txt` in the currently directory after login. The session
# will run until the file is removed (or 60 min have passed)
# Do a shallow clone with enough commits that queued builds will still
# find the commit they want to build.
clone_depth: 100
environment:
# Do not use `image` as a matrix dimension, to have fine-grained control over
# what tests run on which platform
# The ID variable had no impact, but sorts first in the CI run overview
# an intelligible name can help to locate a specific test run
matrix:
# List a CI run for each platform first, to have immediate access when there
# is a need for debugging
# Windows core tests
- ID: WinP39core
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019
STACK_ROOT: "c:\\sr"
# MacOS core tests
- ID: MacP38core
APPVEYOR_BUILD_WORKER_IMAGE: macos-monterey
# Ubuntu core tests
# (disabled because it's not needed)
#- ID: Ubu20
# APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004
# do not run the CI if only documentation changes were made
# documentation builds are tested elsewhere and cheaper
skip_commits:
files:
- doc/
- CHANGELOG
# it is OK to specify paths that may not exist for a particular test run
cache:
- C:\sr -> stack.yaml
- C:\Users\appveyor\AppData\Local\Programs\stack -> stack.yaml
- /Users/appveyor/.stack -> stack.yaml
# turn of support for MS project build support (not needed)
build: off
# init cannot use any components from the repo, because it runs prior to
# cloning it
init:
# remove windows 260-char limit on path names
- cmd: powershell Set-Itemproperty -path "HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem" -Name LongPathsEnabled -value 1
# enable developer mode on windows
# this should enable mklink without admin privileges, but it doesn't seem to work
#- cmd: powershell tools\ci\appveyor_enable_windevmode.ps1
# enable RDP access on windows (RDP password is in appveyor project config)
# this is relatively expensive (1-2min), but very convenient to jump into any build at any time
- cmd: powershell.exe iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
install:
# enable external SSH access to CI worker on all other systems
# needs APPVEYOR_SSH_KEY defined in project settings (or environment)
- sh: curl -sflL 'https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-ssh.sh' | bash -e -
# install stack (works on linux, OSX, and windows)
- curl -sSL https://get.haskellstack.org/ | sh
# Building dependencies takes almost too long on windows, so build without
# optimisation (including when building the dependencies)
before_build:
- sh: cp stack.yaml stack.yaml.build
- ps: cp stack.yaml stack.yaml.build
- sh: 'echo "apply-ghc-options: everything" >> stack.yaml.build'
- ps: '"apply-ghc-options: everything" |Add-Content -Path .\stack.yaml.build'
- stack --stack-yaml stack.yaml.build build --only-dependencies --ghc-options=-O0
build_script:
- stack --stack-yaml stack.yaml.build build --copy-bins --ghc-options=-O0
#after_build:
#
#before_test:
#
# Cannot use stack run git-annex because it does not support --ghc-options
# and would rebuild all deps. Instead, use the binary --copy-bins installed.
test_script:
- cmd: C:\Users\appveyor\AppData\Roaming\local\bin\git-annex.exe test
- sh: ln -s $(stack path --local-bin)/git-annex git-annex
- sh: ln -s $(stack path --local-bin)/git-annex git-annex-shell
- sh: PATH=`pwd`:$PATH; export PATH; git-annex test
#after_test:
#
#on_success:
#
#on_failure:
#
on_finish:
# conditionally block the exit of a CI run for direct debugging
- sh: while [ -f ~/BLOCK ]; do sleep 5; done
- cmd: powershell.exe while ((Test-Path "C:\Users\\appveyor\\BLOCK.txt")) { Start-Sleep 5 }
# block exit until 60 minute timeout, for direct debugging
#- sh: while true; do sleep 5; done
#- cmd: powershell.exe while ($true) { Start-Sleep 5 }

8
.codespellrc Normal file
View file

@ -0,0 +1,8 @@
[codespell]
skip = .git,*.pdf,*.svg,*._comment,jquery.*.js,*.mdwn,changelog,CHANGELOG,list.2018,html,dist,dist-newstyle,.stack-work,man,tags,tmp
ignore-regex=\b(valUs|addIn)\b
# some common variables etc (case insensitive)
# keypair - constructs
## May be TODO later, touches too much
# sentinal -> sentinel
ignore-words-list = dne,inout,fo,ot,bu,te,allright,inh,mor,myu,keypair,pasttime,sentinal,startd,ifset,afile,buildt,toword

View file

@ -1,18 +0,0 @@
Support ghc-9.8 by widening a lot of constraints.
This patch can be removed once upstream supports ghc 9.8 offically.
diff -uprN git-annex-10.20240227.orig/cabal.project git-annex-10.20240227/cabal.project
--- git-annex-10.20240227.orig/cabal.project 1970-01-01 01:00:00.000000000 +0100
+++ git-annex-10.20240227/cabal.project 2024-04-28 13:30:14.061706299 +0200
@@ -0,0 +1,10 @@
+packages: *.cabal
+
+allow-newer: dav
+allow-newer: haskeline:filepath
+allow-newer: haskeline:directory
+allow-newer: xml-hamlet
+allow-newer: aws:filepath
+allow-newer: dbus:network
+allow-newer: dbus:filepath
+allow-newer: microstache:filepath

View file

@ -1,85 +0,0 @@
on:
workflow_dispatch:
inputs:
ref_name:
description: 'Tag or commit'
required: true
type: string
push:
tags:
- '*'
jobs:
cabal-config-edge:
name: Generate cabal config for edge
runs-on: x86_64
container:
image: alpine:edge
env:
CI_ALPINE_TARGET_RELEASE: edge
steps:
- name: Environment setup
run: apk add nodejs git cabal patch
- name: Repo pull
uses: actions/checkout@v4
with:
fetch-depth: 1
ref: ${{ inputs.ref_name }}
- name: Config generation
run: |
patch -p1 -i .forgejo/patches/ghc-9.8.patch
HOME="${{ github.workspace}}"/cabal_cache cabal update
HOME="${{ github.workspace}}"/cabal_cache cabal v2-freeze --shadow-installed-packages --strong-flags --flags="+assistant +webapp +pairing +production +torrentparser +magicmime +benchmark -debuglocks +dbus +networkbsd +gitlfs +httpclientrestricted"
mv cabal.project.freeze git-annex.config
- name: Package upload
uses: forgejo/upload-artifact@v3
with:
name: cabalconfigedge
path: git-annex*.config
cabal-config-v320:
name: Generate cabal config for edge
runs-on: x86_64
container:
image: alpine:3.20
env:
CI_ALPINE_TARGET_RELEASE: v3.20
steps:
- name: Environment setup
run: apk add nodejs git cabal patch
- name: Repo pull
uses: actions/checkout@v4
with:
fetch-depth: 1
ref: ${{ inputs.ref_name }}
- name: Config generation
run: |
patch -p1 -i .forgejo/patches/ghc-9.8.patch
HOME="${{ github.workspace }}"/cabal_cache cabal update
HOME="${{ github.workspace }}"/cabal_cache cabal v2-freeze --shadow-installed-packages --strong-flags --flags="+assistant +webapp +pairing +production +torrentparser +magicmime +benchmark -debuglocks +dbus +networkbsd +gitlfs +httpclientrestricted"
mv cabal.project.freeze git-annex.config
- name: Package upload
uses: forgejo/upload-artifact@v3
with:
name: cabalconfig320
path: git-annex*.config
upload-tarball:
name: Upload to generic repo
runs-on: x86_64
needs: [cabal-config-edge,cabal-config-v320]
container:
image: alpine:latest
steps:
- name: Environment setup
run: apk add nodejs curl findutils
- name: Package download
uses: forgejo/download-artifact@v3
- name: Package deployment
run: |
if test $GITHUB_REF_NAME == "ci" ; then
CI_REF_NAME=${{ inputs.ref_name }}
else
CI_REF_NAME=$GITHUB_REF_NAME
fi
curl --user ${{ vars.CODE_FORGEJO_USER }}:${{ secrets.CODE_FORGEJO_TOKEN }} --upload-file ./cabalconfigedge/git-annex.config ${{ github.server_url }}/api/packages/mirrors/generic/git-annex/$CI_REF_NAME/git-annex-$CI_REF_NAME-edge.cabal
curl --user ${{ vars.CODE_FORGEJO_USER }}:${{ secrets.CODE_FORGEJO_TOKEN }} --upload-file ./cabalconfig320/git-annex.config ${{ github.server_url }}/api/packages/mirrors/generic/git-annex/$CI_REF_NAME/git-annex-$CI_REF_NAME-v320.cabal

View file

@ -1,50 +0,0 @@
on:
workflow_dispatch:
schedule:
- cron: '@hourly'
jobs:
mirror:
name: Pull from upstream
runs-on: x86_64
container:
image: alpine:latest
env:
upstream: https://git.joeyh.name/git/git-annex.git
tags: '10.2024*'
steps:
- name: Environment setup
run: apk add grep git sed coreutils bash nodejs
- name: Fetch destination
uses: actions/checkout@v4
with:
fetch_depth: 1
ref: ci
token: ${{ secrets.CODE_FORGEJO_TOKEN }}
- name: Missing tag detecting
run: |
git ls-remote $upstream "refs/tags/$tags" | grep -v '{' | sed 's|.*/||' | sort > upstream_tags
git ls-remote ${{ github.server_url}}/${{ github.repository }} "refs/tags/$tags" | grep -v '{' | sed 's|.*/||' | sort > destination_tags
comm -23 upstream_tags destination_tags > missing_tags
echo "Missing tags:"
cat missing_tags
- name: Missing tag fetch
run: |
git remote add upstream $upstream
while read tag; do
git fetch upstream tag $tag --no-tags
done < missing_tags
- name: Packaging workflow injection
run: |
while read tag; do
git checkout $tag
git tag -d $tag
git checkout ci -- ./.forgejo
git config user.name "forgejo-actions[bot]"
git config user.email "dev@ayakael.net"
git commit -m 'Inject custom workflow'
git tag -a $tag -m $tag
done < missing_tags
- name: Push to destination
run: git push --force origin refs/tags/*:refs/tags/* --tags

3
.ghci Normal file
View file

@ -0,0 +1,3 @@
:load Common
:set -XLambdaCase
:set -fno-warn-tabs

1
.gitattributes vendored Normal file
View file

@ -0,0 +1 @@
CHANGELOG merge=dpkg-mergechangelogs

41
.gitignore vendored Normal file
View file

@ -0,0 +1,41 @@
tags
TAGS
Setup
*.hi
*.o
tmp
test
Build/SysConfig
Build/Version
Build/InstallDesktopFile
Build/Standalone
Build/BuildVersion
Build/MakeMans
git-annex
git-annex-shell
git-remote-annex
man
git-union-merge
git-union-merge.1
doc/.ikiwiki
html
*.tix
.hpc
dist
dist-newstyle
cabal.project.local
cabal.project.local~*
result
git-annex-build-deps*
# Sandboxed builds
cabal-dev
.cabal-sandbox
cabal.sandbox.config
.stack-work
stack.yaml.lock
# Project-local emacs configuration
.dir-locals.el
# OSX related
.DS_Store
.virthualenv
.tasty-rerun-log

30
.mailmap Normal file
View file

@ -0,0 +1,30 @@
Antoine Beaupré <anarcat@koumbit.org> anarcat <anarcat@web>
Antoine Beaupré <anarcat@koumbit.org> https://id.koumbit.net/anarcat <https://id.koumbit.net/anarcat@web>
Greg Grossmeier <greg@grossmeier.net> http://grossmeier.net/ <greg@web>
Jimmy Tang <jtang@tchpc.tcd.ie> jtang <jtang@web>
Joachim Breitner <mail@joachim-breitner.de> http://www.joachim-breitner.de/ <nomeata@web>
Joey Hess <id@joeyh.name> Joey Hess <joey@gnu.kitenet.net>
Joey Hess <id@joeyh.name> Joey Hess <joey@kitenet.net>
Joey Hess <id@joeyh.name> Joey Hess <joeyh@debian.org>
Joey Hess <id@joeyh.name> Joey Hess <joeyh@fischer.debian.org>
Joey Hess <id@joeyh.name> Joey Hess <joeyh@joeyh.name>
Joey Hess <id@joeyh.name> Joey Hess <joeyh@oberon.tam-lin.net>
Joey Hess <id@joeyh.name> Joey Hess <joeyh@oberon.underhill.private>
Joey Hess <id@joeyh.name> http://joey.kitenet.net/ <joey@web>
Joey Hess <id@joeyh.name> http://joeyh.name/ <http://joeyh.name/@web>
Joey Hess <id@joeyh.name> http://joeyh.name/ <joey@web>
Joey Hess <id@joeyh.name> https://www.google.com/accounts/o8/id?id=AItOawmJfIszzreLNvCqzqzvTayA9_9L6gb9RtY <Joey@web>
Johan Kiviniemi <devel@johan.kiviniemi.name> http://johan.kiviniemi.name/ <Johan@web>
Johan Kiviniemi <devel@johan.kiviniemi.name> http://johan.kiviniemi.name/ <http://johan.kiviniemi.name/@web>
Nicolas Pouillard <nicolas.pouillard@gmail.com> http://ertai.myopenid.com/ <npouillard@web>
Peter Simons <simons@cryp.to> Peter Simons <simons@ubuntu-12.04>
Peter Simons <simons@cryp.to> http://peter-simons.myopenid.com/ <http://peter-simons.myopenid.com/@web>
Philipp Kern <pkern@debian.org> http://phil.0x539.de/ <Philipp_Kern@web>
Richard Hartmann <richih@debian.org> https://www.google.com/accounts/o8/id?id=AItOawl9sYlePmv1xK-VvjBdN-5doOa_Xw-jH4U <Richard@web>
Yaroslav Halchenko <debian@onerussian.com>
Yaroslav Halchenko <debian@onerussian.com> yarikoptic <yarikoptic@web>
Yaroslav Halchenko <debian@onerussian.com> http://yarikoptic.myopenid.com/ <site-myopenid@web>
Yaroslav Halchenko <debian@onerussian.com> https://www.google.com/accounts/o8/id?id=AItOawnx8kHW66N3BqmkVpgtXDlYMvr8TJ5VvfY <Yaroslav@web>
Yaroslav Halchenko <debian@onerussian.com> https://me.yahoo.com/a/EbvxpTI_xP9Aod7Mg4cwGhgjrCrdM5s-#7c0f4 <https://me.yahoo.com/a/EbvxpTI_xP9Aod7Mg4cwGhgjrCrdM5s-#7c0f4@web>
Øyvind A. Holm <sunny@sunbase.org> http://sunny256.sunbase.org/ <sunny256@web>
Øyvind A. Holm <sunny@sunbase.org> https://sunny256.wordpress.com/ <sunny256@web>

483
Annex.hs Normal file
View file

@ -0,0 +1,483 @@
{- git-annex monad
-
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE GeneralizedNewtypeDeriving, BangPatterns, PackageImports #-}
module Annex (
Annex,
AnnexState(..),
AnnexRead(..),
new,
run,
eval,
makeRunner,
getRead,
getState,
changeState,
withState,
setField,
setOutput,
getField,
addCleanupAction,
gitRepo,
inRepo,
fromRepo,
calcRepo,
calcRepo',
getGitConfig,
overrideGitConfig,
changeGitRepo,
adjustGitRepo,
addGitConfigOverride,
getGitConfigOverrides,
getRemoteGitConfig,
withCurrentState,
changeDirectory,
getGitRemotes,
incError,
) where
import Common
import qualified Git
import qualified Git.Config
import qualified Git.Construct
import Annex.Fixup
import Git.HashObject
import Git.CheckAttr
import Git.CheckIgnore
import qualified Git.Hook
import qualified Git.Queue
import Types.Key
import Types.Backend
import Types.GitConfig
import qualified Types.Remote
import Types.Crypto
import Types.BranchState
import Types.TrustLevel
import Types.Group
import Types.Messages
import Types.Concurrency
import Types.UUID
import Types.FileMatcher
import Types.NumCopies
import Types.LockCache
import Types.DesktopNotify
import Types.CleanupActions
import Types.AdjustedBranch
import Types.WorkerPool
import Types.IndexFiles
import Types.CatFileHandles
import Types.RemoteConfig
import Types.TransferrerPool
import Types.VectorClock
import Types.Cluster
import Types.MaxSize
import Annex.VectorClock.Utility
import Annex.Debug.Utility
import qualified Database.Keys.Handle as Keys
import Utility.InodeCache
import Utility.Url
import Utility.ResourcePool
import Utility.HumanTime
import Git.Credential (CredentialCache(..))
import "mtl" Control.Monad.Reader
import Control.Concurrent
import Control.Concurrent.STM
import qualified Control.Monad.Fail as Fail
import qualified Data.Map.Strict as M
import qualified Data.Set as S
import Data.Time.Clock.POSIX
{- git-annex's monad is a ReaderT around an AnnexState stored in a MVar,
- and an AnnexRead. The MVar is not exposed outside this module.
-
- Note that when an Annex action fails and the exception is caught,
- any changes the action has made to the AnnexState are retained,
- due to the use of the MVar to store the state.
-}
newtype Annex a = Annex { runAnnex :: ReaderT (MVar AnnexState, AnnexRead) IO a }
deriving (
Monad,
MonadIO,
MonadReader (MVar AnnexState, AnnexRead),
MonadCatch,
MonadThrow,
MonadMask,
Fail.MonadFail,
Functor,
Applicative,
Alternative
)
-- Values that can be read, but not modified by an Annex action.
data AnnexRead = AnnexRead
{ branchstate :: MVar BranchState
, activekeys :: TVar (M.Map Key ThreadId)
, activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer)
, keysdbhandle :: Keys.DbHandle
, sshstalecleaned :: TMVar Bool
, signalactions :: TVar (M.Map SignalAction (Int -> IO ()))
, transferrerpool :: TransferrerPool
, debugenabled :: Bool
, debugselector :: DebugSelector
, explainenabled :: Bool
, ciphers :: TMVar (M.Map StorableCipher Cipher)
, fast :: Bool
, force :: Bool
, forcenumcopies :: Maybe NumCopies
, forcemincopies :: Maybe MinCopies
, forcebackend :: Maybe String
, rebalance :: Bool
, useragent :: Maybe String
, desktopnotify :: DesktopNotify
, gitcredentialcache :: TMVar CredentialCache
}
newAnnexRead :: GitConfig -> IO AnnexRead
newAnnexRead c = do
bs <- newMVar startBranchState
emptyactivekeys <- newTVarIO M.empty
emptyactiveremotes <- newMVar M.empty
kh <- Keys.newDbHandle
sc <- newTMVarIO False
si <- newTVarIO M.empty
tp <- newTransferrerPool
cm <- newTMVarIO M.empty
cc <- newTMVarIO (CredentialCache M.empty)
return $ AnnexRead
{ branchstate = bs
, activekeys = emptyactivekeys
, activeremotes = emptyactiveremotes
, keysdbhandle = kh
, sshstalecleaned = sc
, signalactions = si
, transferrerpool = tp
, debugenabled = annexDebug c
, debugselector = debugSelectorFromGitConfig c
, explainenabled = False
, ciphers = cm
, fast = False
, force = False
, forcebackend = Nothing
, forcenumcopies = Nothing
, forcemincopies = Nothing
, rebalance = False
, useragent = Nothing
, desktopnotify = mempty
, gitcredentialcache = cc
}
-- Values that can change while running an Annex action.
data AnnexState = AnnexState
{ repo :: Git.Repo
, repoadjustment :: (Git.Repo -> IO Git.Repo)
, gitconfig :: GitConfig
, gitconfigadjustment :: (GitConfig -> GitConfig)
, gitconfigoverride :: [String]
, gitremotes :: Maybe [Git.Repo]
, gitconfiginodecache :: Maybe InodeCache
, backend :: Maybe (BackendA Annex)
, remotes :: [Types.Remote.RemoteA Annex]
, output :: MessageState
, concurrency :: ConcurrencySetting
, daemon :: Bool
, repoqueue :: Maybe (Git.Queue.Queue Annex)
, catfilehandles :: CatFileHandles
, hashobjecthandle :: Maybe (ResourcePool HashObjectHandle)
, checkattrhandle :: Maybe (ResourcePool CheckAttrHandle)
, checkignorehandle :: Maybe (ResourcePool CheckIgnoreHandle)
, globalnumcopies :: Maybe (Maybe NumCopies)
, globalmincopies :: Maybe (Maybe MinCopies)
, limit :: ExpandableMatcher Annex
, timelimit :: Maybe (Duration, POSIXTime)
, sizelimit :: Maybe (TVar Integer)
, uuiddescmap :: Maybe UUIDDescMap
, preferredcontentmap :: Maybe (FileMatcherMap Annex)
, requiredcontentmap :: Maybe (FileMatcherMap Annex)
, remoteconfigmap :: Maybe (M.Map UUID RemoteConfig)
, clusters :: Maybe (Annex Clusters)
, maxsizes :: Maybe (M.Map UUID MaxSize)
, forcetrust :: TrustMap
, trustmap :: Maybe TrustMap
, groupmap :: Maybe GroupMap
, lockcache :: LockCache
, fields :: M.Map String String
, cleanupactions :: M.Map CleanupAction (Annex ())
, sentinalstatus :: Maybe SentinalStatus
, errcounter :: Integer
, reachedlimit :: Bool
, adjustedbranchrefreshcounter :: Integer
, unusedkeys :: Maybe (S.Set Key)
, tempurls :: M.Map Key URLString
, existinghooks :: M.Map Git.Hook.Hook Bool
, workers :: Maybe (TMVar (WorkerPool (AnnexState, AnnexRead)))
, cachedcurrentbranch :: (Maybe (Maybe Git.Branch, Maybe Adjustment))
, cachedgitenv :: Maybe (AltIndexFile, FilePath, [(String, String)])
, urloptions :: Maybe UrlOptions
, insmudgecleanfilter :: Bool
, getvectorclock :: IO CandidateVectorClock
, proxyremote :: Maybe (Either ClusterUUID (Types.Remote.RemoteA Annex))
}
newAnnexState :: GitConfig -> Git.Repo -> IO AnnexState
newAnnexState c r = do
o <- newMessageState
vc <- startVectorClock
return $ AnnexState
{ repo = r
, repoadjustment = return
, gitconfig = c
, gitconfigadjustment = id
, gitconfigoverride = []
, gitremotes = Nothing
, gitconfiginodecache = Nothing
, backend = Nothing
, remotes = []
, output = o
, concurrency = ConcurrencyCmdLine NonConcurrent
, daemon = False
, repoqueue = Nothing
, catfilehandles = catFileHandlesNonConcurrent
, hashobjecthandle = Nothing
, checkattrhandle = Nothing
, checkignorehandle = Nothing
, globalnumcopies = Nothing
, globalmincopies = Nothing
, limit = BuildingMatcher []
, timelimit = Nothing
, sizelimit = Nothing
, uuiddescmap = Nothing
, preferredcontentmap = Nothing
, requiredcontentmap = Nothing
, remoteconfigmap = Nothing
, clusters = Nothing
, maxsizes = Nothing
, forcetrust = M.empty
, trustmap = Nothing
, groupmap = Nothing
, lockcache = M.empty
, fields = M.empty
, cleanupactions = M.empty
, sentinalstatus = Nothing
, errcounter = 0
, reachedlimit = False
, adjustedbranchrefreshcounter = 0
, unusedkeys = Nothing
, tempurls = M.empty
, existinghooks = M.empty
, workers = Nothing
, cachedcurrentbranch = Nothing
, cachedgitenv = Nothing
, urloptions = Nothing
, insmudgecleanfilter = False
, getvectorclock = vc
, proxyremote = Nothing
}
{- Makes an Annex state object for the specified git repo.
- Ensures the config is read, if it was not already, and performs
- any necessary git repo fixups. -}
new :: Git.Repo -> IO (AnnexState, AnnexRead)
new r = do
r' <- Git.Config.read r
let c = extractGitConfig FromGitConfig r'
st <- newAnnexState c =<< fixupRepo r' c
rd <- newAnnexRead c
return (st, rd)
{- Performs an action in the Annex monad from a starting state,
- returning a new state. -}
run :: (AnnexState, AnnexRead) -> Annex a -> IO (a, (AnnexState, AnnexRead))
run (st, rd) a = do
mv <- newMVar st
run' mv rd a
run' :: MVar AnnexState -> AnnexRead -> Annex a -> IO (a, (AnnexState, AnnexRead))
run' mvar rd a = do
r <- runReaderT (runAnnex a) (mvar, rd)
st <- takeMVar mvar
return (r, (st, rd))
{- Performs an action in the Annex monad from a starting state,
- and throws away the changed state. -}
eval :: (AnnexState, AnnexRead) -> Annex a -> IO a
eval v a = fst <$> run v a
{- Makes a runner action, that allows diving into IO and from inside
- the IO action, running an Annex action. -}
makeRunner :: Annex (Annex a -> IO a)
makeRunner = do
(mvar, rd) <- ask
return $ \a -> do
(r, (s, _rd)) <- run' mvar rd a
putMVar mvar s
return r
getRead :: (AnnexRead -> v) -> Annex v
getRead selector = selector . snd <$> ask
getState :: (AnnexState -> v) -> Annex v
getState selector = do
mvar <- fst <$> ask
st <- liftIO $ readMVar mvar
return $ selector st
changeState :: (AnnexState -> AnnexState) -> Annex ()
changeState modifier = do
mvar <- fst <$> ask
liftIO $ modifyMVar_ mvar $ return . modifier
withState :: (AnnexState -> IO (AnnexState, b)) -> Annex b
withState modifier = do
mvar <- fst <$> ask
liftIO $ modifyMVar mvar modifier
{- Sets a field to a value -}
setField :: String -> String -> Annex ()
setField field value = changeState $ \st ->
st { fields = M.insert field value $ fields st }
{- Adds a cleanup action to perform. -}
addCleanupAction :: CleanupAction -> Annex () -> Annex ()
addCleanupAction k a = changeState $ \st ->
st { cleanupactions = M.insert k a $ cleanupactions st }
{- Sets the type of output to emit. -}
setOutput :: OutputType -> Annex ()
setOutput o = changeState $ \st ->
let m = output st
in st { output = m { outputType = adjustOutputType (outputType m) o } }
{- Gets the value of a field. -}
getField :: String -> Annex (Maybe String)
getField field = M.lookup field <$> getState fields
{- Returns the annex's git repository. -}
gitRepo :: Annex Git.Repo
gitRepo = getState repo
{- Runs an IO action in the annex's git repository. -}
inRepo :: (Git.Repo -> IO a) -> Annex a
inRepo a = liftIO . a =<< gitRepo
{- Extracts a value from the annex's git repisitory. -}
fromRepo :: (Git.Repo -> a) -> Annex a
fromRepo a = a <$> gitRepo
{- Calculates a value from an annex's git repository and its GitConfig. -}
calcRepo :: (Git.Repo -> GitConfig -> IO a) -> Annex a
calcRepo a = do
s <- getState id
liftIO $ a (repo s) (gitconfig s)
calcRepo' :: (Git.Repo -> GitConfig -> a) -> Annex a
calcRepo' f = do
s <- getState id
pure $ f (repo s) (gitconfig s)
{- Gets the GitConfig settings. -}
getGitConfig :: Annex GitConfig
getGitConfig = getState gitconfig
{- Overrides a GitConfig setting. The modification persists across
- reloads of the repo's config. -}
overrideGitConfig :: (GitConfig -> GitConfig) -> Annex ()
overrideGitConfig f = changeState $ \st -> st
{ gitconfigadjustment = gitconfigadjustment st . f
, gitconfig = f (gitconfig st)
}
{- Adds an adjustment to the Repo data. Adjustments persist across reloads
- of the repo's config.
-
- Note that the action may run more than once, and should avoid eg,
- appending the same value to a repo's config when run repeatedly.
-}
adjustGitRepo :: (Git.Repo -> IO Git.Repo) -> Annex ()
adjustGitRepo a = do
changeState $ \st -> st { repoadjustment = \r -> repoadjustment st r >>= a }
changeGitRepo =<< gitRepo
{- Adds git config setting, like "foo=bar". It will be passed with -c
- to git processes. The config setting is also recorded in the Repo,
- and the GitConfig is updated. -}
addGitConfigOverride :: String -> Annex ()
addGitConfigOverride v = do
adjustGitRepo $ \r ->
Git.Config.store (encodeBS v) Git.Config.ConfigList $
r { Git.gitGlobalOpts = go (Git.gitGlobalOpts r) }
changeState $ \st -> st { gitconfigoverride = v : gitconfigoverride st }
where
-- Remove any prior occurrence of the setting to avoid
-- building up many of them when the adjustment is run repeatedly,
-- and add the setting to the end.
go [] = [Param "-c", Param v]
go (Param "-c": Param v':rest) | v' == v = go rest
go (c:rest) = c : go rest
{- Values that were passed to addGitConfigOverride. -}
getGitConfigOverrides :: Annex [String]
getGitConfigOverrides = reverse <$> getState gitconfigoverride
{- Changing the git Repo data also involves re-extracting its GitConfig. -}
changeGitRepo :: Git.Repo -> Annex ()
changeGitRepo r = do
repoadjuster <- getState repoadjustment
gitconfigadjuster <- getState gitconfigadjustment
r' <- liftIO $ repoadjuster r
changeState $ \st -> st
{ repo = r'
, gitconfig = gitconfigadjuster $
extractGitConfig FromGitConfig r'
, gitremotes = Nothing
}
{- Gets the RemoteGitConfig from a remote, given the Git.Repo for that
- remote. -}
getRemoteGitConfig :: Git.Repo -> Annex RemoteGitConfig
getRemoteGitConfig r = do
g <- gitRepo
liftIO $ atomically $ extractRemoteGitConfig g (Git.repoDescribe r)
{- Converts an Annex action into an IO action, that runs with a copy
- of the current Annex state.
-
- Use with caution; the action should not rely on changing the
- state, as it will be thrown away. -}
withCurrentState :: Annex a -> Annex (IO a)
withCurrentState a = do
(mvar, rd) <- ask
st <- liftIO $ readMVar mvar
return $ eval (st, rd) a
{- It's not safe to use setCurrentDirectory in the Annex monad,
- because the git repo paths are stored relative.
- Instead, use this.
-}
changeDirectory :: FilePath -> Annex ()
changeDirectory d = do
r <- liftIO . Git.adjustPath absPath =<< gitRepo
liftIO $ setCurrentDirectory d
r' <- liftIO $ Git.relPath r
changeState $ \st -> st { repo = r' }
incError :: Annex ()
incError = changeState $ \st ->
let !c = errcounter st + 1
!st' = st { errcounter = c }
in st'
getGitRemotes :: Annex [Git.Repo]
getGitRemotes = do
st <- getState id
case gitremotes st of
Just rs -> return rs
Nothing -> do
rs <- liftIO $ Git.Construct.fromRemotes (repo st)
changeState $ \st' -> st' { gitremotes = Just rs }
return rs

69
Annex/Action.hs Normal file
View file

@ -0,0 +1,69 @@
{- git-annex actions
-
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Action (
action,
verifiedAction,
quiesce,
stopCoProcesses,
) where
import qualified Data.Map as M
import Annex.Common
import qualified Annex
import Annex.Content
import Annex.CatFile
import Annex.CheckAttr
import Annex.HashObject
import Annex.CheckIgnore
import Annex.TransferrerPool
import qualified Database.Keys
{- Runs an action that may throw exceptions, catching and displaying them. -}
action :: Annex () -> Annex Bool
action a = tryNonAsync a >>= \case
Right () -> return True
Left e -> do
warning (UnquotedString (show e))
return False
verifiedAction :: Annex Verification -> Annex (Bool, Verification)
verifiedAction a = tryNonAsync a >>= \case
Right v -> return (True, v)
Left e -> do
warning (UnquotedString (show e))
return (False, UnVerified)
{- Rn all cleanup actions, save all state, stop all long-running child
- processes.
-
- This can be run repeatedly with other Annex actions run in between,
- but usually it is run only once at the end.
-
- When passed True, avoids making any commits to the git-annex branch,
- leaving changes in the journal for later commit.
-}
quiesce :: Bool -> Annex ()
quiesce nocommit = do
cas <- Annex.withState $ \st -> return
( st { Annex.cleanupactions = mempty }
, Annex.cleanupactions st
)
sequence_ (M.elems cas)
saveState nocommit
stopCoProcesses
Database.Keys.closeDb
{- Stops all long-running child processes, including git query processes. -}
stopCoProcesses :: Annex ()
stopCoProcesses = do
catFileStop
checkAttrStop
hashObjectStop
checkIgnoreStop
emptyTransferrerPool

688
Annex/AdjustedBranch.hs Normal file
View file

@ -0,0 +1,688 @@
{- adjusted branch
-
- Copyright 2016-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns, OverloadedStrings #-}
module Annex.AdjustedBranch (
Adjustment(..),
LinkAdjustment(..),
PresenceAdjustment(..),
LinkPresentAdjustment(..),
adjustmentHidesFiles,
adjustmentIsStable,
OrigBranch,
AdjBranch(..),
originalToAdjusted,
adjustedToOriginal,
fromAdjustedBranch,
getAdjustment,
enterAdjustedBranch,
adjustedBranchRefresh,
adjustedBranchRefreshFull,
adjustBranch,
adjustTree,
adjustToCrippledFileSystem,
commitForAdjustedBranch,
propigateAdjustedCommits,
propigateAdjustedCommits',
commitAdjustedTree,
commitAdjustedTree',
BasisBranch(..),
basisBranch,
setBasisBranch,
preventCommits,
AdjustedClone(..),
checkAdjustedClone,
checkVersionSupported,
isGitVersionSupported,
) where
import Annex.Common
import Types.AdjustedBranch
import Annex.AdjustedBranch.Name
import qualified Annex
import Git
import Git.Types
import qualified Git.Branch
import qualified Git.Ref
import qualified Git.Command
import qualified Git.Tree
import qualified Git.DiffTree
import Git.Tree (TreeItem(..))
import Git.Sha
import Git.Env
import Git.Index
import Git.FilePath
import qualified Git.LockFile
import qualified Git.Version
import Annex.CatFile
import Annex.Link
import Annex.Content.Presence
import Annex.CurrentBranch
import Types.CleanupActions
import qualified Database.Keys
import Config
import Logs.View (is_branchView)
import Logs.AdjustedBranchUpdate
import Utility.FileMode
import qualified Utility.RawFilePath as R
import Data.Time.Clock.POSIX
import qualified Data.Map as M
import System.PosixCompat.Files (fileMode)
class AdjustTreeItem t where
-- How to perform various adjustments to a TreeItem.
adjustTreeItem :: t -> TreeItem -> Annex (Maybe TreeItem)
-- Will adjusting a given tree always yield the same adjusted tree?
adjustmentIsStable :: t -> Bool
instance AdjustTreeItem Adjustment where
adjustTreeItem (LinkAdjustment l) t = adjustTreeItem l t
adjustTreeItem (PresenceAdjustment p Nothing) t = adjustTreeItem p t
adjustTreeItem (PresenceAdjustment p (Just l)) t =
adjustTreeItem p t >>= \case
Nothing -> return Nothing
Just t' -> adjustTreeItem l t'
adjustTreeItem (LinkPresentAdjustment l) t = adjustTreeItem l t
adjustmentIsStable (LinkAdjustment l) = adjustmentIsStable l
adjustmentIsStable (PresenceAdjustment p _) = adjustmentIsStable p
adjustmentIsStable (LinkPresentAdjustment l) = adjustmentIsStable l
instance AdjustTreeItem LinkAdjustment where
adjustTreeItem UnlockAdjustment =
ifSymlink adjustToPointer noAdjust
adjustTreeItem LockAdjustment =
ifSymlink noAdjust adjustToSymlink
adjustTreeItem FixAdjustment =
ifSymlink adjustToSymlink noAdjust
adjustTreeItem UnFixAdjustment =
ifSymlink (adjustToSymlink' gitAnnexLinkCanonical) noAdjust
adjustmentIsStable _ = True
instance AdjustTreeItem PresenceAdjustment where
adjustTreeItem HideMissingAdjustment =
ifPresent noAdjust hideAdjust
adjustTreeItem ShowMissingAdjustment =
noAdjust
adjustmentIsStable HideMissingAdjustment = False
adjustmentIsStable ShowMissingAdjustment = True
instance AdjustTreeItem LinkPresentAdjustment where
adjustTreeItem UnlockPresentAdjustment =
ifPresent adjustToPointer adjustToSymlink
adjustTreeItem LockPresentAdjustment =
-- Turn all pointers back to symlinks, whether the content
-- is present or not. This is done because the content
-- availability may have changed and the branch not been
-- re-adjusted to keep up, so there may be pointers whose
-- content is not present.
ifSymlink noAdjust adjustToSymlink
adjustmentIsStable UnlockPresentAdjustment = False
adjustmentIsStable LockPresentAdjustment = True
ifSymlink
:: (TreeItem -> Annex a)
-> (TreeItem -> Annex a)
-> TreeItem
-> Annex a
ifSymlink issymlink notsymlink ti@(TreeItem _f m _s)
| toTreeItemType m == Just TreeSymlink = issymlink ti
| otherwise = notsymlink ti
ifPresent
:: (TreeItem -> Annex (Maybe TreeItem))
-> (TreeItem -> Annex (Maybe TreeItem))
-> TreeItem
-> Annex (Maybe TreeItem)
ifPresent ispresent notpresent ti@(TreeItem _ _ s) =
catKey s >>= \case
Just k -> ifM (inAnnex k) (ispresent ti, notpresent ti)
Nothing -> return (Just ti)
noAdjust :: TreeItem -> Annex (Maybe TreeItem)
noAdjust = return . Just
hideAdjust :: TreeItem -> Annex (Maybe TreeItem)
hideAdjust _ = return Nothing
adjustToPointer :: TreeItem -> Annex (Maybe TreeItem)
adjustToPointer ti@(TreeItem f _m s) = catKey s >>= \case
Just k -> do
Database.Keys.addAssociatedFile k f
exe <- catchDefaultIO False $
(isExecutable . fileMode) <$>
(liftIO . R.getFileStatus
=<< calcRepo (gitAnnexLocation k))
let mode = fromTreeItemType $
if exe then TreeExecutable else TreeFile
Just . TreeItem f mode <$> hashPointerFile k
Nothing -> return (Just ti)
adjustToSymlink :: TreeItem -> Annex (Maybe TreeItem)
adjustToSymlink = adjustToSymlink' gitAnnexLink
adjustToSymlink' :: (RawFilePath -> Key -> Git.Repo -> GitConfig -> IO RawFilePath) -> TreeItem -> Annex (Maybe TreeItem)
adjustToSymlink' gitannexlink ti@(TreeItem f _m s) = catKey s >>= \case
Just k -> do
absf <- inRepo $ \r -> absPath $ fromTopFilePath f r
linktarget <- calcRepo $ gitannexlink absf k
Just . TreeItem f (fromTreeItemType TreeSymlink)
<$> hashSymlink linktarget
Nothing -> return (Just ti)
-- This is a hidden branch ref, that's used as the basis for the AdjBranch,
-- since pushes can overwrite the OrigBranch at any time. So, changes
-- are propagated from the AdjBranch to the head of the BasisBranch.
newtype BasisBranch = BasisBranch Ref
-- The basis for refs/heads/adjusted/master(unlocked) is
-- refs/basis/adjusted/master(unlocked).
basisBranch :: AdjBranch -> BasisBranch
basisBranch (AdjBranch adjbranch) = BasisBranch $
Ref ("refs/basis/" <> fromRef' (Git.Ref.base adjbranch))
getAdjustment :: Branch -> Maybe Adjustment
getAdjustment = fmap fst . adjustedToOriginal
fromAdjustedBranch :: Branch -> OrigBranch
fromAdjustedBranch b = maybe b snd (adjustedToOriginal b)
{- Enter an adjusted version of current branch (or, if already in an
- adjusted version of a branch, changes the adjustment of the original
- branch).
-
- Can fail, if no branch is checked out, or if the adjusted branch already
- exists, or if staged changes prevent a checkout.
-}
enterAdjustedBranch :: Adjustment -> Annex Bool
enterAdjustedBranch adj = inRepo Git.Branch.current >>= \case
Just currbranch -> case getAdjustment currbranch of
Just curradj | curradj == adj ->
updateAdjustedBranch adj (AdjBranch currbranch)
(fromAdjustedBranch currbranch)
_ -> go currbranch
Nothing -> do
warning "not on any branch!"
return False
where
go currbranch = do
let origbranch = fromAdjustedBranch currbranch
let adjbranch = adjBranch $ originalToAdjusted origbranch adj
ifM (inRepo (Git.Ref.exists adjbranch) <&&> (not <$> Annex.getRead Annex.force) <&&> pure (not (is_branchView origbranch)))
( do
mapM_ (warning . UnquotedString . unwords)
[ [ "adjusted branch"
, Git.Ref.describe adjbranch
, "already exists."
]
, [ "Aborting because that branch may have changes that have not yet reached"
, Git.Ref.describe origbranch
]
, [ "You can check out the adjusted branch manually to enter it,"
, "or add the --force option to overwrite the old branch."
]
]
return False
, do
starttime <- liftIO getPOSIXTime
b <- preventCommits $ const $
adjustBranch adj origbranch
ok <- checkoutAdjustedBranch b False
when ok $
recordAdjustedBranchUpdateFinished starttime
return ok
)
checkoutAdjustedBranch :: AdjBranch -> Bool -> Annex Bool
checkoutAdjustedBranch (AdjBranch b) quietcheckout = do
-- checkout can have output in large repos
unless quietcheckout
showOutput
inRepo $ Git.Command.runBool $
[ Param "checkout"
, Param $ fromRef $ Git.Ref.base b
, if quietcheckout then Param "--quiet" else Param "--progress"
]
{- Already in a branch with this adjustment, but the user asked to enter it
- again. This should have the same result as propagating any commits
- back to the original branch, checking out the original branch, deleting
- and rebuilding the adjusted branch, and then checking it out.
- But, it can be implemented more efficiently than that.
-}
updateAdjustedBranch :: Adjustment -> AdjBranch -> OrigBranch -> Annex Bool
updateAdjustedBranch adj (AdjBranch currbranch) origbranch
| not (adjustmentIsStable adj) = do
(b, origheadfile, newheadfile) <- preventCommits $ \commitlck -> do
-- Avoid losing any commits that the adjusted branch
-- has that have not yet been propagated back to the
-- origbranch.
_ <- propigateAdjustedCommits' True origbranch adj commitlck
origheadfile <- inRepo $ readFileStrict . Git.Ref.headFile
origheadsha <- inRepo (Git.Ref.sha currbranch)
b <- adjustBranch adj origbranch
-- Git normally won't do anything when asked to check
-- out the currently checked out branch, even when its
-- ref has changed. Work around this by writing a raw
-- sha to .git/HEAD.
newheadfile <- case origheadsha of
Just s -> do
inRepo $ \r -> do
let newheadfile = fromRef s
writeFile (Git.Ref.headFile r) newheadfile
return (Just newheadfile)
_ -> return Nothing
return (b, origheadfile, newheadfile)
-- Make git checkout quiet to avoid warnings about
-- disconnected branch tips being lost.
ok <- checkoutAdjustedBranch b True
-- Avoid leaving repo with detached head.
unless ok $ case newheadfile of
Nothing -> noop
Just v -> preventCommits $ \_commitlck -> inRepo $ \r -> do
v' <- readFileStrict (Git.Ref.headFile r)
when (v == v') $
writeFile (Git.Ref.headFile r) origheadfile
return ok
| otherwise = preventCommits $ \commitlck -> do
-- Done for consistency.
_ <- propigateAdjustedCommits' True origbranch adj commitlck
-- No need to actually update the branch because the
-- adjustment is stable.
return True
{- Passed an action that, if it succeeds may get or drop the Key associated
- with the file. When the adjusted branch needs to be refreshed to reflect
- those changes, it's handled here.
-
- Note that the AssociatedFile must be verified by this to point to the
- Key. In some cases, the value was provided by the user and might not
- really be an associated file.
-}
adjustedBranchRefresh :: AssociatedFile -> Annex a -> Annex a
adjustedBranchRefresh _af a = do
r <- a
go
return r
where
go = getCurrentBranch >>= \case
(Just origbranch, Just adj) ->
unless (adjustmentIsStable adj) $ do
recordAdjustedBranchUpdateNeeded
n <- annexAdjustedBranchRefresh <$> Annex.getGitConfig
unless (n == 0) $ ifM (checkcounter n)
-- This is slow, it would be better to incrementally
-- adjust the AssociatedFile, and only call this once
-- at shutdown to handle cases where not all
-- AssociatedFiles are known.
( adjustedBranchRefreshFull' adj origbranch
, Annex.addCleanupAction AdjustedBranchUpdate $
adjustedBranchRefreshFull' adj origbranch
)
_ -> return ()
checkcounter n
-- Special case, 1 (or true) refreshes only at shutdown.
| n == 1 = pure False
| otherwise = Annex.withState $ \s ->
let !c = Annex.adjustedbranchrefreshcounter s + 1
!enough = c >= pred n
!c' = if enough then 0 else c
!s' = s { Annex.adjustedbranchrefreshcounter = c' }
in pure (s', enough)
{- Slow, but more dependable version of adjustedBranchRefresh that
- does not rely on all AssociatedFiles being known. -}
adjustedBranchRefreshFull :: Adjustment -> OrigBranch -> Annex ()
adjustedBranchRefreshFull adj origbranch =
whenM isAdjustedBranchUpdateNeeded $ do
adjustedBranchRefreshFull' adj origbranch
adjustedBranchRefreshFull' :: Adjustment -> OrigBranch -> Annex ()
adjustedBranchRefreshFull' adj origbranch = do
-- Restage pointer files so modifications to them due to get/drop
-- do not prevent checking out the updated adjusted branch.
restagePointerFiles =<< Annex.gitRepo
starttime <- liftIO getPOSIXTime
let adjbranch = originalToAdjusted origbranch adj
ifM (updateAdjustedBranch adj adjbranch origbranch)
( recordAdjustedBranchUpdateFinished starttime
, warning "Updating adjusted branch failed."
)
adjustToCrippledFileSystem :: Annex ()
adjustToCrippledFileSystem = do
warning "Entering an adjusted branch where files are unlocked as this filesystem does not support locked files."
checkVersionSupported
whenM (isNothing <$> inRepo Git.Branch.current) $
commitForAdjustedBranch []
inRepo Git.Branch.current >>= \case
Just currbranch -> case getAdjustment currbranch of
Just curradj | curradj == adj -> return ()
_ -> do
let adjbranch = originalToAdjusted currbranch adj
ifM (inRepo (Git.Ref.exists $ adjBranch adjbranch))
( unlessM (checkoutAdjustedBranch adjbranch False) $
failedenter
, unlessM (enterAdjustedBranch adj) $
failedenter
)
Nothing -> failedenter
where
adj = LinkAdjustment UnlockAdjustment
failedenter = warning "Failed to enter adjusted branch!"
{- Commit before entering adjusted branch. Only needs to be done
- when the current branch does not have any commits yet.
-
- If something is already staged, it will be committed, but otherwise
- an empty commit will be made.
-}
commitForAdjustedBranch :: [CommandParam] -> Annex ()
commitForAdjustedBranch ps = do
cmode <- annexCommitMode <$> Annex.getGitConfig
let cquiet = Git.Branch.CommitQuiet True
void $ inRepo $ Git.Branch.commitCommand cmode cquiet $
[ Param "--allow-empty"
, Param "-m"
, Param "commit before entering adjusted branch"
] ++ ps
setBasisBranch :: BasisBranch -> Ref -> Annex ()
setBasisBranch (BasisBranch basis) new =
inRepo $ Git.Branch.update' basis new
setAdjustedBranch :: String -> AdjBranch -> Ref -> Annex ()
setAdjustedBranch msg (AdjBranch b) r = inRepo $ Git.Branch.update msg b r
adjustBranch :: Adjustment -> OrigBranch -> Annex AdjBranch
adjustBranch adj origbranch = do
-- Start basis off with the current value of the origbranch.
setBasisBranch basis origbranch
sha <- adjustCommit adj basis
setAdjustedBranch "entering adjusted branch" adjbranch sha
return adjbranch
where
adjbranch = originalToAdjusted origbranch adj
basis = basisBranch adjbranch
adjustCommit :: Adjustment -> BasisBranch -> Annex Sha
adjustCommit adj basis = do
treesha <- adjustTree adj basis
commitAdjustedTree treesha basis
adjustTree :: Adjustment -> BasisBranch -> Annex Sha
adjustTree adj (BasisBranch basis) = do
let toadj = adjustTreeItem adj
treesha <- Git.Tree.adjustTree
toadj
[]
(\_old new -> new)
[]
basis =<< Annex.gitRepo
return treesha
type CommitsPrevented = Git.LockFile.LockHandle
{- Locks git's index file, preventing git from making a commit, merge,
- or otherwise changing the HEAD ref while the action is run.
-
- Throws an IO exception if the index file is already locked.
-}
preventCommits :: (CommitsPrevented -> Annex a) -> Annex a
preventCommits = bracket setup cleanup
where
setup = do
lck <- fromRepo $ indexFileLock . indexFile
liftIO $ Git.LockFile.openLock (fromRawFilePath lck)
cleanup = liftIO . Git.LockFile.closeLock
{- Commits a given adjusted tree, with the provided parent ref.
-
- This should always yield the same value, even if performed in different
- clones of a repo, at different times. The commit message and other
- metadata is based on the parent.
-}
commitAdjustedTree :: Sha -> BasisBranch -> Annex Sha
commitAdjustedTree treesha parent@(BasisBranch b) =
commitAdjustedTree' treesha parent [b]
commitAdjustedTree' :: Sha -> BasisBranch -> [Ref] -> Annex Sha
commitAdjustedTree' treesha (BasisBranch basis) parents =
go =<< catCommit basis
where
go Nothing = do
cmode <- annexCommitMode <$> Annex.getGitConfig
inRepo $ mkcommit cmode
go (Just basiscommit) = do
cmode <- annexCommitMode <$> Annex.getGitConfig
inRepo $ commitWithMetaData
(commitAuthorMetaData basiscommit)
(commitCommitterMetaData basiscommit)
(mkcommit cmode)
-- Make sure that the exact message is used in the commit,
-- since that message is looked for later.
-- After git-annex 10.20240227, it's possible to use
-- commitTree instead of this, but this is being kept
-- for some time, for compatibility with older versions.
mkcommit cmode = Git.Branch.commitTreeExactMessage cmode
adjustedBranchCommitMessage parents treesha
{- This message should never be changed. -}
adjustedBranchCommitMessage :: String
adjustedBranchCommitMessage = "git-annex adjusted branch"
{- Allow for a trailing newline after the message. -}
hasAdjustedBranchCommitMessage :: Commit -> Bool
hasAdjustedBranchCommitMessage c =
dropWhileEnd (\x -> x == '\n' || x == '\r') (commitMessage c)
== adjustedBranchCommitMessage
findAdjustingCommit :: AdjBranch -> Annex (Maybe Commit)
findAdjustingCommit (AdjBranch b) = go =<< catCommit b
where
go Nothing = return Nothing
go (Just c)
| hasAdjustedBranchCommitMessage c = return (Just c)
| otherwise = case commitParent c of
[p] -> go =<< catCommit p
_ -> return Nothing
{- Check for any commits present on the adjusted branch that have not yet
- been propagated to the basis branch, and propagate them to the basis
- branch and from there on to the orig branch.
-
- After propagating the commits back to the basis branch,
- rebase the adjusted branch on top of the updated basis branch.
-}
propigateAdjustedCommits :: OrigBranch -> Adjustment -> Annex ()
propigateAdjustedCommits origbranch adj =
preventCommits $ \commitsprevented ->
join $ snd <$> propigateAdjustedCommits' True origbranch adj commitsprevented
{- Returns sha of updated basis branch, and action which will rebase
- the adjusted branch on top of the updated basis branch. -}
propigateAdjustedCommits'
:: Bool
-> OrigBranch
-> Adjustment
-> CommitsPrevented
-> Annex (Maybe Sha, Annex ())
propigateAdjustedCommits' warnwhendiverged origbranch adj _commitsprevented =
inRepo (Git.Ref.sha basis) >>= \case
Just origsha -> catCommit currbranch >>= \case
Just currcommit ->
newcommits >>= go origsha origsha False >>= \case
Left e -> do
warning (UnquotedString e)
return (Nothing, return ())
Right newparent -> return
( Just newparent
, rebase currcommit newparent
)
Nothing -> return (Nothing, return ())
Nothing -> do
warning $ UnquotedString $
"Cannot find basis ref " ++ fromRef basis ++ "; not propagating adjusted commits to original branch " ++ fromRef origbranch
return (Nothing, return ())
where
(BasisBranch basis) = basisBranch adjbranch
adjbranch@(AdjBranch currbranch) = originalToAdjusted origbranch adj
newcommits = inRepo $ Git.Branch.changedCommits basis currbranch
-- Get commits oldest first, so they can be processed
-- in order made.
[Param "--reverse"]
go origsha parent _ [] = do
setBasisBranch (BasisBranch basis) parent
inRepo (Git.Ref.sha origbranch) >>= \case
Just origbranchsha | origbranchsha /= origsha ->
when warnwhendiverged $
warning $ UnquotedString $
"Original branch " ++ fromRef origbranch ++ " has diverged from current adjusted branch " ++ fromRef currbranch
_ -> inRepo $ Git.Branch.update' origbranch parent
return (Right parent)
go origsha parent pastadjcommit (sha:l) = catCommit sha >>= \case
Just c
| hasAdjustedBranchCommitMessage c ->
go origsha parent True l
| pastadjcommit ->
reverseAdjustedCommit parent adj (sha, c) origbranch
>>= \case
Left e -> return (Left e)
Right commit -> go origsha commit pastadjcommit l
_ -> go origsha parent pastadjcommit l
rebase currcommit newparent = do
-- Reuse the current adjusted tree, and reparent it
-- on top of the newparent.
commitAdjustedTree (commitTree currcommit) (BasisBranch newparent)
>>= inRepo . Git.Branch.update rebaseOnTopMsg currbranch
rebaseOnTopMsg :: String
rebaseOnTopMsg = "rebasing adjusted branch on top of updated original branch"
{- Reverses an adjusted commit, and commit with provided commitparent,
- yielding a commit sha.
-
- Adjusts the tree of the commitparent, changing only the files that the
- commit changed, and reverse adjusting those changes.
-
- The commit message, and the author and committer metadata are
- copied over from the basiscommit. However, any gpg signature
- will be lost, and any other headers are not copied either. -}
reverseAdjustedCommit :: Sha -> Adjustment -> (Sha, Commit) -> OrigBranch -> Annex (Either String Sha)
reverseAdjustedCommit commitparent adj (csha, basiscommit) origbranch
| length (commitParent basiscommit) > 1 = return $
Left $ "unable to propagate merge commit " ++ show csha ++ " back to " ++ show origbranch
| otherwise = do
cmode <- annexCommitMode <$> Annex.getGitConfig
treesha <- reverseAdjustedTree commitparent adj csha
revadjcommit <- inRepo $ commitWithMetaData
(commitAuthorMetaData basiscommit)
(commitCommitterMetaData basiscommit) $
Git.Branch.commitTree cmode
[commitMessage basiscommit]
[commitparent] treesha
return (Right revadjcommit)
{- Adjusts the tree of the basis, changing only the files that the
- commit changed, and reverse adjusting those changes.
-
- commitDiff does not support merge commits, so the csha must not be a
- merge commit. -}
reverseAdjustedTree :: Sha -> Adjustment -> Sha -> Annex Sha
reverseAdjustedTree basis adj csha = do
(diff, cleanup) <- inRepo (Git.DiffTree.commitDiff csha)
let (adds, others) = partition (\dti -> Git.DiffTree.srcsha dti `elem` nullShas) diff
let (removes, changes) = partition (\dti -> Git.DiffTree.dstsha dti `elem` nullShas) others
adds' <- catMaybes <$>
mapM (adjustTreeItem reverseadj) (map diffTreeToTreeItem adds)
treesha <- Git.Tree.adjustTree
(propchanges changes)
adds'
(\_old new -> new)
(map Git.DiffTree.file removes)
basis
=<< Annex.gitRepo
void $ liftIO cleanup
return treesha
where
reverseadj = reverseAdjustment adj
propchanges changes ti@(TreeItem f _ _) =
case M.lookup (norm f) m of
Nothing -> return (Just ti) -- not changed
Just change -> adjustTreeItem reverseadj change
where
m = M.fromList $ map (\i@(TreeItem f' _ _) -> (norm f', i)) $
map diffTreeToTreeItem changes
norm = normalise . fromRawFilePath . getTopFilePath
diffTreeToTreeItem :: Git.DiffTree.DiffTreeItem -> TreeItem
diffTreeToTreeItem dti = TreeItem
(Git.DiffTree.file dti)
(Git.DiffTree.dstmode dti)
(Git.DiffTree.dstsha dti)
data AdjustedClone = InAdjustedClone | NotInAdjustedClone
{- Cloning a repository that has an adjusted branch checked out will
- result in the clone having the same adjusted branch checked out -- but
- the origbranch won't exist in the clone, nor will the basis. So
- to properly set up the adjusted branch, the origbranch and basis need
- to be set.
-
- We can't trust that the origin's origbranch matches up with the currently
- checked out adjusted branch; the origin could have the two branches
- out of sync (eg, due to another branch having been pushed to the origin's
- origbranch), or due to a commit on its adjusted branch not having been
- propagated back to origbranch.
-
- So, find the adjusting commit on the currently checked out adjusted
- branch, and use the parent of that commit as the basis, and set the
- origbranch to it.
-}
checkAdjustedClone :: Annex AdjustedClone
checkAdjustedClone = ifM isBareRepo
( return NotInAdjustedClone
, go =<< inRepo Git.Branch.current
)
where
go Nothing = return NotInAdjustedClone
go (Just currbranch) = case adjustedToOriginal currbranch of
Nothing -> return NotInAdjustedClone
Just (adj, origbranch) -> do
let basis@(BasisBranch bb) = basisBranch (originalToAdjusted origbranch adj)
unlessM (inRepo $ Git.Ref.exists bb) $ do
aps <- fmap commitParent <$> findAdjustingCommit (AdjBranch currbranch)
case aps of
Just [p] -> do
unlessM (inRepo $ Git.Ref.exists origbranch) $
inRepo $ Git.Branch.update' origbranch p
setBasisBranch basis p
_ -> giveup $ "Unable to clean up from clone of adjusted branch; perhaps you should check out " ++ Git.Ref.describe origbranch
return InAdjustedClone
checkVersionSupported :: Annex ()
checkVersionSupported =
unlessM (liftIO isGitVersionSupported) $
giveup "Your version of git is too old; upgrade it to 2.2.0 or newer to use adjusted branches."
-- git 2.2.0 needed for GIT_COMMON_DIR which is needed
-- by updateAdjustedBranch to use withWorkTreeRelated.
isGitVersionSupported :: IO Bool
isGitVersionSupported = not <$> Git.Version.older "2.2.0"

View file

@ -0,0 +1,167 @@
{- adjusted branch merging
-
- Copyright 2016-2023 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns, OverloadedStrings #-}
module Annex.AdjustedBranch.Merge (
canMergeToAdjustedBranch,
mergeToAdjustedBranch,
) where
import Annex.Common
import Annex.AdjustedBranch
import qualified Annex
import Git
import Git.Types
import qualified Git.Branch
import qualified Git.Ref
import qualified Git.Command
import qualified Git.Merge
import Git.Sha
import Annex.CatFile
import Annex.AutoMerge
import Annex.Tmp
import Annex.GitOverlay
import Utility.Tmp.Dir
import Utility.CopyFile
import Utility.Directory.Create
import qualified Data.ByteString as S
import qualified System.FilePath.ByteString as P
canMergeToAdjustedBranch :: Branch -> (OrigBranch, Adjustment) -> Annex Bool
canMergeToAdjustedBranch tomerge (origbranch, adj) =
inRepo $ Git.Branch.changed currbranch tomerge
where
AdjBranch currbranch = originalToAdjusted origbranch adj
{- Update the currently checked out adjusted branch, merging the provided
- branch into it. Note that the provided branch should be a non-adjusted
- branch. -}
mergeToAdjustedBranch :: Branch -> (OrigBranch, Adjustment) -> [Git.Merge.MergeConfig] -> Bool -> Git.Branch.CommitMode -> Annex Bool
mergeToAdjustedBranch tomerge (origbranch, adj) mergeconfig canresolvemerge commitmode = catchBoolIO $
join $ preventCommits go
where
adjbranch@(AdjBranch currbranch) = originalToAdjusted origbranch adj
basis = basisBranch adjbranch
go commitsprevented = do
(updatedorig, _) <- propigateAdjustedCommits'
False origbranch adj commitsprevented
changestomerge updatedorig
{- Since the adjusted branch changes files, merging tomerge
- directly into it would likely result in unnecessary merge
- conflicts. To avoid those conflicts, instead merge tomerge into
- updatedorig. The result of the merge can the be
- adjusted to yield the final adjusted branch.
-
- In order to do a merge into a ref that is not checked out,
- set the work tree to a temp directory, and set GIT_DIR
- to another temp directory, in which HEAD contains the
- updatedorig sha. GIT_COMMON_DIR is set to point to the real
- git directory, and so git can read and write objects from there,
- but will use GIT_DIR for HEAD and index.
-
- (Doing the merge this way also lets it run even though the main
- index file is currently locked.)
-}
changestomerge (Just updatedorig) = withOtherTmp $ \othertmpdir -> do
git_dir <- fromRepo Git.localGitDir
let git_dir' = fromRawFilePath git_dir
tmpwt <- fromRepo gitAnnexMergeDir
withTmpDirIn (fromRawFilePath othertmpdir) "git" $ \tmpgit -> withWorkTreeRelated tmpgit $
withemptydir git_dir tmpwt $ withWorkTree tmpwt $ do
liftIO $ writeFile (tmpgit </> "HEAD") (fromRef updatedorig)
-- Copy in refs and packed-refs, to work
-- around bug in git 2.13.0, which
-- causes it not to look in GIT_DIR for refs.
refs <- liftIO $ emptyWhenDoesNotExist $
dirContentsRecursive $
git_dir' </> "refs"
let refs' = (git_dir' </> "packed-refs") : refs
liftIO $ forM_ refs' $ \src -> do
let src' = toRawFilePath src
whenM (doesFileExist src) $ do
dest <- relPathDirToFile git_dir src'
let dest' = toRawFilePath tmpgit P.</> dest
createDirectoryUnder [git_dir]
(P.takeDirectory dest')
void $ createLinkOrCopy src' dest'
-- This reset makes git merge not care
-- that the work tree is empty; otherwise
-- it will think that all the files have
-- been staged for deletion, and sometimes
-- the merge includes these deletions
-- (for an unknown reason).
-- http://thread.gmane.org/gmane.comp.version-control.git/297237
inRepo $ Git.Command.run [Param "reset", Param "HEAD", Param "--quiet"]
when (tomerge /= origbranch) $
showAction $ UnquotedString $ "Merging into " ++ fromRef (Git.Ref.base origbranch)
merged <- autoMergeFrom' tomerge Nothing mergeconfig commitmode True
(const $ resolveMerge (Just updatedorig) tomerge True)
if merged
then do
!mergecommit <- liftIO $ extractSha
<$> S.readFile (tmpgit </> "HEAD")
-- This is run after the commit lock is dropped.
return $ postmerge mergecommit
else return $ return False
changestomerge Nothing = return $ return False
withemptydir git_dir d a = bracketIO setup cleanup (const a)
where
setup = do
whenM (doesDirectoryExist d) $
removeDirectoryRecursive d
createDirectoryUnder [git_dir] (toRawFilePath d)
cleanup _ = removeDirectoryRecursive d
{- A merge commit has been made between the basisbranch and
- tomerge. Update the basisbranch and origbranch to point
- to that commit, adjust it to get the new adjusted branch,
- and check it out.
-
- But, there may be unstaged work tree changes that conflict,
- so the check out is done by making a normal merge of
- the new adjusted branch.
-}
postmerge (Just mergecommit) = do
setBasisBranch basis mergecommit
inRepo $ Git.Branch.update' origbranch mergecommit
adjtree <- adjustTree adj (BasisBranch mergecommit)
adjmergecommit <- commitAdjustedTree adjtree (BasisBranch mergecommit)
-- Make currbranch be the parent, so that merging
-- this commit will be a fast-forward.
adjmergecommitff <- commitAdjustedTree' adjtree (BasisBranch mergecommit) [currbranch]
showAction "Merging into adjusted branch"
ifM (autoMergeFrom adjmergecommitff (Just currbranch) mergeconfig commitmode canresolvemerge)
( reparent adjtree adjmergecommit =<< getcurrentcommit
, return False
)
postmerge Nothing = return False
-- Now that the merge into the adjusted branch is complete,
-- take the tree from that merge, and attach it on top of the
-- adjmergecommit, if it's different.
reparent adjtree adjmergecommit (Just currentcommit) = do
if (commitTree currentcommit /= adjtree)
then do
cmode <- annexCommitMode <$> Annex.getGitConfig
c <- inRepo $ Git.Branch.commitTree cmode
["Merged " ++ fromRef tomerge]
[adjmergecommit]
(commitTree currentcommit)
inRepo $ Git.Branch.update "updating adjusted branch" currbranch c
propigateAdjustedCommits origbranch adj
else inRepo $ Git.Branch.update "updating adjusted branch" currbranch adjmergecommit
return True
reparent _ _ Nothing = return False
getcurrentcommit = inRepo Git.Branch.currentUnsafe >>= \case
Nothing -> return Nothing
Just c -> catCommit c

View file

@ -0,0 +1,99 @@
{- adjusted branch names
-
- Copyright 2016-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.AdjustedBranch.Name (
originalToAdjusted,
adjustedToOriginal,
AdjBranch(..),
OrigBranch,
) where
import Types.AdjustedBranch
import Git
import qualified Git.Ref
import Utility.Misc
import Control.Applicative
import Data.Char
import qualified Data.ByteString as S
adjustedBranchPrefix :: S.ByteString
adjustedBranchPrefix = "refs/heads/adjusted/"
class SerializeAdjustment t where
serializeAdjustment :: t -> S.ByteString
deserializeAdjustment :: S.ByteString -> Maybe t
instance SerializeAdjustment Adjustment where
serializeAdjustment (LinkAdjustment l) =
serializeAdjustment l
serializeAdjustment (PresenceAdjustment p Nothing) =
serializeAdjustment p
serializeAdjustment (PresenceAdjustment p (Just l)) =
serializeAdjustment p <> "-" <> serializeAdjustment l
serializeAdjustment (LinkPresentAdjustment l) =
serializeAdjustment l
deserializeAdjustment s =
(LinkAdjustment <$> deserializeAdjustment s)
<|>
(PresenceAdjustment <$> deserializeAdjustment s1 <*> pure (deserializeAdjustment s2))
<|>
(PresenceAdjustment <$> deserializeAdjustment s <*> pure Nothing)
<|>
(LinkPresentAdjustment <$> deserializeAdjustment s)
where
(s1, s2) = separate' (== (fromIntegral (ord '-'))) s
instance SerializeAdjustment LinkAdjustment where
serializeAdjustment UnlockAdjustment = "unlocked"
serializeAdjustment LockAdjustment = "locked"
serializeAdjustment FixAdjustment = "fixed"
serializeAdjustment UnFixAdjustment = "unfixed"
deserializeAdjustment "unlocked" = Just UnlockAdjustment
deserializeAdjustment "locked" = Just LockAdjustment
deserializeAdjustment "fixed" = Just FixAdjustment
deserializeAdjustment "unfixed" = Just UnFixAdjustment
deserializeAdjustment _ = Nothing
instance SerializeAdjustment PresenceAdjustment where
serializeAdjustment HideMissingAdjustment = "hidemissing"
serializeAdjustment ShowMissingAdjustment = "showmissing"
deserializeAdjustment "hidemissing" = Just HideMissingAdjustment
deserializeAdjustment "showmissing" = Just ShowMissingAdjustment
deserializeAdjustment _ = Nothing
instance SerializeAdjustment LinkPresentAdjustment where
serializeAdjustment UnlockPresentAdjustment = "unlockpresent"
serializeAdjustment LockPresentAdjustment = "lockpresent"
deserializeAdjustment "unlockpresent" = Just UnlockPresentAdjustment
deserializeAdjustment "lockpresent" = Just LockPresentAdjustment
deserializeAdjustment _ = Nothing
newtype AdjBranch = AdjBranch { adjBranch :: Branch }
originalToAdjusted :: OrigBranch -> Adjustment -> AdjBranch
originalToAdjusted orig adj = AdjBranch $ Ref $
adjustedBranchPrefix <> base <> "(" <> serializeAdjustment adj <> ")"
where
base = fromRef' (Git.Ref.base orig)
type OrigBranch = Branch
adjustedToOriginal :: Branch -> Maybe (Adjustment, OrigBranch)
adjustedToOriginal b
| adjustedBranchPrefix `S.isPrefixOf` bs = do
let (base, as) = separateEnd' (== openparen) (S.drop prefixlen bs)
adj <- deserializeAdjustment (S.takeWhile (/= closeparen) as)
Just (adj, Git.Ref.branchRef (Ref base))
| otherwise = Nothing
where
bs = fromRef' b
prefixlen = S.length adjustedBranchPrefix
openparen = fromIntegral (ord '(')
closeparen = fromIntegral (ord ')')

391
Annex/AutoMerge.hs Normal file
View file

@ -0,0 +1,391 @@
{- git-annex automatic merge conflict resolution
-
- Copyright 2012-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.AutoMerge
( autoMergeFrom
, autoMergeFrom'
, resolveMerge
, commitResolvedMerge
) where
import Annex.Common
import qualified Annex
import qualified Annex.Queue
import Annex.CatFile
import Annex.Link
import Annex.Content
import qualified Git.LsFiles as LsFiles
import qualified Git.UpdateIndex as UpdateIndex
import qualified Git.Merge
import qualified Git.Ref
import qualified Git
import qualified Git.Branch
import Git.Types (TreeItemType(..), fromTreeItemType)
import Git.FilePath
import Annex.ReplaceFile
import Annex.VariantFile
import qualified Database.Keys
import Annex.InodeSentinal
import Utility.InodeCache
import Utility.FileMode
import qualified Utility.RawFilePath as R
import qualified Data.Set as S
import qualified Data.Map as M
import qualified Data.ByteString.Lazy as L
import System.PosixCompat.Files (isSymbolicLink)
{- Merges from a branch into the current branch (which may not exist yet),
- with automatic merge conflict resolution.
-
- Callers should use Git.Branch.changed first, to make sure that
- there are changes from the current branch to the branch being merged in.
-}
autoMergeFrom :: Git.Ref -> Maybe Git.Ref -> [Git.Merge.MergeConfig] -> Git.Branch.CommitMode -> Bool -> Annex Bool
autoMergeFrom branch currbranch mergeconfig commitmode canresolvemerge =
autoMergeFrom' branch currbranch mergeconfig commitmode canresolvemerge resolvemerge
where
resolvemerge old
| canresolvemerge = resolveMerge old branch False
| otherwise = return False
autoMergeFrom' :: Git.Ref -> Maybe Git.Ref -> [Git.Merge.MergeConfig] -> Git.Branch.CommitMode -> Bool -> (Maybe Git.Ref -> Annex Bool) -> Annex Bool
autoMergeFrom' branch currbranch mergeconfig commitmode willresolvemerge toresolvemerge = do
showOutput
case currbranch of
Nothing -> go Nothing
Just b -> go =<< inRepo (Git.Ref.sha b)
where
go old = do
-- merge.directoryRenames=conflict plus automatic
-- merge conflict resolution results in files in a
-- "renamed" directory getting variant names,
-- so is not a great combination. If the user has
-- explicitly set it, use it, but otherwise when
-- merge conflicts will be resolved, override
-- to merge.directoryRenames=false.
overridedirectoryrenames <- if willresolvemerge
then isNothing . mergeDirectoryRenames
<$> Annex.getGitConfig
else pure False
let f r
| overridedirectoryrenames = r
{ Git.gitGlobalOpts =
Param "-c"
: Param "merge.directoryRenames=false"
: Git.gitGlobalOpts r
}
| otherwise = r
r <- inRepo (Git.Merge.merge branch mergeconfig commitmode . f)
<||> (toresolvemerge old <&&> commitResolvedMerge commitmode)
-- Merging can cause new associated files to appear
-- and the smudge filter will add them to the database.
-- To ensure that this process sees those changes,
-- close the database if it was open.
Database.Keys.closeDb
return r
{- Resolves a conflicted merge. It's important that any conflicts be
- resolved in a way that itself avoids later merge conflicts, since
- multiple repositories may be doing this concurrently.
-
- Only merge conflicts where at least one side is an annexed file
- is resolved.
-
- This uses the Keys pointed to by the files to construct new
- filenames. So when both sides modified annexed file foo,
- it will be deleted, and replaced with files foo.variant-A and
- foo.variant-B.
-
- On the other hand, when one side deleted foo, and the other modified it,
- it will be deleted, and the modified version stored as file
- foo.variant-A (or B).
-
- It's also possible that one side has foo as an annexed file, and
- the other as a directory or non-annexed file. The annexed file
- is renamed to resolve the merge, and the other object is preserved as-is.
-
- The merge is resolved in the work tree and files
- staged, to clean up from a conflicted merge that was run in the work
- tree.
-
- This is complicated by needing to support merges run in an overlay
- work tree, in which case the CWD won't be within the work tree.
- In this mode, there is no need to update the work tree at all,
- as the overlay work tree will get deleted.
-
- Unlocked files remain unlocked after merging, and locked files
- remain locked. When the merge conflict is between a locked and unlocked
- file, that otherwise point to the same content, the unlocked mode wins.
- This is done because only unlocked files work in filesystems that don't
- support symlinks.
-
- Returns false when there are no merge conflicts to resolve.
- A git merge can fail for other reasons, and this allows detecting
- such failures.
-}
resolveMerge :: Maybe Git.Ref -> Git.Ref -> Bool -> Annex Bool
resolveMerge us them inoverlay = do
top <- if inoverlay
then pure "."
else fromRepo Git.repoPath
(fs, cleanup) <- inRepo (LsFiles.unmerged [top])
srcmap <- if inoverlay
then pure M.empty
else inodeMap $ pure (concatMap getunmergedfiles fs, return True)
(mergedks, mergedfs) <- unzip <$> mapM (resolveMerge' srcmap us them inoverlay) fs
let mergedks' = concat mergedks
let mergedfs' = catMaybes mergedfs
let merged = not (null mergedfs')
void $ liftIO cleanup
unless inoverlay $ do
(deleted, cleanup2) <- inRepo (LsFiles.deleted [] [top])
unless (null deleted) $
Annex.Queue.addCommand [] "rm"
[Param "--quiet", Param "-f", Param "--"]
(map fromRawFilePath deleted)
void $ liftIO cleanup2
when merged $ do
Annex.Queue.flush
unless inoverlay $ do
unstagedmap <- inodeMap $ inRepo $
LsFiles.notInRepo [] False [top]
cleanConflictCruft mergedks' mergedfs' unstagedmap
showLongNote "Merge conflict was automatically resolved; you may want to examine the result."
return merged
where
getunmergedfiles u = catMaybes
[ Just (LsFiles.unmergedFile u)
, LsFiles.unmergedSiblingFile u
]
resolveMerge' :: InodeMap -> Maybe Git.Ref -> Git.Ref -> Bool -> LsFiles.Unmerged -> Annex ([Key], Maybe FilePath)
resolveMerge' _ Nothing _ _ _ = return ([], Nothing)
resolveMerge' unstagedmap (Just us) them inoverlay u = do
kus <- getkey LsFiles.valUs
kthem <- getkey LsFiles.valThem
case (kus, kthem) of
-- Both sides of conflict are annexed files
(Just keyUs, Just keyThem)
| keyUs /= keyThem -> resolveby [keyUs, keyThem] $ do
makevariantannexlink keyUs LsFiles.valUs
makevariantannexlink keyThem LsFiles.valThem
-- cleanConflictCruft can't handle unlocked
-- files, so delete here.
unless inoverlay $
unless (islocked LsFiles.valUs) $
liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath file)
| otherwise -> resolveby [keyUs, keyThem] $
-- Only resolve using symlink when both
-- were locked, otherwise use unlocked
-- pointer.
-- In either case, keep original filename.
if islocked LsFiles.valUs && islocked LsFiles.valThem
then makesymlink keyUs file
else makepointer keyUs file (combinedmodes)
-- Our side is annexed file, other side is not.
-- Make the annexed file into a variant file and graft in the
-- other file/directory as it was.
(Just keyUs, Nothing) -> resolveby [keyUs] $ do
graftin them file LsFiles.valThem LsFiles.valThem LsFiles.valUs
makevariantannexlink keyUs LsFiles.valUs
-- Our side is not annexed file, other side is.
(Nothing, Just keyThem) -> resolveby [keyThem] $ do
graftin us file LsFiles.valUs LsFiles.valUs LsFiles.valThem
makevariantannexlink keyThem LsFiles.valThem
-- Neither side is annexed file; cannot resolve.
(Nothing, Nothing) -> return ([], Nothing)
where
file = fromRawFilePath $ LsFiles.unmergedFile u
sibfile = fromRawFilePath <$> LsFiles.unmergedSiblingFile u
getkey select =
case select (LsFiles.unmergedSha u) of
Just sha -> catKey sha
Nothing -> pure Nothing
islocked select = select (LsFiles.unmergedTreeItemType u) == Just TreeSymlink
combinedmodes = case catMaybes [ourmode, theirmode] of
[] -> Nothing
l -> Just (combineModes l)
where
ourmode = fromTreeItemType
<$> LsFiles.valUs (LsFiles.unmergedTreeItemType u)
theirmode = fromTreeItemType
<$> LsFiles.valThem (LsFiles.unmergedTreeItemType u)
makevariantannexlink key select
| islocked select = makesymlink key dest
| otherwise = makepointer key dest destmode
where
dest = variantFile file key
destmode = fromTreeItemType <$> select (LsFiles.unmergedTreeItemType u)
stagefile :: FilePath -> Annex FilePath
stagefile f
| inoverlay = (</> f) . fromRawFilePath <$> fromRepo Git.repoPath
| otherwise = pure f
makesymlink key dest = do
l <- calcRepo $ gitAnnexLink (toRawFilePath dest) key
unless inoverlay $ replacewithsymlink dest l
dest' <- toRawFilePath <$> stagefile dest
stageSymlink dest' =<< hashSymlink l
replacewithsymlink dest link = replaceWorkTreeFile dest $
makeGitLink link
makepointer key dest destmode = do
unless inoverlay $
unlessM (reuseOldFile unstagedmap key file dest) $
linkFromAnnex key (toRawFilePath dest) destmode >>= \case
LinkAnnexFailed -> liftIO $
writePointerFile (toRawFilePath dest) key destmode
_ -> noop
dest' <- toRawFilePath <$> stagefile dest
stagePointerFile dest' destmode =<< hashPointerFile key
unless inoverlay $
Database.Keys.addAssociatedFile key
=<< inRepo (toTopFilePath (toRawFilePath dest))
{- Stage a graft of a directory or file from a branch
- and update the work tree. -}
graftin b item selectwant selectwant' selectunwant = do
Annex.Queue.addUpdateIndex
=<< fromRepo (UpdateIndex.lsSubTree b item)
let replacefile isexecutable = case selectwant' (LsFiles.unmergedSha u) of
Nothing -> noop
Just sha -> replaceWorkTreeFile item $ \tmp -> do
c <- catObject sha
liftIO $ L.writeFile (decodeBS tmp) c
when isexecutable $
liftIO $ void $ tryIO $
modifyFileMode tmp $
addModes executeModes
-- Update the work tree to reflect the graft.
unless inoverlay $ case (selectwant (LsFiles.unmergedTreeItemType u), selectunwant (LsFiles.unmergedTreeItemType u)) of
(Just TreeSymlink, _) -> do
case selectwant' (LsFiles.unmergedSha u) of
Nothing -> noop
Just sha -> do
link <- catSymLinkTarget sha
replacewithsymlink item link
(Just TreeFile, Just TreeSymlink) -> replacefile False
(Just TreeExecutable, Just TreeSymlink) -> replacefile True
_ -> ifM (liftIO $ doesDirectoryExist item)
-- a conflict between a file and a directory
-- leaves the directory, so since a directory
-- is there, it must be what was wanted
( noop
-- probably a file with conflict markers is
-- in the work tree; replace with grafted
-- file content (this is needed when
-- the annexed file is unlocked)
, replacefile False
)
resolveby ks a = do
{- Remove conflicted file from index so merge can be resolved.
- If there's a sibling conflicted file, remove it too. -}
Annex.Queue.addCommand [] "rm"
[ Param "--quiet"
, Param "-f"
, Param "--cached"
, Param "--"
]
(catMaybes [Just file, sibfile])
liftIO $ maybe noop
(removeWhenExistsWith R.removeLink . toRawFilePath)
sibfile
void a
return (ks, Just file)
{- git-merge moves conflicting files away to files
- named something like f~HEAD or f~branch or just f, but the
- exact name chosen can vary. Once the conflict is resolved,
- this cruft can be deleted. To avoid deleting legitimate
- files that look like this, only delete files that are
- A) not staged in git and
- B) have a name related to the merged files and
- C) are pointers to or have the content of keys that were involved
- in the merge.
-}
cleanConflictCruft :: [Key] -> [FilePath] -> InodeMap -> Annex ()
cleanConflictCruft resolvedks resolvedfs unstagedmap = do
is <- S.fromList . map (inodeCacheToKey Strongly) . concat
<$> mapM Database.Keys.getInodeCaches resolvedks
forM_ (M.toList unstagedmap) $ \(i, f) ->
whenM (matchesresolved is i f) $
liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath f)
where
fs = S.fromList resolvedfs
ks = S.fromList resolvedks
inks = maybe False (flip S.member ks)
matchesresolved is i f
| S.member f fs || S.member (conflictCruftBase f) fs = anyM id
[ pure $ either (const False) (`S.member` is) i
, inks <$> isAnnexLink (toRawFilePath f)
, inks <$> liftIO (isPointerFile (toRawFilePath f))
]
| otherwise = return False
conflictCruftBase :: FilePath -> FilePath
conflictCruftBase f = reverse $ drop 1 $ dropWhile (/= '~') $ reverse f
{- When possible, reuse an existing file from the srcmap as the
- content of a worktree file in the resolved merge. It must have the
- same name as the origfile, or a name that git would use for conflict
- cruft. And, its inode cache must be a known one for the key. -}
reuseOldFile :: InodeMap -> Key -> FilePath -> FilePath -> Annex Bool
reuseOldFile srcmap key origfile destfile = do
is <- map (inodeCacheToKey Strongly)
<$> Database.Keys.getInodeCaches key
liftIO $ go $ mapMaybe (\i -> M.lookup (Right i) srcmap) is
where
go [] = return False
go (f:fs)
| f == origfile || conflictCruftBase f == origfile =
ifM (doesFileExist f)
( do
renameFile f destfile
return True
, go fs
)
| otherwise = go fs
commitResolvedMerge :: Git.Branch.CommitMode -> Annex Bool
commitResolvedMerge commitmode = do
commitquiet <- Git.Branch.CommitQuiet <$> commandProgressDisabled
inRepo $ Git.Branch.commitCommand commitmode commitquiet
[ Param "--no-verify"
, Param "-m"
, Param "git-annex automatic merge conflict fix"
]
type InodeMap = M.Map (Either FilePath InodeCacheKey) FilePath
inodeMap :: Annex ([RawFilePath], IO Bool) -> Annex InodeMap
inodeMap getfiles = do
(fs, cleanup) <- getfiles
fsis <- forM fs $ \f -> do
s <- liftIO $ R.getSymbolicLinkStatus f
let f' = fromRawFilePath f
if isSymbolicLink s
then pure $ Just (Left f', f')
else withTSDelta (\d -> liftIO $ toInodeCache d f s)
>>= return . \case
Just i -> Just (Right (inodeCacheToKey Strongly i), f')
Nothing -> Nothing
void $ liftIO cleanup
return $ M.fromList $ catMaybes fsis

42
Annex/Balanced.hs Normal file
View file

@ -0,0 +1,42 @@
{- Balancing between UUIDs
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Balanced where
import Key
import Types.UUID
import Utility.Hash
import Data.List
import Data.Maybe
import Data.Bits (shiftL)
import qualified Data.Set as S
import qualified Data.ByteArray as BA
type BalancedPicker = S.Set UUID -> Key -> UUID
-- The set of UUIDs provided here are all the UUIDs that are ever
-- expected to be picked amoung. A subset of that can be provided
-- when later using the BalancedPicker.
balancedPicker :: S.Set UUID -> BalancedPicker
balancedPicker s = \s' key ->
let n = calcMac tointeger HmacSha256 combineduuids (serializeKey' key)
in S.elemAt (fromIntegral (n `mod` m)) s'
where
combineduuids = mconcat (map fromUUID (S.toAscList s))
m = fromIntegral (S.size s)
tointeger :: Digest a -> Integer
tointeger = foldl' (\i b -> (i `shiftL` 8) + fromIntegral b) 0
. BA.unpack
{- The selection for a given key never changes. -}
prop_balanced_stable :: Bool
prop_balanced_stable = balancedPicker us us k == toUUID "332"
where
us = S.fromList $ map (toUUID . show) [1..500 :: Int]
k = fromJust $ deserializeKey "WORM--test"

54
Annex/BloomFilter.hs Normal file
View file

@ -0,0 +1,54 @@
{- git-annex bloom filter
-
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.BloomFilter where
import Annex.Common
import qualified Annex
import Utility.Bloom
import Control.Monad.ST
{- A bloom filter capable of holding half a million keys with a
- false positive rate of 1 in 10000000 uses around 16 mb of memory,
- so will easily fit on even my lowest memory systems.
-}
bloomCapacity :: Annex Int
bloomCapacity = fromMaybe 500000 . annexBloomCapacity <$> Annex.getGitConfig
bloomAccuracy :: Annex Int
bloomAccuracy = fromMaybe 10000000 . annexBloomAccuracy <$> Annex.getGitConfig
bloomBitsHashes :: Annex (Int, Int)
bloomBitsHashes = do
capacity <- bloomCapacity
accuracy <- bloomAccuracy
case safeSuggestSizing capacity (1 / fromIntegral accuracy) of
Left e -> do
warning $ UnquotedString $
"bloomfilter " ++ e ++ "; falling back to sane value"
-- precaulculated value for 500000 (1/10000000)
return (16777216,23)
Right v -> return v
{- Creates a bloom filter, and runs an action to populate it.
-
- The action is passed a callback that it can use to feed values into the
- bloom filter.
-
- Once the action completes, the mutable filter is frozen
- for later use.
-}
genBloomFilter :: Hashable v => ((v -> Annex ()) -> Annex ()) -> Annex (Bloom v)
genBloomFilter populate = do
(numbits, numhashes) <- bloomBitsHashes
bloom <- lift $ newMB (cheapHashes numhashes) numbits
populate $ \v -> lift $ insertMB bloom v
lift $ unsafeFreezeMB bloom
where
lift = liftIO . stToIO
bloomFilter :: [v] -> Bloom v -> [v]
bloomFilter l bloom = filter (\v -> v `notElemB` bloom) l

1084
Annex/Branch.hs Normal file

File diff suppressed because it is too large Load diff

108
Annex/Branch/Transitions.hs Normal file
View file

@ -0,0 +1,108 @@
{- git-annex branch transitions
-
- Copyright 2013-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Branch.Transitions (
getTransitionCalculator,
filterBranch,
) where
import Common
import Logs
import Logs.Transitions
import qualified Logs.UUIDBased as UUIDBased
import qualified Logs.Presence.Pure as Presence
import qualified Logs.Chunk.Pure as Chunk
import qualified Logs.MetaData.Pure as MetaData
import qualified Logs.Remote.Pure as Remote
import Logs.MapLog
import Types.TrustLevel
import Types.UUID
import Types.MetaData
import Types.Remote
import Types.Transitions
import Types.GitConfig (GitConfig)
import Types.ProposedAccepted
import Annex.SpecialRemote.Config
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder
getTransitionCalculator :: Transition -> Maybe (TrustMap -> M.Map UUID RemoteConfig -> GitConfig -> TransitionCalculator)
getTransitionCalculator ForgetGitHistory = Nothing
getTransitionCalculator ForgetDeadRemotes = Just dropDead
-- Removes data about all dead repos.
--
-- The trust log is not changed, because other, unmerged clones
-- may contain other data about the dead repos. So we need to remember
-- which are dead to later remove that.
--
-- When the remote log contains a sameas-uuid pointing to a dead uuid,
-- the uuid of that remote configuration is also effectively dead,
-- though not in the trust log. There may be per-remote state stored using
-- the latter uuid, that also needs to be removed. The sameas-uuid
-- is not removed from the remote log, for the same reason the trust log
-- is not changed.
dropDead :: TrustMap -> M.Map UUID RemoteConfig -> GitConfig -> TransitionCalculator
dropDead trustmap remoteconfigmap gc f content
| f == trustLog = PreserveFile
| f == remoteLog = ChangeFile $
Remote.buildRemoteConfigLog $
mapLogWithKey minimizesameasdead $
filterMapLog (notdead trustmap) id $
Remote.parseRemoteConfigLog content
| otherwise = filterBranch (notdead trustmap') gc f content
where
notdead m u = M.findWithDefault def u m /= DeadTrusted
trustmap' = trustmap `M.union`
M.map (const DeadTrusted) (M.filter sameasdead remoteconfigmap)
sameasdead cm =
case toUUID . fromProposedAccepted <$> M.lookup sameasUUIDField cm of
Nothing -> False
Just u' -> M.lookup u' trustmap == Just DeadTrusted
minimizesameasdead u l
| M.lookup u trustmap' == Just DeadTrusted =
l { UUIDBased.value = minimizesameasdead' (UUIDBased.value l) }
| otherwise = l
minimizesameasdead' c = M.restrictKeys c (S.singleton sameasUUIDField)
filterBranch :: (UUID -> Bool) -> GitConfig -> TransitionCalculator
filterBranch wantuuid gc f content = case getLogVariety gc f of
Just OldUUIDBasedLog -> ChangeFile $
UUIDBased.buildLogOld byteString $
filterMapLog wantuuid id $
UUIDBased.parseLogOld A.takeByteString content
Just NewUUIDBasedLog -> ChangeFile $
UUIDBased.buildLogNew byteString $
filterMapLog wantuuid id $
UUIDBased.parseLogNew A.takeByteString content
Just (ChunkLog _) -> ChangeFile $
Chunk.buildLog $ filterMapLog wantuuid fst $
Chunk.parseLog content
Just (LocationLog _) -> ChangeFile $ Presence.buildLog $
Presence.compactLog $
filterLocationLog wantuuid $
Presence.parseLog content
Just (UrlLog _) -> PreserveFile
Just RemoteMetaDataLog -> ChangeFile $ MetaData.buildLog $
filterRemoteMetaDataLog wantuuid $
MetaData.simplifyLog $ MetaData.parseLog content
Just OtherLog -> PreserveFile
Nothing -> PreserveFile
filterMapLog :: (UUID -> Bool) -> (k -> UUID) -> MapLog k v -> MapLog k v
filterMapLog wantuuid getuuid = filterMapLogWith (\k _v -> wantuuid (getuuid k))
filterLocationLog :: (UUID -> Bool) -> [Presence.LogLine] -> [Presence.LogLine]
filterLocationLog wantuuid = filter $
wantuuid . toUUID . Presence.fromLogInfo . Presence.info
filterRemoteMetaDataLog :: (UUID -> Bool) -> MetaData.Log MetaData -> MetaData.Log MetaData
filterRemoteMetaDataLog wantuuid =
MetaData.filterOutEmpty . MetaData.filterRemoteMetaData wantuuid

144
Annex/BranchState.hs Normal file
View file

@ -0,0 +1,144 @@
{- git-annex branch state management
-
- Runtime state about the git-annex branch, and a small cache.
-
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.BranchState where
import Annex.Common
import Types.BranchState
import Types.Transitions
import qualified Annex
import Logs
import qualified Git
import Control.Concurrent
import qualified Data.ByteString.Lazy as L
getState :: Annex BranchState
getState = do
v <- Annex.getRead Annex.branchstate
liftIO $ readMVar v
changeState :: (BranchState -> BranchState) -> Annex ()
changeState changer = do
v <- Annex.getRead Annex.branchstate
liftIO $ modifyMVar_ v $ return . changer
{- Runs an action to check that the index file exists, if it's not been
- checked before in this run of git-annex. -}
checkIndexOnce :: Annex () -> Annex ()
checkIndexOnce a = unlessM (indexChecked <$> getState) $ do
a
changeState $ \s -> s { indexChecked = True }
data UpdateMade
= UpdateMade
{ refsWereMerged :: Bool
, journalClean :: Bool
}
| UpdateFailedPermissions
{ refsUnmerged :: [Git.Sha]
, newTransitions :: [TransitionCalculator]
}
{- Runs an action to update the branch, if it's not been updated before
- in this run of git-annex.
-
- When interactive access is enabled, the journal is always checked when
- reading values from the branch, and so this does not need to update
- the branch.
-
- When the action leaves the journal clean, by staging anything that
- was in it, an optimisation is enabled: The journal does not need to
- be checked going forward, until new information gets written to it.
-
- When the action is unable to update the branch due to a permissions
- problem, the journal is still read every time.
-}
runUpdateOnce :: Annex UpdateMade -> Annex BranchState
runUpdateOnce update = do
st <- getState
if branchUpdated st || needInteractiveAccess st
then return st
else do
um <- update
let stf = case um of
UpdateMade {} -> \st' -> st'
{ branchUpdated = True
, journalIgnorable = journalClean um
}
UpdateFailedPermissions {} -> \st' -> st'
{ branchUpdated = True
, journalIgnorable = False
, unmergedRefs = refsUnmerged um
, unhandledTransitions = newTransitions um
, cachedFileContents = []
}
changeState stf
return (stf st)
{- Avoids updating the branch. A useful optimisation when the branch
- is known to have not changed, or git-annex won't be relying on info
- queried from it being as up-to-date as possible. -}
disableUpdate :: Annex ()
disableUpdate = changeState $ \s -> s { branchUpdated = True }
{- Called when a change is made to the journal. -}
journalChanged :: Annex ()
journalChanged = do
-- Optimisation: Typically journalIgnorable will already be True
-- (when one thing gets journalled, often other things do to),
-- so avoid an unnecessary write to the MVar that changeState
-- would do.
--
-- This assumes that another thread is not setting journalIgnorable
-- at the same time, but since runUpdateOnce is the only
-- thing that sets it, and it only runs once, that
-- should not happen.
st <- getState
when (journalIgnorable st) $
changeState $ \st' -> st' { journalIgnorable = False }
{- When git-annex is somehow interactive, eg in --batch mode,
- and needs to always notice changes made to the journal by other
- processes, this disables optimisations that avoid normally reading the
- journal.
-
- It also avoids using the cache, so changes committed by other processes
- will be seen.
-}
enableInteractiveBranchAccess :: Annex ()
enableInteractiveBranchAccess = changeState $ \s -> s
{ needInteractiveAccess = True
, journalIgnorable = False
}
setCache :: RawFilePath -> L.ByteString -> Annex ()
setCache file content = changeState $ \s -> s
{ cachedFileContents = add (cachedFileContents s) }
where
add l
| length l < logFilesToCache = (file, content) : l
| otherwise = (file, content) : Prelude.init l
getCache :: RawFilePath -> BranchState -> Maybe L.ByteString
getCache file state = go (cachedFileContents state)
where
go [] = Nothing
go ((f,c):rest)
| f == file && not (needInteractiveAccess state) = Just c
| otherwise = go rest
invalidateCache :: RawFilePath -> Annex ()
invalidateCache f = changeState $ \s -> s
{ cachedFileContents = filter (\(f', _) -> f' /= f)
(cachedFileContents s)
}
invalidateCacheAll :: Annex ()
invalidateCacheAll = changeState $ \s -> s { cachedFileContents = [] }

221
Annex/CatFile.hs Normal file
View file

@ -0,0 +1,221 @@
{- git cat-file interface, with handle automatically stored in the Annex monad
-
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
module Annex.CatFile (
catFile,
catFileDetails,
catObject,
catTree,
catCommit,
catObjectDetails,
withCatFileHandle,
catObjectMetaData,
catFileStop,
catKey,
catKey',
catSymLinkTarget,
catKeyFile,
catKeyFileHEAD,
catKeyFileHidden,
catObjectMetaDataHidden,
) where
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
import System.PosixCompat.Types
import Control.Concurrent.STM
import Annex.Common
import qualified Git
import qualified Git.CatFile
import qualified Annex
import Git.Types
import Git.FilePath
import Git.Index
import qualified Git.Ref
import Annex.Link
import Annex.CurrentBranch
import Types.AdjustedBranch
import Types.CatFileHandles
import Utility.ResourcePool
catFile :: Git.Branch -> RawFilePath -> Annex L.ByteString
catFile branch file = withCatFileHandle $ \h ->
liftIO $ Git.CatFile.catFile h branch file
catFileDetails :: Git.Branch -> RawFilePath -> Annex (Maybe (L.ByteString, Sha, ObjectType))
catFileDetails branch file = withCatFileHandle $ \h ->
liftIO $ Git.CatFile.catFileDetails h branch file
catObject :: Git.Ref -> Annex L.ByteString
catObject ref = withCatFileHandle $ \h ->
liftIO $ Git.CatFile.catObject h ref
catObjectMetaData :: Git.Ref -> Annex (Maybe (Sha, Integer, ObjectType))
catObjectMetaData ref = withCatFileMetaDataHandle $ \h ->
liftIO $ Git.CatFile.catObjectMetaData h ref
catTree :: Git.Ref -> Annex [(FilePath, FileMode)]
catTree ref = withCatFileHandle $ \h ->
liftIO $ Git.CatFile.catTree h ref
catCommit :: Git.Ref -> Annex (Maybe Commit)
catCommit ref = withCatFileHandle $ \h ->
liftIO $ Git.CatFile.catCommit h ref
catObjectDetails :: Git.Ref -> Annex (Maybe (L.ByteString, Sha, ObjectType))
catObjectDetails ref = withCatFileHandle $ \h ->
liftIO $ Git.CatFile.catObjectDetails h ref
{- There can be multiple index files, and a different cat-file is needed
- for each. That is selected by setting GIT_INDEX_FILE in the gitEnv
- before running this. -}
withCatFileHandle :: (Git.CatFile.CatFileHandle -> Annex a) -> Annex a
withCatFileHandle = withCatFileHandle'
Git.CatFile.catFileStart
catFileMap
(\v m -> v { catFileMap = m })
withCatFileMetaDataHandle :: (Git.CatFile.CatFileMetaDataHandle -> Annex a) -> Annex a
withCatFileMetaDataHandle = withCatFileHandle'
Git.CatFile.catFileMetaDataStart
catFileMetaDataMap
(\v m -> v { catFileMetaDataMap = m })
withCatFileHandle'
:: (Repo -> IO hdl)
-> (CatMap -> M.Map FilePath (ResourcePool hdl))
-> (CatMap -> M.Map FilePath (ResourcePool hdl) -> CatMap)
-> (hdl -> Annex a)
-> Annex a
withCatFileHandle' startcat get set a = do
cfh <- Annex.getState Annex.catfilehandles
indexfile <- fromMaybe "" . maybe Nothing (lookup indexEnv)
<$> fromRepo gitEnv
p <- case cfh of
CatFileHandlesNonConcurrent m -> case M.lookup indexfile (get m) of
Just p -> return p
Nothing -> do
p <- mkResourcePoolNonConcurrent startcatfile
let !m' = set m (M.insert indexfile p (get m))
Annex.changeState $ \s -> s
{ Annex.catfilehandles = CatFileHandlesNonConcurrent m' }
return p
CatFileHandlesPool tm -> do
m <- liftIO $ atomically $ takeTMVar tm
case M.lookup indexfile (get m) of
Just p -> do
liftIO $ atomically $ putTMVar tm m
return p
Nothing -> do
p <- mkResourcePool maxCatFiles
let !m' = set m (M.insert indexfile p (get m))
liftIO $ atomically $ putTMVar tm m'
return p
withResourcePool p startcatfile a
where
startcatfile = inRepo startcat
{- A lot of git cat-file processes are unlikely to improve concurrency,
- because a query to them takes only a little bit of CPU, and tends to be
- bottlenecked on disk. Also, they each open a number of files, so
- using too many might run out of file handles. So, only start a maximum
- of 2.
-
- Note that each different index file gets its own pool of cat-files;
- this is the size of each pool. In all, 4 times this many cat-files
- may end up running.
-}
maxCatFiles :: Int
maxCatFiles = 2
{- Stops all running cat-files. Should only be run when it's known that
- nothing is using the handles, eg at shutdown. -}
catFileStop :: Annex ()
catFileStop = do
cfh <- Annex.getState Annex.catfilehandles
m <- case cfh of
CatFileHandlesNonConcurrent m -> do
Annex.changeState $ \s -> s { Annex.catfilehandles = CatFileHandlesNonConcurrent emptyCatMap }
return m
CatFileHandlesPool tm ->
liftIO $ atomically $ swapTMVar tm emptyCatMap
liftIO $ forM_ (M.elems (catFileMap m)) $ \p ->
freeResourcePool p Git.CatFile.catFileStop
liftIO $ forM_ (M.elems (catFileMetaDataMap m)) $ \p ->
freeResourcePool p Git.CatFile.catFileMetaDataStop
{- From ref to a symlink or a pointer file, get the key. -}
catKey :: Ref -> Annex (Maybe Key)
catKey ref = catObjectMetaData ref >>= \case
Just (_, sz, _) -> catKey' ref sz
Nothing -> return Nothing
catKey' :: Ref -> FileSize -> Annex (Maybe Key)
catKey' ref sz
-- Avoid catting large files, that cannot be symlinks or
-- pointer files, which would require buffering their
-- content in memory, as well as a lot of IO.
| sz <= fromIntegral maxPointerSz =
parseLinkTargetOrPointer . L.toStrict <$> catObject ref
catKey' _ _ = return Nothing
{- Gets a symlink target. -}
catSymLinkTarget :: Sha -> Annex RawFilePath
catSymLinkTarget sha = fromInternalGitPath . L.toStrict <$> get
where
-- Avoid buffering the whole file content, which might be large.
-- 8192 is enough if it really is a symlink.
get = L.take 8192 <$> catObject sha
{- From a file in the repository back to the key.
-
- Ideally, this should reflect the key that's staged in the index,
- not the key that's committed to HEAD. Unfortunately, git cat-file
- does not refresh the index file after it's started up, so things
- newly staged in the index won't show up. It does, however, notice
- when branches change.
-
- For command-line git-annex use, that doesn't matter. It's perfectly
- reasonable for things staged in the index after the currently running
- git-annex process to not be noticed by it. However, we do want to see
- what's in the index, since it may have uncommitted changes not in HEAD
-
- For the assistant, this is much more of a problem, since it commits
- files and then needs to be able to immediately look up their keys.
- OTOH, the assistant doesn't keep changes staged in the index for very
- long at all before committing them -- and it won't look at the keys
- of files until after committing them.
-
- So, this gets info from the index, unless running as a daemon.
-}
catKeyFile :: RawFilePath -> Annex (Maybe Key)
catKeyFile f = ifM (Annex.getState Annex.daemon)
( catKeyFileHEAD f
, maybe (pure Nothing) catKey =<< inRepo (Git.Ref.fileRef f)
)
catKeyFileHEAD :: RawFilePath -> Annex (Maybe Key)
catKeyFileHEAD f = maybe (pure Nothing) catKey
=<< inRepo (Git.Ref.fileFromRef Git.Ref.headRef f)
{- Look in the original branch from whence an adjusted branch is based
- to find the file. But only when the adjustment hides some files. -}
catKeyFileHidden :: RawFilePath -> CurrBranch -> Annex (Maybe Key)
catKeyFileHidden = hiddenCat catKey
catObjectMetaDataHidden :: RawFilePath -> CurrBranch -> Annex (Maybe (Sha, Integer, ObjectType))
catObjectMetaDataHidden = hiddenCat catObjectMetaData
hiddenCat :: (Ref -> Annex (Maybe a)) -> RawFilePath -> CurrBranch -> Annex (Maybe a)
hiddenCat a f (Just origbranch, Just adj)
| adjustmentHidesFiles adj =
maybe (pure Nothing) a
=<< inRepo (Git.Ref.fileFromRef origbranch f)
hiddenCat _ _ _ = return Nothing

111
Annex/ChangedRefs.hs Normal file
View file

@ -0,0 +1,111 @@
{- Waiting for changed git refs
-
- Copyright 2014-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.ChangedRefs
( ChangedRefs(..)
, ChangedRefsHandle
, waitChangedRefs
, drainChangedRefs
, stopWatchingChangedRefs
, watchChangedRefs
) where
import Annex.Common
import Utility.DirWatcher
import Utility.DirWatcher.Types
import Utility.Directory.Create
import qualified Git
import Git.Sha
import qualified Utility.SimpleProtocol as Proto
import Control.Concurrent
import Control.Concurrent.STM
import Control.Concurrent.STM.TBMChan
import qualified Data.ByteString as S
import qualified System.FilePath.ByteString as P
newtype ChangedRefs = ChangedRefs [Git.Ref]
deriving (Show)
instance Proto.Serializable ChangedRefs where
serialize (ChangedRefs l) = unwords $ map Git.fromRef l
deserialize = Just . ChangedRefs . map (Git.Ref . encodeBS) . words
data ChangedRefsHandle = ChangedRefsHandle DirWatcherHandle (TBMChan Git.Sha)
-- | Wait for one or more git refs to change.
--
-- When possible, coalesce ref writes that occur closely together
-- in time. Delay up to 0.05 seconds to get more ref writes.
waitChangedRefs :: ChangedRefsHandle -> IO ChangedRefs
waitChangedRefs (ChangedRefsHandle _ chan) =
atomically (readTBMChan chan) >>= \case
Nothing -> return $ ChangedRefs []
Just r -> do
threadDelay 50000
rs <- atomically $ loop []
return $ ChangedRefs (r:rs)
where
loop rs = tryReadTBMChan chan >>= \case
Just (Just r) -> loop (r:rs)
_ -> return rs
-- | Remove any changes that might be buffered in the channel,
-- without waiting for any new changes.
drainChangedRefs :: ChangedRefsHandle -> IO ()
drainChangedRefs (ChangedRefsHandle _ chan) = atomically go
where
go = tryReadTBMChan chan >>= \case
Just (Just _) -> go
_ -> return ()
stopWatchingChangedRefs :: ChangedRefsHandle -> IO ()
stopWatchingChangedRefs h@(ChangedRefsHandle wh chan) = do
stopWatchDir wh
atomically $ closeTBMChan chan
drainChangedRefs h
watchChangedRefs :: Annex (Maybe ChangedRefsHandle)
watchChangedRefs = do
-- This channel is used to accumulate notifications,
-- because the DirWatcher might have multiple threads that find
-- changes at the same time. It is bounded to allow a watcher
-- to be started once and reused, without too many changes being
-- buffered in memory.
chan <- liftIO $ newTBMChanIO 100
g <- gitRepo
let gittop = Git.localGitDir g
let refdir = gittop P.</> "refs"
liftIO $ createDirectoryUnder [gittop] refdir
let notifyhook = Just $ notifyHook chan
let hooks = mkWatchHooks
{ addHook = notifyhook
, modifyHook = notifyhook
}
if canWatch
then do
h <- liftIO $ watchDir
(fromRawFilePath refdir)
(const False) True hooks id
return $ Just $ ChangedRefsHandle h chan
else return Nothing
notifyHook :: TBMChan Git.Sha -> FilePath -> Maybe FileStatus -> IO ()
notifyHook chan reffile _
| ".lock" `isSuffixOf` reffile = noop
| otherwise = void $ do
sha <- catchDefaultIO Nothing $
extractSha <$> S.readFile reffile
-- When the channel is full, there is probably no reader
-- running, or ref changes have been occurring very fast,
-- so it's ok to not write the change to it.
maybe noop (void . atomically . tryWriteTBMChan chan) sha

74
Annex/CheckAttr.hs Normal file
View file

@ -0,0 +1,74 @@
{- git check-attr interface
-
- Copyright 2012-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.CheckAttr (
annexAttrs,
checkAttr,
checkAttrs,
checkAttrStop,
mkConcurrentCheckAttrHandle,
) where
import Annex.Common
import qualified Git.CheckAttr as Git
import qualified Annex
import Utility.ResourcePool
import Types.Concurrency
import Annex.Concurrent.Utility
{- All gitattributes used by git-annex. -}
annexAttrs :: [Git.Attr]
annexAttrs =
[ "annex.backend"
, "annex.largefiles"
, "annex.numcopies"
, "annex.mincopies"
]
checkAttr :: Git.Attr -> RawFilePath -> Annex String
checkAttr attr file = withCheckAttrHandle $ \h -> do
r <- liftIO $ Git.checkAttr h attr file
if r == Git.unspecifiedAttr
then return ""
else return r
checkAttrs :: [Git.Attr] -> RawFilePath -> Annex [String]
checkAttrs attrs file = withCheckAttrHandle $ \h ->
liftIO $ Git.checkAttrs h attrs file
withCheckAttrHandle :: (Git.CheckAttrHandle -> Annex a) -> Annex a
withCheckAttrHandle a =
maybe mkpool go =<< Annex.getState Annex.checkattrhandle
where
go p = withResourcePool p start a
start = inRepo $ Git.checkAttrStart annexAttrs
mkpool = do
-- This only runs in non-concurrent code paths;
-- a concurrent pool is set up earlier when needed.
p <- mkResourcePoolNonConcurrent start
Annex.changeState $ \s -> s { Annex.checkattrhandle = Just p }
go p
mkConcurrentCheckAttrHandle :: Concurrency -> Annex (ResourcePool Git.CheckAttrHandle)
mkConcurrentCheckAttrHandle c =
Annex.getState Annex.checkattrhandle >>= \case
Just p@(ResourcePool {}) -> return p
_ -> mkResourcePool =<< liftIO (maxCheckAttrs c)
{- git check-attr is typically CPU bound, and is not likely to be the main
- bottleneck for any command. So limit to the number of CPU cores, maximum,
- while respecting the -Jn value.
-}
maxCheckAttrs :: Concurrency -> IO Int
maxCheckAttrs = concurrencyUpToCpus
checkAttrStop :: Annex ()
checkAttrStop = maybe noop stop =<< Annex.getState Annex.checkattrhandle
where
stop p = do
liftIO $ freeResourcePool p Git.checkAttrStop
Annex.changeState $ \s -> s { Annex.checkattrhandle = Nothing }

64
Annex/CheckIgnore.hs Normal file
View file

@ -0,0 +1,64 @@
{- git check-ignore interface, with handle automatically stored in
- the Annex monad
-
- Copyright 2013-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.CheckIgnore (
CheckGitIgnore(..),
checkIgnored,
checkIgnoreStop,
mkConcurrentCheckIgnoreHandle,
) where
import Annex.Common
import qualified Git.CheckIgnore as Git
import qualified Annex
import Utility.ResourcePool
import Types.Concurrency
import Annex.Concurrent.Utility
newtype CheckGitIgnore = CheckGitIgnore Bool
checkIgnored :: CheckGitIgnore -> RawFilePath -> Annex Bool
checkIgnored (CheckGitIgnore False) _ = pure False
checkIgnored (CheckGitIgnore True) file =
ifM (Annex.getRead Annex.force)
( pure False
, withCheckIgnoreHandle $ \h -> liftIO $ Git.checkIgnored h file
)
withCheckIgnoreHandle :: (Git.CheckIgnoreHandle -> Annex a) -> Annex a
withCheckIgnoreHandle a =
maybe mkpool go =<< Annex.getState Annex.checkignorehandle
where
go p = withResourcePool p start a
start = inRepo Git.checkIgnoreStart
mkpool = do
-- This only runs in non-concurrent code paths;
-- a concurrent pool is set up earlier when needed.
p <- mkResourcePoolNonConcurrent start
Annex.changeState $ \s -> s { Annex.checkignorehandle = Just p }
go p
mkConcurrentCheckIgnoreHandle :: Concurrency -> Annex (ResourcePool Git.CheckIgnoreHandle)
mkConcurrentCheckIgnoreHandle c =
Annex.getState Annex.checkignorehandle >>= \case
Just p@(ResourcePool {}) -> return p
_ -> mkResourcePool =<< liftIO (maxCheckIgnores c)
{- git check-ignore is typically CPU bound, and is not likely to be the main
- bottleneck for any command. So limit to the number of CPU cores, maximum,
- while respecting the -Jn value.
-}
maxCheckIgnores :: Concurrency -> IO Int
maxCheckIgnores = concurrencyUpToCpus
checkIgnoreStop :: Annex ()
checkIgnoreStop = maybe noop stop =<< Annex.getState Annex.checkignorehandle
where
stop p = do
liftIO $ freeResourcePool p Git.checkIgnoreStop
Annex.changeState $ \s -> s { Annex.checkignorehandle = Nothing }

180
Annex/Cluster.hs Normal file
View file

@ -0,0 +1,180 @@
{- clusters
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE RankNTypes, OverloadedStrings #-}
module Annex.Cluster where
import Annex.Common
import qualified Annex
import Types.Cluster
import Logs.Cluster
import P2P.Proxy
import P2P.Protocol
import P2P.IO
import Annex.Proxy
import Annex.UUID
import Annex.BranchState
import Logs.Location
import Logs.PreferredContent
import Types.Command
import Remote.List
import qualified Remote
import qualified Types.Remote as Remote
import qualified Data.Map as M
import qualified Data.Set as S
import System.Random
{- Proxy to a cluster. -}
proxyCluster
:: ClusterUUID
-> CommandPerform
-> ServerMode
-> ClientSide
-> (forall a. Annex () -> ((a -> CommandPerform) -> Annex (Either ProtoFailure a) -> CommandPerform))
-> CommandPerform
proxyCluster clusteruuid proxydone servermode clientside protoerrhandler = do
enableInteractiveBranchAccess
getClientProtocolVersion (fromClusterUUID clusteruuid) clientside
withclientversion (protoerrhandler noop)
where
withclientversion (Just (clientmaxversion, othermsg)) = do
-- The protocol versions supported by the nodes are not
-- known at this point, and would be too expensive to
-- determine. Instead, pick the newest protocol version
-- that we and the client both speak. The proxy code
-- checks protocol versions of remotes, so nodes can
-- have different protocol versions.
let protocolversion = min maxProtocolVersion clientmaxversion
sendClientProtocolVersion clientside othermsg protocolversion
(getclientbypass protocolversion) (protoerrhandler noop)
withclientversion Nothing = proxydone
getclientbypass protocolversion othermsg =
getClientBypass clientside protocolversion othermsg
(withclientbypass protocolversion) (protoerrhandler noop)
withclientbypass protocolversion (bypassuuids, othermsg) = do
(selectnode, closenodes) <-
clusterProxySelector clusteruuid
protocolversion bypassuuids
proxystate <- liftIO mkProxyState
concurrencyconfig <- concurrencyConfigJobs
let proxyparams = ProxyParams
{ proxyMethods = mkProxyMethods
, proxyState = proxystate
, proxyServerMode = servermode
, proxyClientSide = clientside
, proxyUUID = fromClusterUUID clusteruuid
, proxySelector = selectnode
, proxyConcurrencyConfig = concurrencyconfig
, proxyClientProtocolVersion = protocolversion
}
proxy proxydone proxyparams othermsg
(protoerrhandler closenodes)
clusterProxySelector
:: ClusterUUID
-> ProtocolVersion
-> Bypass
-> Annex (ProxySelector, Annex ())
clusterProxySelector clusteruuid protocolversion (Bypass bypass) = do
nodeuuids <- (fromMaybe S.empty . M.lookup clusteruuid . clusterUUIDs)
<$> getClusters
myclusters <- annexClusters <$> Annex.getGitConfig
allremotes <- concat . Remote.byCost <$> remoteList
hereu <- getUUID
let bypass' = S.insert hereu bypass
let clusterremotes = filter (isnode bypass' allremotes nodeuuids myclusters) allremotes
fastDebug "Annex.Cluster" $ unwords
[ "cluster gateway at", fromUUID hereu
, "connecting to", show (map Remote.name clusterremotes)
, "bypass", show (S.toList bypass)
]
nodes <- mapM (proxyRemoteSide protocolversion (Bypass bypass')) clusterremotes
let closenodes = mapM_ closeRemoteSide nodes
let proxyselector = ProxySelector
{ proxyCHECKPRESENT = nodecontaining nodes
, proxyGET = nodecontaining nodes
-- The key is sent to multiple nodes at the same time,
-- skipping nodes where it's known/expected to already be
-- present to avoid needing to connect to those, and
-- skipping nodes where it's not preferred content.
, proxyPUT = \af k -> do
locs <- S.fromList <$> loggedLocations k
let l = filter (flip S.notMember locs . Remote.uuid . remote) nodes
l' <- filterM (\n -> isPreferredContent (Just (Remote.uuid (remote n))) mempty (Just k) af True) l
-- PUT to no nodes doesn't work, so fall
-- back to all nodes.
return $ nonempty [l', l] nodes
-- Remove the key from every node that contains it.
-- But, since it's possible the location log for some nodes
-- could be out of date, actually try to remove from every
-- node.
, proxyREMOVE = const (pure nodes)
, proxyGETTIMESTAMP = pure nodes
-- Content is not locked on the cluster as a whole,
-- instead it can be locked on individual nodes that are
-- proxied to the client.
, proxyLOCKCONTENT = const (pure Nothing)
}
return (proxyselector, closenodes)
where
-- Nodes of the cluster have remote.name.annex-cluster-node
-- containing its name.
--
-- Or, a node can be the cluster proxied by another gateway.
isnode bypass' rs nodeuuids myclusters r =
case remoteAnnexClusterNode (Remote.gitconfig r) of
Just names
| any (isclustername myclusters) names ->
flip S.member nodeuuids $
ClusterNodeUUID $ Remote.uuid r
| otherwise -> False
Nothing -> isclusterviagateway bypass' rs r
-- Is this remote the same cluster, proxied via another gateway?
--
-- Must avoid bypassed gateways to prevent cycles.
isclusterviagateway bypass' rs r =
case mkClusterUUID (Remote.uuid r) of
Just cu | cu == clusteruuid ->
case remoteAnnexProxiedBy (Remote.gitconfig r) of
Just proxyuuid | proxyuuid `S.notMember` bypass' ->
not $ null $
filter isclustergateway $
filter (\p -> Remote.uuid p == proxyuuid) rs
_ -> False
_ -> False
isclustergateway r = any (== clusteruuid) $
remoteAnnexClusterGateway $ Remote.gitconfig r
isclustername myclusters name =
M.lookup name myclusters == Just clusteruuid
nodecontaining nodes k = do
locs <- S.fromList <$> loggedLocations k
case filter (flip S.member locs . Remote.uuid . remote) nodes of
[] -> return Nothing
(node:[]) -> return (Just node)
(node:rest) ->
-- The list of nodes is ordered by cost.
-- Use any of the ones with equally low
-- cost.
let lowestcost = Remote.cost (remote node)
samecost = node : takeWhile (\n -> Remote.cost (remote n) == lowestcost) rest
in do
n <- liftIO $ getStdRandom $
randomR (0, length samecost - 1)
return (Just (samecost !! n))
nonempty (l:ls) fallback
| null l = nonempty ls fallback
| otherwise = l
nonempty [] fallback = fallback

16
Annex/Common.hs Normal file
View file

@ -0,0 +1,16 @@
{-# LANGUAGE CPP #-}
module Annex.Common (module X) where
import Common as X
import Types as X
import Key as X
import Types.UUID as X
import Annex as X (gitRepo, inRepo, fromRepo, calcRepo, calcRepo')
import Annex.Locations as X
import Annex.Debug as X (fastDebug, debug)
import Messages as X
import Git.Quote as X
#ifndef mingw32_HOST_OS
import System.Posix.IO as X hiding (createPipe, append)
#endif

113
Annex/Concurrent.hs Normal file
View file

@ -0,0 +1,113 @@
{- git-annex concurrent state
-
- Copyright 2015-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Concurrent (
module Annex.Concurrent,
module Annex.Concurrent.Utility
) where
import Annex
import Annex.Common
import Annex.Concurrent.Utility
import qualified Annex.Queue
import Types.Concurrency
import Types.CatFileHandles
import Annex.CatFile
import Annex.CheckAttr
import Annex.HashObject
import Annex.CheckIgnore
import qualified Data.Map as M
setConcurrency :: ConcurrencySetting -> Annex ()
setConcurrency (ConcurrencyCmdLine s) = setConcurrency' s ConcurrencyCmdLine
setConcurrency (ConcurrencyGitConfig s) = setConcurrency' s ConcurrencyGitConfig
setConcurrency' :: Concurrency -> (Concurrency -> ConcurrencySetting) -> Annex ()
setConcurrency' NonConcurrent f =
Annex.changeState $ \s -> s
{ Annex.concurrency = f NonConcurrent
}
setConcurrency' c f = do
oldc <- Annex.getState Annex.concurrency
case oldc of
ConcurrencyCmdLine NonConcurrent -> fromnonconcurrent
ConcurrencyGitConfig NonConcurrent -> fromnonconcurrent
_
| oldc == newc -> return ()
| otherwise ->
Annex.changeState $ \s -> s
{ Annex.concurrency = newc
}
where
newc = f c
fromnonconcurrent = do
catFileStop
checkAttrStop
hashObjectStop
checkIgnoreStop
cfh <- liftIO catFileHandlesPool
cah <- mkConcurrentCheckAttrHandle c
hoh <- mkConcurrentHashObjectHandle c
cih <- mkConcurrentCheckIgnoreHandle c
Annex.changeState $ \s -> s
{ Annex.concurrency = newc
, Annex.catfilehandles = cfh
, Annex.checkattrhandle = Just cah
, Annex.hashobjecthandle = Just hoh
, Annex.checkignorehandle = Just cih
}
{- Allows forking off a thread that uses a copy of the current AnnexState
- to run an Annex action.
-
- The returned IO action can be used to start the thread.
- It returns an Annex action that must be run in the original
- calling context to merge the forked AnnexState back into the
- current AnnexState.
-}
forkState :: Annex a -> Annex (IO (Annex a))
forkState a = do
rd <- Annex.getRead id
st <- dupState
return $ do
(ret, (newst, _rd)) <- run (st, rd) a
return $ do
mergeState newst
return ret
{- Returns a copy of the current AnnexState that is safe to be
- used when forking off a thread.
-
- After an Annex action is run using this AnnexState, it
- should be merged back into the current Annex's state,
- by calling mergeState.
-}
dupState :: Annex AnnexState
dupState = do
st <- Annex.getState id
-- Make sure that concurrency is enabled, if it was not already,
-- so the concurrency-safe resource pools are set up.
st' <- case getConcurrency' (Annex.concurrency st) of
NonConcurrent -> do
setConcurrency (ConcurrencyCmdLine (Concurrent 1))
Annex.getState id
_ -> return st
return $ st'
-- each thread has its own repoqueue
{ Annex.repoqueue = Nothing
-- no errors from this thread yet
, Annex.errcounter = 0
}
{- Merges the passed AnnexState into the current Annex state. -}
mergeState :: AnnexState -> Annex ()
mergeState st = do
forM_ (M.toList $ Annex.cleanupactions st) $
uncurry addCleanupAction
Annex.Queue.mergeFrom st
changeState $ \s -> s { errcounter = errcounter s + errcounter st }

View file

@ -0,0 +1,31 @@
{- git-annex concurrency utilities
-
- Copyright 2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Concurrent.Utility where
import Annex
import Types.Concurrency
import GHC.Conc
getConcurrency :: Annex Concurrency
getConcurrency = getConcurrency' <$> getState concurrency
getConcurrency' :: ConcurrencySetting -> Concurrency
getConcurrency' (ConcurrencyCmdLine c) = c
getConcurrency' (ConcurrencyGitConfig c) = c
{- Honor the requested level of concurrency, but only up to the number of
- CPU cores. Useful for things that are known to be CPU bound. -}
concurrencyUpToCpus :: Concurrency -> IO Int
concurrencyUpToCpus c = do
let cn = case c of
Concurrent n -> n
NonConcurrent -> 1
ConcurrentPerCpu -> 1
pn <- getNumProcessors
return (min cn pn)

1116
Annex/Content.hs Normal file

File diff suppressed because it is too large Load diff

141
Annex/Content/LowLevel.hs Normal file
View file

@ -0,0 +1,141 @@
{- git-annex low-level content functions
-
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.Content.LowLevel where
import Annex.Common
import Logs.Transfer
import qualified Annex
import Utility.DiskFree
import Utility.FileMode
import Utility.DataUnits
import Utility.CopyFile
import qualified Utility.RawFilePath as R
import qualified System.FilePath.ByteString as P
import System.PosixCompat.Files (linkCount)
{- Runs the secure erase command if set, otherwise does nothing.
- File may or may not be deleted at the end; caller is responsible for
- making sure it's deleted. -}
secureErase :: RawFilePath -> Annex ()
secureErase file = maybe noop go =<< annexSecureEraseCommand <$> Annex.getGitConfig
where
go basecmd = void $ liftIO $
boolSystem "sh" [Param "-c", Param $ gencmd basecmd]
gencmd = massReplace [ ("%file", shellEscape (fromRawFilePath file)) ]
data LinkedOrCopied = Linked | Copied
{- Hard links or copies src to dest, which must not already exist.
-
- Only uses a hard link when annex.thin is enabled and when src is
- not already hardlinked to elsewhere.
-
- Checks disk reserve before copying against the size of the key,
- and will fail if not enough space, or if the dest file already exists.
-
- The FileMode, if provided, influences the mode of the dest file.
- In particular, if it has an execute bit set, the dest file's
- execute bit will be set. The mode is not fully copied over because
- git doesn't support file modes beyond execute.
-}
linkOrCopy :: Key -> RawFilePath -> RawFilePath -> Maybe FileMode -> Annex (Maybe LinkedOrCopied)
linkOrCopy = linkOrCopy' (annexThin <$> Annex.getGitConfig)
linkOrCopy' :: Annex Bool -> Key -> RawFilePath -> RawFilePath -> Maybe FileMode -> Annex (Maybe LinkedOrCopied)
linkOrCopy' canhardlink key src dest destmode = catchDefaultIO Nothing $
ifM canhardlink
( hardlink
, copy =<< getstat
)
where
hardlink = do
s <- getstat
if linkCount s > 1
then copy s
else liftIO (R.createLink src dest >> preserveGitMode dest destmode >> return (Just Linked))
`catchIO` const (copy s)
copy s = ifM (checkedCopyFile' key src dest destmode s)
( return (Just Copied)
, return Nothing
)
getstat = liftIO $ R.getFileStatus src
{- Checks disk space before copying. -}
checkedCopyFile :: Key -> RawFilePath -> RawFilePath -> Maybe FileMode -> Annex Bool
checkedCopyFile key src dest destmode = catchBoolIO $
checkedCopyFile' key src dest destmode
=<< liftIO (R.getFileStatus src)
checkedCopyFile' :: Key -> RawFilePath -> RawFilePath -> Maybe FileMode -> FileStatus -> Annex Bool
checkedCopyFile' key src dest destmode s = catchBoolIO $ do
sz <- liftIO $ getFileSize' src s
ifM (checkDiskSpace' sz (Just $ P.takeDirectory dest) key 0 True)
( liftIO $
copyFileExternal CopyAllMetaData (fromRawFilePath src) (fromRawFilePath dest)
<&&> preserveGitMode dest destmode
, return False
)
preserveGitMode :: RawFilePath -> Maybe FileMode -> IO Bool
preserveGitMode f (Just mode)
| isExecutable mode = catchBoolIO $ do
modifyFileMode f $ addModes executeModes
return True
| otherwise = catchBoolIO $ do
modifyFileMode f $ removeModes executeModes
return True
preserveGitMode _ _ = return True
{- Checks that there is disk space available to store a given key,
- in a destination directory (or the annex) printing a warning if not.
-
- If the destination is on the same filesystem as the annex,
- checks for any other running downloads, removing the amount of data still
- to be downloaded from the free space. This way, we avoid overcommitting
- when doing concurrent downloads.
-}
checkDiskSpace :: Maybe FileSize -> Maybe RawFilePath -> Key -> Integer -> Bool -> Annex Bool
checkDiskSpace msz destdir key = checkDiskSpace' sz destdir key
where
sz = fromMaybe 1 (fromKey keySize key <|> msz)
checkDiskSpace' :: FileSize -> Maybe RawFilePath -> Key -> Integer -> Bool -> Annex Bool
checkDiskSpace' sz destdir key alreadythere samefilesystem = ifM (Annex.getRead Annex.force)
( return True
, do
-- We can't get inprogress and free at the same
-- time, and both can be changing, so there's a
-- small race here. Err on the side of caution
-- by getting inprogress first, so if it takes
-- a while, we'll see any decrease in the free
-- disk space.
inprogress <- if samefilesystem
then sizeOfDownloadsInProgress (/= key)
else pure 0
dir >>= liftIO . getDiskFree . fromRawFilePath >>= \case
Just have -> do
reserve <- annexDiskReserve <$> Annex.getGitConfig
let delta = sz + reserve - have - alreadythere + inprogress
let ok = delta <= 0
unless ok $
warning $ UnquotedString $
needMoreDiskSpace delta
return ok
_ -> return True
)
where
dir = maybe (fromRepo gitAnnexDir) return destdir
needMoreDiskSpace :: Integer -> String
needMoreDiskSpace n = "not enough free space, need " ++
roughSize storageUnits True n ++ " more" ++ forcemsg
where
forcemsg = " (use --force to override this check or adjust annex.diskreserve)"

View file

@ -0,0 +1,71 @@
{- git-annex pointer files
-
- Copyright 2010-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.Content.PointerFile where
import Annex.Common
import Annex.Perms
import Annex.Link
import Annex.ReplaceFile
import Annex.InodeSentinal
import Annex.Content.LowLevel
import Utility.InodeCache
import qualified Utility.RawFilePath as R
#if ! defined(mingw32_HOST_OS)
import Utility.Touch
import qualified System.Posix.Files as Posix
#endif
import System.PosixCompat.Files (fileMode)
{- Populates a pointer file with the content of a key.
-
- If the file already has some other content, it is not modified.
-
- Returns an InodeCache if it populated the pointer file.
-}
populatePointerFile :: Restage -> Key -> RawFilePath -> RawFilePath -> Annex (Maybe InodeCache)
populatePointerFile restage k obj f = go =<< liftIO (isPointerFile f)
where
go (Just k') | k == k' = do
let f' = fromRawFilePath f
destmode <- liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus f
liftIO $ removeWhenExistsWith R.removeLink f
(ic, populated) <- replaceWorkTreeFile f' $ \tmp -> do
ok <- linkOrCopy k obj tmp destmode >>= \case
Just _ -> thawContent tmp >> return True
Nothing -> liftIO (writePointerFile tmp k destmode) >> return False
ic <- withTSDelta (liftIO . genInodeCache tmp)
return (ic, ok)
maybe noop (restagePointerFile restage f) ic
if populated
then return ic
else return Nothing
go _ = return Nothing
{- Removes the content from a pointer file, replacing it with a pointer.
-
- Does not check if the pointer file is modified. -}
depopulatePointerFile :: Key -> RawFilePath -> Annex ()
depopulatePointerFile key file = do
st <- liftIO $ catchMaybeIO $ R.getFileStatus file
let mode = fmap fileMode st
secureErase file
liftIO $ removeWhenExistsWith R.removeLink file
ic <- replaceWorkTreeFile (fromRawFilePath file) $ \tmp -> do
liftIO $ writePointerFile tmp key mode
#if ! defined(mingw32_HOST_OS)
-- Don't advance mtime; this avoids unnecessary re-smudging
-- by git in some cases.
liftIO $ maybe noop
(\t -> touch tmp t False)
(fmap Posix.modificationTimeHiRes st)
#endif
withTSDelta (liftIO . genInodeCache tmp)
maybe noop (restagePointerFile (Restage True) file) ic

215
Annex/Content/Presence.hs Normal file
View file

@ -0,0 +1,215 @@
{- git-annex object content presence
-
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Content.Presence (
inAnnex,
inAnnex',
inAnnexSafe,
inAnnexCheck,
objectFileExists,
withObjectLoc,
isUnmodified,
isUnmodified',
isUnmodifiedCheap,
isUnmodifiedCheap',
withContentLockFile,
contentLockFile,
) where
import Annex.Content.Presence.LowLevel
import Annex.Common
import qualified Annex
import Annex.LockPool
import Annex.LockFile
import Annex.Version
import Types.RepoVersion
import qualified Database.Keys
import Annex.InodeSentinal
import Utility.InodeCache
import qualified Utility.RawFilePath as R
import qualified Git
import Config
#ifdef mingw32_HOST_OS
import Annex.Perms
#endif
import qualified System.FilePath.ByteString as P
{- Checks if a given key's content is currently present. -}
inAnnex :: Key -> Annex Bool
inAnnex key = inAnnexCheck key $ liftIO . R.doesPathExist
{- Runs an arbitrary check on a key's content. -}
inAnnexCheck :: Key -> (RawFilePath -> Annex Bool) -> Annex Bool
inAnnexCheck key check = inAnnex' id False check key
{- inAnnex that performs an arbitrary check of the key's content. -}
inAnnex' :: (a -> Bool) -> a -> (RawFilePath -> Annex a) -> Key -> Annex a
inAnnex' isgood bad check key = withObjectLoc key $ \loc -> do
r <- check loc
if isgood r
then ifM (annexThin <$> Annex.getGitConfig)
-- When annex.thin is set, the object file
-- could be modified; make sure it's not.
-- (Suppress any messages about
-- checksumming, to avoid them cluttering
-- the display.)
( ifM (doQuietAction $ isUnmodified key loc)
( return r
, return bad
)
, return r
)
else return bad
{- Like inAnnex, checks if the object file for a key exists,
- but there are no guarantees it has the right content. -}
objectFileExists :: Key -> Annex Bool
objectFileExists key =
calcRepo (gitAnnexLocation key)
>>= liftIO . R.doesPathExist
{- A safer check; the key's content must not only be present, but
- is not in the process of being removed. -}
inAnnexSafe :: Key -> Annex (Maybe Bool)
inAnnexSafe key = inAnnex' (fromMaybe True) (Just False) go key
where
is_locked = Nothing
is_unlocked = Just True
is_missing = Just False
go contentfile = withContentLockFile key $ flip checklock contentfile
#ifndef mingw32_HOST_OS
checklock Nothing contentfile = checkOr is_missing contentfile
{- The content file must exist, but the lock file generally
- won't exist unless a removal is in process. -}
checklock (Just lockfile) contentfile =
ifM (liftIO $ doesFileExist (fromRawFilePath contentfile))
( checkOr is_unlocked lockfile
, return is_missing
)
checkOr d lockfile = checkLocked lockfile >>= return . \case
Nothing -> d
Just True -> is_locked
Just False -> is_unlocked
#else
checklock Nothing contentfile = liftIO $ ifM (doesFileExist (fromRawFilePath contentfile))
( lockShared contentfile >>= \case
Nothing -> return is_locked
Just lockhandle -> do
dropLock lockhandle
return is_unlocked
, return is_missing
)
{- In Windows, see if we can take a shared lock. If so,
- remove the lock file to clean up after ourselves. -}
checklock (Just lockfile) contentfile =
ifM (liftIO $ doesFileExist (fromRawFilePath contentfile))
( modifyContentDir lockfile $ liftIO $
lockShared lockfile >>= \case
Nothing -> return is_locked
Just lockhandle -> do
dropLock lockhandle
void $ tryIO $ removeWhenExistsWith R.removeLink lockfile
return is_unlocked
, return is_missing
)
#endif
{- Runs an action with the lock file to use to lock a key's content.
- When the content file itself should be locked, runs the action with
- Nothing.
-
- In v9 and below, while the action is running, a shared lock is held of the
- gitAnnexContentLockLock. That prevents the v10 upgrade, which changes how
- content locking works, from running at the same time as content is locked
- using the old method.
-}
withContentLockFile :: Key -> (Maybe RawFilePath -> Annex a) -> Annex a
withContentLockFile k a = do
v <- getVersion
if versionNeedsWritableContentFiles v
then fromRepo gitAnnexContentLockLock >>= \lck -> withSharedLock lck $ do
{- While the lock is held, check to see if the git
- config has changed, and reload it if so. This
- updates the annex.version after the v10 upgrade,
- so that a process that started in a v9 repository
- will switch over to v10 content lock files at the
- right time. -}
gitdir <- fromRepo Git.localGitDir
let gitconfig = gitdir P.</> "config"
ic <- withTSDelta (liftIO . genInodeCache gitconfig)
oldic <- Annex.getState Annex.gitconfiginodecache
v' <- if fromMaybe False (compareStrong <$> ic <*> oldic)
then pure v
else do
Annex.changeState $ \s ->
s { Annex.gitconfiginodecache = ic }
reloadConfig
getVersion
go (v')
else go v
where
go v = contentLockFile k v >>= a
contentLockFile :: Key -> Maybe RepoVersion -> Annex (Maybe RawFilePath)
#ifndef mingw32_HOST_OS
{- Older versions of git-annex locked content files themselves, but newer
- versions use a separate lock file, to better support repos shared
- among users in eg a group. -}
contentLockFile key v
| versionNeedsWritableContentFiles v = pure Nothing
| otherwise = Just <$> calcRepo (gitAnnexContentLock key)
#else
{- Windows always has to use a separate lock file from the content, since
- locking the actual content file would interfere with the user's
- use of it. -}
contentLockFile key _ = Just <$> calcRepo (gitAnnexContentLock key)
#endif
{- Performs an action, passing it the location to use for a key's content. -}
withObjectLoc :: Key -> (RawFilePath -> Annex a) -> Annex a
withObjectLoc key a = a =<< calcRepo (gitAnnexLocation key)
{- Check if a file contains the unmodified content of the key.
-
- The expensive way to tell is to do a verification of its content.
- The cheaper way is to see if the InodeCache for the key matches the
- file. -}
isUnmodified :: Key -> RawFilePath -> Annex Bool
isUnmodified key f =
withTSDelta (liftIO . genInodeCache f) >>= \case
Just fc -> do
ic <- Database.Keys.getInodeCaches key
isUnmodified' key f fc ic
Nothing -> return False
isUnmodified' :: Key -> RawFilePath -> InodeCache -> [InodeCache] -> Annex Bool
isUnmodified' = isUnmodifiedLowLevel Database.Keys.addInodeCaches
{- Cheap check if a file contains the unmodified content of the key,
- only checking the InodeCache of the key.
-
- When the InodeCache is stale, this may incorrectly report that a file is
- modified.
-
- Note that, on systems not supporting high-resolution mtimes,
- this may report a false positive when repeated edits are made to a file
- within a small time window (eg 1 second).
-}
isUnmodifiedCheap :: Key -> RawFilePath -> Annex Bool
isUnmodifiedCheap key f = maybe (pure False) (isUnmodifiedCheap' key)
=<< withTSDelta (liftIO . genInodeCache f)
isUnmodifiedCheap' :: Key -> InodeCache -> Annex Bool
isUnmodifiedCheap' key fc = isUnmodifiedCheapLowLevel fc
=<< Database.Keys.getInodeCaches key

View file

@ -0,0 +1,36 @@
{- git-annex object content presence, low-level functions
-
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Content.Presence.LowLevel where
import Annex.Common
import Annex.Verify
import Annex.InodeSentinal
import Utility.InodeCache
isUnmodifiedLowLevel :: (Key -> [InodeCache] -> Annex ()) -> Key -> RawFilePath -> InodeCache -> [InodeCache] -> Annex Bool
isUnmodifiedLowLevel addinodecaches key f fc ic =
isUnmodifiedCheapLowLevel fc ic <||> expensivecheck
where
expensivecheck = ifM (verifyKeyContent key f)
( do
-- The file could have been modified while it was
-- being verified. Detect that.
ifM (geti >>= maybe (return False) (compareInodeCaches fc))
( do
-- Update the InodeCache to avoid
-- performing this expensive check again.
addinodecaches key [fc]
return True
, return False
)
, return False
)
geti = withTSDelta (liftIO . genInodeCache f)
isUnmodifiedCheapLowLevel :: InodeCache -> [InodeCache] -> Annex Bool
isUnmodifiedCheapLowLevel fc ic = anyM (compareInodeCaches fc) ic

179
Annex/CopyFile.hs Normal file
View file

@ -0,0 +1,179 @@
{- Copying files.
-
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.CopyFile where
import Annex.Common
import Utility.Metered
import Utility.CopyFile
import Utility.FileMode
import Utility.Touch
import Utility.Hash (IncrementalVerifier(..))
import qualified Utility.RawFilePath as R
import Control.Concurrent
import qualified Data.ByteString as S
import Data.Time.Clock.POSIX
import System.PosixCompat.Files (fileMode)
-- To avoid the overhead of trying copy-on-write every time, it's tried
-- once and if it fails, is not tried again.
newtype CopyCoWTried = CopyCoWTried (MVar Bool)
newCopyCoWTried :: IO CopyCoWTried
newCopyCoWTried = CopyCoWTried <$> newEmptyMVar
{- Copies a file is copy-on-write is supported. Otherwise, returns False.
-
- The destination file must not exist yet (or may exist but be empty),
- or it will fail to make a CoW copy, and will return false.
-}
tryCopyCoW :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> IO Bool
tryCopyCoW (CopyCoWTried copycowtried) src dest meterupdate =
-- If multiple threads reach this at the same time, they
-- will both try CoW, which is acceptable.
ifM (isEmptyMVar copycowtried)
( ifM destfilealreadypopulated
( return False
, do
ok <- docopycow
void $ tryPutMVar copycowtried ok
return ok
)
, ifM (readMVar copycowtried)
( do
-- CoW is known to work, so delete
-- dest if it exists in order to do a fast
-- CoW copy.
void $ tryIO $ removeFile dest
docopycow
, return False
)
)
where
docopycow = watchFileSize dest' meterupdate $ const $
copyCoW CopyTimeStamps src dest
dest' = toRawFilePath dest
-- Check if the dest file already exists, which would prevent
-- probing CoW. If the file exists but is empty, there's no benefit
-- to resuming from it when CoW does not work, so remove it.
destfilealreadypopulated =
tryIO (R.getFileStatus dest') >>= \case
Left _ -> return False
Right st -> do
sz <- getFileSize' dest' st
if sz == 0
then tryIO (removeFile dest) >>= \case
Right () -> return False
Left _ -> return True
else return True
data CopyMethod = CopiedCoW | Copied
{- Copies from src to dest, updating a meter. Preserves mode and mtime.
- Uses copy-on-write if it is supported. If the the destination already
- exists, an interrupted copy will resume where it left off.
-
- The IncrementalVerifier is updated with the content of the file as it's
- being copied. But it is not finalized at the end.
-
- When copy-on-write is used, the IncrementalVerifier is not fed
- the content of the file, and verification using it will fail.
-
- Note that, when the destination file already exists, it's read both
- to start calculating the hash, and also to verify that its content is
- the same as the start of the source file. It's possible that the
- destination file was created from some other source file,
- (eg when isStableKey is false), and doing this avoids getting a
- corrupted file in such cases.
-}
fileCopier :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> IO CopyMethod
#ifdef mingw32_HOST_OS
fileCopier _ src dest meterupdate iv = docopy
#else
fileCopier copycowtried src dest meterupdate iv =
ifM (tryCopyCoW copycowtried src dest meterupdate)
( do
maybe noop unableIncrementalVerifier iv
return CopiedCoW
, docopy
)
#endif
where
docopy = do
-- The file might have had the write bit removed,
-- so make sure we can write to it.
void $ tryIO $ allowWrite dest'
withBinaryFile src ReadMode $ \hsrc ->
fileContentCopier hsrc dest meterupdate iv
-- Copy src mode and mtime.
mode <- fileMode <$> R.getFileStatus (toRawFilePath src)
mtime <- utcTimeToPOSIXSeconds <$> getModificationTime src
R.setFileMode dest' mode
touch dest' mtime False
return Copied
dest' = toRawFilePath dest
{- Copies content from a handle to a destination file. Does not
- use copy-on-write, and does not copy file mode and mtime.
-}
fileContentCopier :: Handle -> FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> IO ()
fileContentCopier hsrc dest meterupdate iv =
withBinaryFile dest ReadWriteMode $ \hdest -> do
sofar <- compareexisting hdest zeroBytesProcessed
docopy hdest sofar
where
docopy hdest sofar = do
s <- S.hGet hsrc defaultChunkSize
if s == S.empty
then return ()
else do
let sofar' = addBytesProcessed sofar (S.length s)
S.hPut hdest s
maybe noop (flip updateIncrementalVerifier s) iv
meterupdate sofar'
docopy hdest sofar'
-- Leaves hdest and hsrc seeked to wherever the two diverge,
-- so typically hdest will be seeked to end, and hsrc to the same
-- position.
compareexisting hdest sofar = do
s <- S.hGet hdest defaultChunkSize
if s == S.empty
then return sofar
else do
s' <- getnoshort (S.length s) hsrc
if s == s'
then do
maybe noop (flip updateIncrementalVerifier s) iv
let sofar' = addBytesProcessed sofar (S.length s)
meterupdate sofar'
compareexisting hdest sofar'
else do
seekbefore hdest s
seekbefore hsrc s'
return sofar
seekbefore h s = hSeek h RelativeSeek (fromIntegral (-1*S.length s))
-- Like hGet, but never returns less than the requested number of
-- bytes, unless it reaches EOF.
getnoshort n h = do
s <- S.hGet h n
if S.length s == n || S.empty == s
then return s
else do
s' <- getnoshort (n - S.length s) h
return (s <> s')

41
Annex/CurrentBranch.hs Normal file
View file

@ -0,0 +1,41 @@
{- currently checked out branch
-
- Copyright 2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.CurrentBranch where
import Annex.Common
import Types.AdjustedBranch
import Annex.AdjustedBranch.Name
import qualified Annex
import qualified Git
import qualified Git.Branch
type CurrBranch = (Maybe Git.Branch, Maybe Adjustment)
{- Gets the currently checked out branch.
- When on an adjusted branch, gets the original branch, and the adjustment.
-
- Cached for speed.
-
- Until a commit is made in a new repository, no branch is checked out.
- Since git-annex may make the first commit, this does not cache
- the absence of a branch.
-}
getCurrentBranch :: Annex CurrBranch
getCurrentBranch = maybe cache return
=<< Annex.getState Annex.cachedcurrentbranch
where
cache = inRepo Git.Branch.current >>= \case
Just b -> do
let v = case adjustedToOriginal b of
Nothing -> (Just b, Nothing)
Just (adj, origbranch) ->
(Just origbranch, Just adj)
Annex.changeState $ \s ->
s { Annex.cachedcurrentbranch = Just v }
return v
Nothing -> return (Nothing, Nothing)

35
Annex/Debug.hs Normal file
View file

@ -0,0 +1,35 @@
{- git-annex debugging
-
- Copyright 2021-2023 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Debug (
DebugSelector(..),
DebugSource(..),
debug,
fastDebug,
fastDebug',
configureDebug,
debugSelectorFromGitConfig,
parseDebugSelector,
) where
import Common
import qualified Annex
import Utility.Debug hiding (fastDebug)
import qualified Utility.Debug
import Annex.Debug.Utility
-- | This is faster than using debug, because the DebugSelector
-- is read from the Annex monad, which avoids any IORef access overhead
-- when debugging is not enabled.
fastDebug :: DebugSource -> String -> Annex.Annex ()
fastDebug src msg = do
rd <- Annex.getRead id
fastDebug' rd src msg
fastDebug' :: Annex.AnnexRead -> DebugSource -> String -> Annex.Annex ()
fastDebug' rd src msg = when (Annex.debugenabled rd) $
liftIO $ Utility.Debug.fastDebug (Annex.debugselector rd) src msg

32
Annex/Debug/Utility.hs Normal file
View file

@ -0,0 +1,32 @@
{- git-annex debugging, utility functions
-
- Copyright 2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Debug.Utility (
debugSelectorFromGitConfig,
parseDebugSelector,
DebugSelector,
) where
import Types.GitConfig
import Utility.Debug
import Utility.Split
import Utility.FileSystemEncoding
import qualified Data.ByteString as S
debugSelectorFromGitConfig :: GitConfig -> DebugSelector
debugSelectorFromGitConfig =
maybe NoDebugSelector parseDebugSelector . annexDebugFilter
parseDebugSelector :: String -> DebugSelector
parseDebugSelector = DebugSelector . matchDebugSource . splitSelectorNames
splitSelectorNames :: String -> [S.ByteString]
splitSelectorNames = map encodeBS . splitc ','
matchDebugSource :: [S.ByteString] -> DebugSource -> Bool
matchDebugSource names (DebugSource s) = any (`S.isInfixOf` s) names

60
Annex/Difference.hs Normal file
View file

@ -0,0 +1,60 @@
{- git-annex repository differences
-
- Copyright 2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Difference (
module Types.Difference,
setDifferences,
) where
import Annex.Common
import Types.Difference
import Logs.Difference
import Config
import Annex.UUID
import Logs.UUID
import Annex.Version
import qualified Annex
import qualified Data.Map as M
-- Differences are only allowed to be tweaked when initializing a
-- repository for the first time, and then only if there is not another
-- known uuid. If the repository was cloned from elsewhere, it inherits
-- the existing settings.
--
-- Must be called before setVersion, so it can check if this is the first
-- time the repository is being initialized.
setDifferences :: Annex ()
setDifferences = do
u <- getUUID
otherds <- allDifferences <$> recordedDifferences
ds <- mappend otherds . annexDifferences <$> Annex.getGitConfig
when (ds /= mempty) $ do
ds' <- ifM (isJust <$> getVersion)
( do
oldds <- recordedDifferencesFor u
when (ds /= oldds) $
warning "Cannot change tunable parameters in already initialized repository."
return oldds
, if otherds == mempty
then ifM (any (/= u) . M.keys <$> uuidDescMap)
( do
warning "Cannot change tunable parameters in a clone of an existing repository."
return mempty
, return ds
)
else if otherds /= ds
then do
warning "The specified tunable parameters differ from values being used in other clones of this repository."
return otherds
else return ds
)
forM_ (listDifferences ds') $ \d ->
setConfig (differenceConfigKey d) (differenceConfigVal d)
recordDifferences ds' u

90
Annex/DirHashes.hs Normal file
View file

@ -0,0 +1,90 @@
{- git-annex file locations
-
- Copyright 2010-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.DirHashes (
Hasher,
HashLevels(..),
objectHashLevels,
branchHashLevels,
branchHashDir,
dirHashes,
hashDirMixed,
hashDirLower,
display_32bits_as_dir
) where
import Data.Default
import Data.Bits
import qualified Data.ByteArray as BA
import qualified Data.ByteArray.Encoding as BA
import qualified Data.ByteString as S
import qualified System.FilePath.ByteString as P
import Common
import Key
import Types.GitConfig
import Types.Difference
import Utility.Hash
import Utility.MD5
type Hasher = Key -> RawFilePath
-- Number of hash levels to use. 2 is the default.
newtype HashLevels = HashLevels Int
instance Default HashLevels where
def = HashLevels 2
objectHashLevels :: GitConfig -> HashLevels
objectHashLevels = configHashLevels OneLevelObjectHash
branchHashLevels :: GitConfig -> HashLevels
branchHashLevels = configHashLevels OneLevelBranchHash
configHashLevels :: Difference -> GitConfig -> HashLevels
configHashLevels d config
| hasDifference d (annexDifferences config) = HashLevels 1
| otherwise = def
branchHashDir :: GitConfig -> Key -> S.ByteString
branchHashDir = hashDirLower . branchHashLevels
{- Two different directory hashes may be used. The mixed case hash
- came first, and is fine, except for the problem of case-strict
- filesystems such as Linux VFAT (mounted with shortname=mixed),
- which do not allow using a directory "XX" when "xx" already exists.
- To support that, some git-annex repositories use the lower case-hash.
- All special remotes use the lower-case hash for new data, but old data
- may still use the mixed case hash. -}
dirHashes :: [HashLevels -> Hasher]
dirHashes = [hashDirLower, hashDirMixed]
hashDirs :: HashLevels -> Int -> S.ByteString -> RawFilePath
hashDirs (HashLevels 1) sz s = P.addTrailingPathSeparator $ S.take sz s
hashDirs _ sz s = P.addTrailingPathSeparator $ h P.</> t
where
(h, t) = S.splitAt sz s
hashDirLower :: HashLevels -> Hasher
hashDirLower n k = hashDirs n 3 $ S.pack $ take 6 $ conv $
md5s $ serializeKey' $ nonChunkKey k
where
conv v = BA.unpack $
(BA.convertToBase BA.Base16 v :: BA.Bytes)
{- This was originally using Data.Hash.MD5 from MissingH. This new version
- is faster, but ugly as it has to replicate the 4 Word32's that produced. -}
hashDirMixed :: HashLevels -> Hasher
hashDirMixed n k = hashDirs n 2 $ S.pack $ take 4 $
concatMap display_32bits_as_dir $
encodeWord32 $ map fromIntegral $ BA.unpack $
Utility.Hash.md5s $ serializeKey' $ nonChunkKey k
where
encodeWord32 (b1:b2:b3:b4:rest) =
(shiftL b4 24 .|. shiftL b3 16 .|. shiftL b2 8 .|. b1)
: encodeWord32 rest
encodeWord32 _ = []

131
Annex/Drop.hs Normal file
View file

@ -0,0 +1,131 @@
{- dropping of unwanted content
-
- Copyright 2012-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Drop where
import Annex.Common
import Logs.Trust
import Annex.NumCopies
import Types.Remote (uuid, appendonly, config, remotetype, thirdPartyPopulated)
import qualified Remote
import qualified Command.Drop
import Command
import Annex.Wanted
import Annex.Content
import Annex.SpecialRemote.Config
import qualified Database.Keys
import qualified Data.Set as S
type Reason = String
{- Drop a key from local and/or remote when allowed by the preferred content,
- required content, and numcopies settings.
-
- Skips trying to drop from remotes that are appendonly, since those drops
- would presumably fail. Also skips dropping from exporttree/importtree remotes,
- which don't allow dropping individual keys, and from thirdPartyPopulated
- remotes.
-
- The UUIDs are ones where the content is believed to be present.
- The Remote list can include other remotes that do not have the content;
- only ones that match the UUIDs will be dropped from.
-
- If allowed to drop fromhere, that drop will be done last. This is done
- because local drops do not need any LockedCopy evidence, and so dropping
- from local last allows the content to be removed from more remotes.
-
- A VerifiedCopy can be provided as an optimisation when eg, a key
- has just been uploaded to a remote.
-
- The runner is used to run CommandStart sequentially, it's typically
- callCommandAction.
-}
handleDropsFrom :: [UUID] -> [Remote] -> Reason -> Bool -> Key -> AssociatedFile -> SeekInput -> [VerifiedCopy] -> (CommandStart -> CommandCleanup) -> Annex ()
handleDropsFrom locs rs reason fromhere key afile si preverified runner = do
fs <- Database.Keys.getAssociatedFilesIncluding afile key
n <- getcopies fs
void $ if fromhere && checkcopies n Nothing
then go fs rs n >>= dropl fs
else go fs rs n
where
getcopies fs = do
(untrusted, have) <- trustPartition UnTrusted locs
(numcopies, mincopies) <- getSafestNumMinCopies' afile key fs
return (numCopiesCount have, numcopies, mincopies, S.fromList untrusted)
{- Check that we have enough copies still to drop the content.
- When the remote being dropped from is untrusted, it was not
- counted as a copy, so having only numcopies suffices. Otherwise,
- we need more than numcopies to safely drop.
-
- This is not the final check that it's safe to drop, but it
- avoids doing extra work to do that check later in cases where it
- will surely fail.
-}
checkcopies (have, numcopies, mincopies, _untrusted) Nothing =
have > fromNumCopies numcopies && have > fromMinCopies mincopies
checkcopies (have, numcopies, mincopies, untrusted) (Just u)
| S.member u untrusted = have >= fromNumCopies numcopies && have >= fromMinCopies mincopies
| otherwise = have > fromNumCopies numcopies && have > fromMinCopies mincopies
decrcopies (have, numcopies, mincopies, untrusted) Nothing =
(have - 1, numcopies, mincopies, untrusted)
decrcopies v@(_have, _numcopies, _mincopies, untrusted) (Just u)
| S.member u untrusted = v
| otherwise = decrcopies v Nothing
go _ [] n = pure n
go fs (r:rest) n
| uuid r `S.notMember` slocs = go fs rest n
| appendonly r = go fs rest n
| exportTree (config r) = go fs rest n
| importTree (config r) = go fs rest n
| thirdPartyPopulated (remotetype r) = go fs rest n
| checkcopies n (Just $ Remote.uuid r) =
dropr fs r n >>= go fs rest
| otherwise = pure n
checkdrop fs n u a =
let afs = map (AssociatedFile . Just) fs
pcc = Command.Drop.PreferredContentChecked True
in ifM (wantDrop True u (Just key) afile (Just afs))
( dodrop n u (a pcc)
, return n
)
dodrop n@(have, numcopies, mincopies, _untrusted) u a =
ifM (safely $ runner $ a numcopies mincopies)
( do
fastDebug "Annex.Drop" $ unwords
[ "dropped"
, case afile of
AssociatedFile Nothing -> serializeKey key
AssociatedFile (Just af) -> fromRawFilePath af
, "(from " ++ maybe "here" show u ++ ")"
, "(copies now " ++ show (have - 1) ++ ")"
, ": " ++ reason
]
return $ decrcopies n u
, return n
)
dropl fs n = checkdrop fs n Nothing $ \pcc numcopies mincopies ->
stopUnless (inAnnex key) $
Command.Drop.startLocal pcc afile ai si numcopies mincopies key preverified (Command.Drop.DroppingUnused False)
dropr fs r n = checkdrop fs n (Just $ Remote.uuid r) $ \pcc numcopies mincopies ->
Command.Drop.startRemote pcc afile ai si numcopies mincopies key (Command.Drop.DroppingUnused False) r
ai = mkActionItem (key, afile)
slocs = S.fromList locs
safely a = either (const False) id <$> tryNonAsync a

73
Annex/Environment.hs Normal file
View file

@ -0,0 +1,73 @@
{- git-annex environment
-
- Copyright 2012-2023 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Environment (
checkEnvironment,
checkEnvironmentIO,
ensureCommit,
) where
import Annex.Common
import Utility.UserInfo
import qualified Git.Config
import Config
import Utility.Env.Set
import Control.Exception
{- Checks that the system's environment allows git to function.
- Git requires a GECOS username, or suitable git configuration, or
- environment variables. When none of those are set, this will set the
- environment variables.
-
- Git also requires the system have a hostname containing a dot.
- Otherwise, it tries various methods to find a FQDN, and will fail if it
- does not. To avoid replicating that code here, which would break if its
- methods change, this function does not check the hostname is valid.
- Instead, git-annex init calls ensureCommit, which makes sure that git
- gets set up to allow committing.
-}
checkEnvironment :: Annex ()
checkEnvironment = do
gitusername <- fromRepo $ Git.Config.getMaybe "user.name"
when (isNothing gitusername || gitusername == Just "") $
unlessM userConfigOnly $
liftIO checkEnvironmentIO
checkEnvironmentIO :: IO ()
checkEnvironmentIO = whenM (isNothing <$> myUserGecos) $ do
username <- either (const "unknown") id <$> myUserName
ensureEnv "GIT_AUTHOR_NAME" username
ensureEnv "GIT_COMMITTER_NAME" username
where
-- existing environment is not overwritten
ensureEnv var val = setEnv var val False
{- Runs an action that commits to the repository, and if it fails,
- sets user.email and user.name to a dummy value and tries the action again.
-
- Note that user.email and user.name are left set afterwards, so this only
- needs to be used once to make sure that future commits will succeed.
-}
ensureCommit :: Annex a -> Annex a
ensureCommit a = either retry return =<< tryNonAsync a
where
retry e = ifM userConfigOnly
( liftIO (throwIO e)
, do
name <- liftIO $ either (const "unknown") id <$> myUserName
setConfig "user.name" name
setConfig "user.email" name
a
)
userConfigOnly :: Annex Bool
userConfigOnly = do
v <- fromRepo $ Git.Config.getMaybe "user.useconfigonly"
return (fromMaybe False (Git.Config.isTrueFalse' =<< v))

72
Annex/Export.hs Normal file
View file

@ -0,0 +1,72 @@
{- git-annex exports
-
- Copyright 2017-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Export where
import Annex
import Annex.CatFile
import Types
import Types.Key
import qualified Git
import qualified Types.Remote as Remote
import Git.Quote
import Messages
import Data.Maybe
import qualified Data.ByteString.Short as S (fromShort, toShort)
-- From a sha pointing to the content of a file to the key
-- to use to export it. When the file is annexed, it's the annexed key.
-- When the file is stored in git, it's a special type of key to indicate
-- that.
exportKey :: Git.Sha -> Annex Key
exportKey sha = mk <$> catKey sha
where
mk (Just k) = k
mk Nothing = gitShaKey sha
-- Encodes a git sha as a key. This is used to represent a non-annexed
-- file that is stored on a special remote, which necessarily needs a
-- key.
--
-- This is not the same as a SHA1 key, because the mapping needs to be
-- bijective, also because git may not always use SHA1, and because git
-- takes a SHA1 of the file size + content, while git-annex SHA1 keys
-- only checksum the content.
gitShaKey :: Git.Sha -> Key
gitShaKey (Git.Ref s) = mkKey $ \kd -> kd
{ keyName = S.toShort s
, keyVariety = OtherKey "GIT"
}
-- Reverse of gitShaKey
keyGitSha :: Key -> Maybe Git.Sha
keyGitSha k
| fromKey keyVariety k == OtherKey "GIT" =
Just (Git.Ref (S.fromShort (fromKey keyName k)))
| otherwise = Nothing
-- Is a key storing a git sha, and not used for an annexed file?
isGitShaKey :: Key -> Bool
isGitShaKey = isJust . keyGitSha
warnExportImportConflict :: Remote -> Annex ()
warnExportImportConflict r = do
isimport <- Remote.isImportSupported r
isexport <- Remote.isExportSupported r
let (ops, resolvcmd) = case (isexport, isimport) of
(False, True) -> ("imported from", "git-annex import")
(True, False) -> ("exported to", "git-annex export")
_ -> ("exported to and/or imported from", "git-annex export")
toplevelWarning True $ UnquotedString $ unwords
[ "Conflict detected. Different trees have been"
, ops, Remote.name r ++ ". Use"
, resolvcmd
, "to resolve this conflict."
]

View file

@ -0,0 +1,100 @@
{- External addon processes for special remotes and backends.
-
- Copyright 2013-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.ExternalAddonProcess where
import qualified Annex
import Annex.Common
import Git.Env
import Utility.Shell
import Messages.Progress
import Control.Concurrent.Async
data ExternalAddonProcess = ExternalAddonProcess
{ externalSend :: Handle
, externalReceive :: Handle
-- Shut down the process. With True, it's forced to stop
-- immediately.
, externalShutdown :: Bool -> IO ()
, externalPid :: ExternalAddonPID
, externalProgram :: String
}
type ExternalAddonPID = Int
data ExternalAddonStartError
= ProgramNotInstalled String
| ProgramFailure String
startExternalAddonProcess :: String -> [CommandParam] -> ExternalAddonPID -> Annex (Either ExternalAddonStartError ExternalAddonProcess)
startExternalAddonProcess basecmd ps pid = do
errrelayer <- mkStderrRelayer
g <- Annex.gitRepo
cmdpath <- liftIO $ searchPath basecmd
liftIO $ start errrelayer g cmdpath
where
start errrelayer g cmdpath = do
(cmd, cmdps) <- maybe (pure (basecmd, [])) findShellCommand cmdpath
let basep = (proc cmd (toCommand (cmdps ++ ps)))
{ std_in = CreatePipe
, std_out = CreatePipe
, std_err = CreatePipe
}
p <- propgit g basep
tryNonAsync (createProcess p) >>= \case
Right v -> (Right <$> started cmd errrelayer v)
`catchNonAsync` const (runerr cmdpath)
Left _ -> runerr cmdpath
started cmd errrelayer pall@(Just hin, Just hout, Just herr, ph) = do
stderrelay <- async $ errrelayer ph herr
let shutdown forcestop = do
-- Close the process's stdin, to let it know there
-- are no more requests, so it will exit.
hClose hout
-- Close the procces's stdout as we're not going to
-- process any more output from it.
hClose hin
if forcestop
then cleanupProcess pall
else void (waitForProcess ph)
`onException` cleanupProcess pall
-- This thread will exit after consuming any
-- remaining stderr from the process.
() <- wait stderrelay
hClose herr
return $ ExternalAddonProcess
{ externalSend = hin
, externalReceive = hout
, externalPid = pid
, externalShutdown = shutdown
, externalProgram = cmd
}
started _ _ _ = giveup "internal"
propgit g p = do
environ <- propGitEnv g
return $ p { env = Just environ }
runerr (Just cmd) =
return $ Left $ ProgramFailure $
"Cannot run " ++ cmd ++ " -- Make sure it's executable and that its dependencies are installed."
runerr Nothing = do
path <- intercalate ":" <$> getSearchPath
return $ Left $ ProgramNotInstalled $
"Cannot run " ++ basecmd ++ " -- It is not installed in PATH (" ++ path ++ ")"
protocolDebug :: ExternalAddonProcess -> Bool -> String -> IO ()
protocolDebug external sendto line = debug "Annex.ExternalAddonProcess" $ unwords
[ externalProgram external ++
"[" ++ show (externalPid external) ++ "]"
, if sendto then "<--" else "-->"
, line
]

280
Annex/FileMatcher.hs Normal file
View file

@ -0,0 +1,280 @@
{- git-annex file matching
-
- Copyright 2012-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.FileMatcher (
GetFileMatcher,
checkFileMatcher,
checkFileMatcher',
checkMatcher,
checkMatcher',
matchAll,
PreferredContentData(..),
preferredContentTokens,
preferredContentParser,
ParseToken,
parsedToMatcher,
mkMatchExpressionParser,
largeFilesMatcher,
AddUnlockedMatcher,
addUnlockedMatcher,
checkAddUnlockedMatcher,
LimitBy(..),
module Types.FileMatcher
) where
import qualified Data.Map as M
import Annex.Common
import Limit
import Utility.Matcher
import Types.Group
import Types.FileMatcher
import Types.GitConfig
import Config.GitConfig
import Annex.SpecialRemote.Config (preferreddirField)
import Git.FilePath
import Types.Remote (RemoteConfig)
import Types.ProposedAccepted
import Annex.CheckAttr
import qualified Git.Config
#ifdef WITH_MAGICMIME
import Annex.Magic
#endif
import Data.Either
import qualified Data.Set as S
import Control.Monad.Writer
type GetFileMatcher = RawFilePath -> Annex (FileMatcher Annex)
checkFileMatcher :: GetFileMatcher -> RawFilePath -> Annex Bool
checkFileMatcher getmatcher file =
checkFileMatcher' getmatcher file (return True)
-- | Allows running an action when no matcher is configured for the file.
checkFileMatcher' :: GetFileMatcher -> RawFilePath -> Annex Bool -> Annex Bool
checkFileMatcher' getmatcher file notconfigured = do
matcher <- getmatcher file
checkMatcher matcher Nothing afile S.empty notconfigured d
where
afile = AssociatedFile (Just file)
-- checkMatcher will never use this, because afile is provided.
d = return True
checkMatcher :: FileMatcher Annex -> Maybe Key -> AssociatedFile -> AssumeNotPresent -> Annex Bool -> Annex Bool -> Annex Bool
checkMatcher matcher mkey afile notpresent notconfigured d
| isEmpty (fst matcher) = notconfigured
| otherwise = case (mkey, afile) of
(_, AssociatedFile (Just file)) ->
go =<< fileMatchInfo file mkey
(Just key, AssociatedFile Nothing) ->
let i = ProvidedInfo
{ providedFilePath = Nothing
, providedKey = Just key
, providedFileSize = Nothing
, providedMimeType = Nothing
, providedMimeEncoding = Nothing
, providedLinkType = Nothing
}
in go (MatchingInfo i)
(Nothing, _) -> d
where
go mi = checkMatcher' matcher mi notpresent
checkMatcher' :: FileMatcher Annex -> MatchInfo -> AssumeNotPresent -> Annex Bool
checkMatcher' (matcher, (MatcherDesc matcherdesc)) mi notpresent = do
(matches, desc) <- runWriterT $ matchMrun' matcher $ \op ->
matchAction op notpresent mi
explain (mkActionItem mi) $ UnquotedString <$>
describeMatchResult matchDesc desc
((if matches then "matches " else "does not match ") ++ matcherdesc ++ ": ")
return matches
fileMatchInfo :: RawFilePath -> Maybe Key -> Annex MatchInfo
fileMatchInfo file mkey = do
matchfile <- getTopFilePath <$> inRepo (toTopFilePath file)
return $ MatchingFile FileInfo
{ matchFile = matchfile
, contentFile = file
, matchKey = mkey
}
matchAll :: Matcher (MatchFiles Annex)
matchAll = generate []
parsedToMatcher :: MatcherDesc -> [ParseResult (MatchFiles Annex)] -> Either String (FileMatcher Annex)
parsedToMatcher matcherdesc parsed = case partitionEithers parsed of
([], vs) -> Right (generate vs, matcherdesc)
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
data ParseToken t
= SimpleToken String (ParseResult t)
| ValueToken String (String -> ParseResult t)
type ParseResult t = Either String (Token t)
parseToken :: [ParseToken t] -> String -> ParseResult t
parseToken l t = case syntaxToken t of
Right st -> Right st
Left _ -> go l
where
go [] = Left $ "near " ++ show t
go (SimpleToken s r : _) | s == t = r
go (ValueToken s mkr : _) | s == k = mkr v
go (_ : ps) = go ps
(k, v) = separate (== '=') t
{- This is really dumb tokenization; there's no support for quoted values.
- Open and close parens are always treated as standalone tokens;
- otherwise tokens must be separated by whitespace. -}
tokenizeMatcher :: String -> [String]
tokenizeMatcher = filter (not . null) . concatMap splitparens . words
where
splitparens = segmentDelim (`elem` "()")
commonTokens :: LimitBy -> [ParseToken (MatchFiles Annex)]
commonTokens lb =
[ SimpleToken "anything" (simply limitAnything)
, SimpleToken "nothing" (simply limitNothing)
, ValueToken "include" (usev limitInclude)
, ValueToken "exclude" (usev limitExclude)
, ValueToken "largerthan" (usev $ limitSize lb "largerthan" (>))
, ValueToken "smallerthan" (usev $ limitSize lb "smallerthan" (<))
, SimpleToken "unused" (simply limitUnused)
]
data PreferredContentData = PCD
{ matchStandard :: Either String (Matcher (MatchFiles Annex))
, matchGroupWanted :: Either String (Matcher (MatchFiles Annex))
, getGroupMap :: Annex GroupMap
, configMap :: M.Map UUID RemoteConfig
, repoUUID :: Maybe UUID
}
preferredContentTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
preferredContentTokens pcd =
[ SimpleToken "standard" (call "standard" $ matchStandard pcd)
, SimpleToken "groupwanted" (call "groupwanted" $ matchGroupWanted pcd)
, SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir "inpreferreddir")
, SimpleToken "present" (simply $ limitPresent $ repoUUID pcd)
, SimpleToken "securehash" (simply limitSecureHash)
, ValueToken "copies" (usev limitCopies)
, ValueToken "lackingcopies" (usev $ limitLackingCopies "lackingcopies" False)
, ValueToken "approxlackingcopies" (usev $ limitLackingCopies "approxlackingcopies" True)
, ValueToken "inbackend" (usev limitInBackend)
, ValueToken "metadata" (usev limitMetaData)
, ValueToken "inallgroup" (usev $ limitInAllGroup $ getGroupMap pcd)
, ValueToken "onlyingroup" (usev $ limitOnlyInGroup $ getGroupMap pcd)
, ValueToken "balanced" (usev $ limitBalanced (repoUUID pcd) (getGroupMap pcd))
, ValueToken "fullybalanced" (usev $ limitFullyBalanced (repoUUID pcd) (getGroupMap pcd))
] ++ commonTokens LimitAnnexFiles
where
preferreddir = maybe "public" fromProposedAccepted $
M.lookup preferreddirField =<< (`M.lookup` configMap pcd) =<< repoUUID pcd
preferredContentParser :: [ParseToken (MatchFiles Annex)] -> String -> [ParseResult (MatchFiles Annex)]
preferredContentParser tokens = map (parseToken tokens) . tokenizeMatcher
mkMatchExpressionParser :: Annex (String -> [ParseResult (MatchFiles Annex)])
mkMatchExpressionParser = do
#ifdef WITH_MAGICMIME
magicmime <- liftIO initMagicMime
let mimer n f = ValueToken n (usev $ f magicmime)
#else
let mimer n = ValueToken n $
const $ Left $ "\""++n++"\" not supported; not built with MagicMime support"
#endif
let parse = parseToken $
commonTokens LimitDiskFiles ++
#ifdef WITH_MAGICMIME
[ mimer "mimetype" $
matchMagic "mimetype" getMagicMimeType providedMimeType userProvidedMimeType
, mimer "mimeencoding" $
matchMagic "mimeencoding" getMagicMimeEncoding providedMimeEncoding userProvidedMimeEncoding
]
#else
[ mimer "mimetype"
, mimer "mimeencoding"
]
#endif
return $ map parse . tokenizeMatcher
{- Generates a matcher for files large enough (or meeting other criteria)
- to be added to the annex, rather than directly to git.
-
- annex.largefiles is configured in git config, or git attributes,
- or global git-annex config, in that order.
-}
largeFilesMatcher :: Annex GetFileMatcher
largeFilesMatcher = go =<< getGitConfigVal' annexLargeFiles
where
matcherdesc = MatcherDesc "annex.largefiles"
go (HasGitConfig (Just expr)) = do
matcher <- mkmatcher expr "git config"
return $ const $ return matcher
go v = return $ \file -> do
expr <- checkAttr "annex.largefiles" file
if null expr
then case v of
HasGlobalConfig (Just expr') ->
mkmatcher expr' "git-annex config"
_ -> return (matchAll, matcherdesc)
else mkmatcher expr "gitattributes"
mkmatcher expr cfgfrom = do
parser <- mkMatchExpressionParser
either (badexpr cfgfrom) return $ parsedToMatcher matcherdesc $ parser expr
badexpr cfgfrom e = giveup $ "bad annex.largefiles configuration in " ++ cfgfrom ++ ": " ++ e
newtype AddUnlockedMatcher = AddUnlockedMatcher (FileMatcher Annex)
addUnlockedMatcher :: Annex AddUnlockedMatcher
addUnlockedMatcher = AddUnlockedMatcher <$>
(go =<< getGitConfigVal' annexAddUnlocked)
where
go (HasGitConfig (Just expr)) = mkmatcher expr "git config"
go (HasGlobalConfig (Just expr)) = mkmatcher expr "git annex config"
go _ = matchalways False
matcherdesc = MatcherDesc "annex.addunlocked"
mkmatcher :: String -> String -> Annex (FileMatcher Annex)
mkmatcher expr cfgfrom = case Git.Config.isTrueFalse expr of
Just b -> matchalways b
Nothing -> do
parser <- mkMatchExpressionParser
either (badexpr cfgfrom) return $ parsedToMatcher matcherdesc $ parser expr
badexpr cfgfrom e = giveup $ "bad annex.addunlocked configuration in " ++ cfgfrom ++ ": " ++ e
matchalways True = return (MOp limitAnything, matcherdesc)
matchalways False = return (MOp limitNothing, matcherdesc)
checkAddUnlockedMatcher :: AddUnlockedMatcher -> MatchInfo -> Annex Bool
checkAddUnlockedMatcher (AddUnlockedMatcher matcher) mi =
checkMatcher' matcher mi S.empty
simply :: MatchFiles Annex -> ParseResult (MatchFiles Annex)
simply = Right . Operation
usev :: MkLimit Annex -> String -> ParseResult (MatchFiles Annex)
usev a v = Operation <$> a v
call :: String -> Either String (Matcher (MatchFiles Annex)) -> ParseResult (MatchFiles Annex)
call desc (Right sub) = Right $ Operation $ MatchFiles
{ matchAction = \notpresent mi ->
matchMrun sub $ \o -> matchAction o notpresent mi
, matchNeedsFileName = any matchNeedsFileName sub
, matchNeedsFileContent = any matchNeedsFileContent sub
, matchNeedsKey = any matchNeedsKey sub
, matchNeedsLocationLog = any matchNeedsLocationLog sub
, matchDesc = matchDescSimple desc
}
call _ (Left err) = Left err

155
Annex/Fixup.hs Normal file
View file

@ -0,0 +1,155 @@
{- git-annex repository fixups
-
- Copyright 2013-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Fixup where
import Git.Types
import Git.Config
import Types.GitConfig
import Utility.Path
import Utility.Path.AbsRel
import Utility.SafeCommand
import Utility.Directory
import Utility.Exception
import Utility.Monad
import Utility.FileSystemEncoding
import qualified Utility.RawFilePath as R
import Utility.PartialPrelude
import System.IO
import Data.List
import Data.Maybe
import Control.Monad
import Control.Monad.IfElse
import qualified Data.Map as M
import qualified Data.ByteString as S
import System.FilePath.ByteString
import Control.Applicative
import Prelude
fixupRepo :: Repo -> GitConfig -> IO Repo
fixupRepo r c = do
let r' = disableWildcardExpansion r
r'' <- fixupUnusualRepos r' c
if annexDirect c
then return (fixupDirect r'')
else return r''
{- Disable git's built-in wildcard expansion, which is not wanted
- when using it as plumbing by git-annex. -}
disableWildcardExpansion :: Repo -> Repo
disableWildcardExpansion r = r
{ gitGlobalOpts = gitGlobalOpts r ++ [Param "--literal-pathspecs"] }
{- Direct mode repos have core.bare=true, but are not really bare.
- Fix up the Repo to be a non-bare repo, and arrange for git commands
- run by git-annex to be passed parameters that override this setting. -}
fixupDirect :: Repo -> Repo
fixupDirect r@(Repo { location = l@(Local { gitdir = d, worktree = Nothing }) }) = do
r
{ location = l { worktree = Just (parentDir d) }
, gitGlobalOpts = gitGlobalOpts r ++
[ Param "-c"
, Param $ fromConfigKey coreBare ++ "=" ++ boolConfig False
]
}
fixupDirect r = r
{- Submodules have their gitdir containing ".git/modules/", and
- have core.worktree set, and also have a .git file in the top
- of the repo. We need to unset core.worktree, and change the .git
- file into a symlink to the git directory. This way, annex symlinks will be
- of the usual .git/annex/object form, and will consistently work
- whether a repo is used as a submodule or not, and wheverever the
- submodule is mounted.
-
- git-worktree directories have a .git file.
- That needs to be converted to a symlink, and .git/annex made a symlink
- to the main repository's git-annex directory.
- The worktree shares git config with the main repository, so the same
- annex uuid and other configuration will be used in the worktree as in
- the main repository.
-
- git clone or init with --separate-git-dir similarly makes a .git file,
- which in that case points to a different git directory. It's
- also converted to a symlink so links to .git/annex will work.
-
- When the filesystem doesn't support symlinks, we cannot make .git
- into a symlink. But we don't need too, since the repo will use adjusted
- unlocked branches.
-
- Don't do any of this if the repo has not been initialized for git-annex
- use yet.
-}
fixupUnusualRepos :: Repo -> GitConfig -> IO Repo
fixupUnusualRepos r@(Repo { location = l@(Local { worktree = Just w, gitdir = d }) }) c
| isNothing (annexVersion c) = return r
| needsSubmoduleFixup r = do
when (coreSymlinks c) $
(replacedotgit >> unsetcoreworktree)
`catchNonAsync` \e -> hPutStrLn stderr $
"warning: unable to convert submodule to form that will work with git-annex: " ++ show e
return $ r'
{ config = M.delete "core.worktree" (config r)
}
| otherwise = ifM (needsGitLinkFixup r)
( do
when (coreSymlinks c) $
(replacedotgit >> worktreefixup)
`catchNonAsync` \e -> hPutStrLn stderr $
"warning: unable to convert .git file to symlink that will work with git-annex: " ++ show e
return r'
, return r
)
where
dotgit = w </> ".git"
replacedotgit = whenM (doesFileExist (fromRawFilePath dotgit)) $ do
linktarget <- relPathDirToFile w d
removeWhenExistsWith R.removeLink dotgit
R.createSymbolicLink linktarget dotgit
-- Unsetting a config fails if it's not set, so ignore failure.
unsetcoreworktree = void $ Git.Config.unset "core.worktree" r
worktreefixup =
-- git-worktree sets up a "commondir" file that contains
-- the path to the main git directory.
-- Using --separate-git-dir does not.
catchDefaultIO Nothing (headMaybe . lines <$> readFile (fromRawFilePath (d </> "commondir"))) >>= \case
Just gd -> do
-- Make the worktree's git directory
-- contain an annex symlink to the main
-- repository's annex directory.
let linktarget = toRawFilePath gd </> "annex"
R.createSymbolicLink linktarget
(dotgit </> "annex")
Nothing -> return ()
-- Repo adjusted, so that symlinks to objects that get checked
-- in will have the usual path, rather than pointing off to the
-- real .git directory.
r'
| coreSymlinks c = r { location = l { gitdir = dotgit } }
| otherwise = r
fixupUnusualRepos r _ = return r
needsSubmoduleFixup :: Repo -> Bool
needsSubmoduleFixup (Repo { location = (Local { worktree = Just _, gitdir = d }) }) =
(".git" </> "modules") `S.isInfixOf` d
needsSubmoduleFixup _ = False
needsGitLinkFixup :: Repo -> IO Bool
needsGitLinkFixup (Repo { location = (Local { worktree = Just wt, gitdir = d }) })
-- Optimization: Avoid statting .git in the common case; only
-- when the gitdir is not in the usual place inside the worktree
-- might .git be a file.
| wt </> ".git" == d = return False
| otherwise = doesFileExist (fromRawFilePath (wt </> ".git"))
needsGitLinkFixup _ = return False

124
Annex/GitOverlay.hs Normal file
View file

@ -0,0 +1,124 @@
{- Temporarily changing how git-annex runs git commands.
-
- Copyright 2014-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.GitOverlay (
module Annex.GitOverlay,
AltIndexFile(..),
) where
import qualified Control.Exception as E
import Annex.Common
import Types.IndexFiles
import Git
import Git.Types
import Git.Index
import Git.Env
import qualified Annex
import qualified Annex.Queue
import Config.Smudge
{- Runs an action using a different git index file. -}
withIndexFile :: AltIndexFile -> (FilePath -> Annex a) -> Annex a
withIndexFile i = withAltRepo usecachedgitenv restoregitenv
where
-- This is an optimisation. Since withIndexFile is run repeatedly,
-- typically with the same file, and addGitEnv uses the slow
-- getEnvironment when gitEnv is Nothing, and has to do a
-- nontrivial amount of work, we cache the modified environment
-- the first time, and reuse it in subsequent calls for the same
-- index file.
--
-- (This could be done at another level; eg when creating the
-- Git object in the first place, but it's more efficient to let
-- the environment be inherited in all calls to git where it
-- does not need to be modified.)
--
-- Also, the use of AltIndexFile avoids needing to construct
-- the FilePath each time, which saves enough time to be worth the
-- added complication.
usecachedgitenv g = case gitEnv g of
Nothing -> Annex.withState $ \s -> case Annex.cachedgitenv s of
Just (cachedi, cachedf, cachede) | i == cachedi ->
return (s, (g { gitEnv = Just cachede }, cachedf))
_ -> do
r@(g', f) <- addindex g
let cache = (,,)
<$> Just i
<*> Just f
<*> gitEnv g'
return (s { Annex.cachedgitenv = cache }, r)
Just _ -> liftIO $ addindex g
addindex g = do
f <- indexEnvVal $ case i of
AnnexIndexFile -> gitAnnexIndex g
ViewIndexFile -> gitAnnexViewIndex g
g' <- addGitEnv g indexEnv f
return (g', f)
restoregitenv g g' = g' { gitEnv = gitEnv g }
{- Runs an action using a different git work tree.
-
- Smudge and clean filters are disabled in this work tree. -}
withWorkTree :: FilePath -> Annex a -> Annex a
withWorkTree d a = withAltRepo
(\g -> return $ (g { location = modlocation (location g), gitGlobalOpts = gitGlobalOpts g ++ bypassSmudgeConfig }, ()))
(\g g' -> g' { location = location g, gitGlobalOpts = gitGlobalOpts g })
(const a)
where
modlocation l@(Local {}) = l { worktree = Just (toRawFilePath d) }
modlocation _ = giveup "withWorkTree of non-local git repo"
{- Runs an action with the git index file and HEAD, and a few other
- files that are related to the work tree coming from an overlay
- directory other than the usual. This is done by pointing
- GIT_COMMON_DIR at the regular git directory, and GIT_DIR at the
- overlay directory.
-
- Needs git 2.2.0 or newer.
-}
withWorkTreeRelated :: FilePath -> Annex a -> Annex a
withWorkTreeRelated d a = withAltRepo modrepo unmodrepo (const a)
where
modrepo g = liftIO $ do
g' <- addGitEnv g "GIT_COMMON_DIR" . fromRawFilePath
=<< absPath (localGitDir g)
g'' <- addGitEnv g' "GIT_DIR" d
return (g'' { gitEnvOverridesGitDir = True }, ())
unmodrepo g g' = g'
{ gitEnv = gitEnv g
, gitEnvOverridesGitDir = gitEnvOverridesGitDir g
}
withAltRepo
:: (Repo -> Annex (Repo, t))
-- ^ modify Repo
-> (Repo -> Repo -> Repo)
-- ^ undo modifications; first Repo is the original and second
-- is the one after running the action.
-> (t -> Annex a)
-> Annex a
withAltRepo modrepo unmodrepo a = do
g <- gitRepo
(g', t) <- modrepo g
q <- Annex.Queue.get
v <- tryNonAsync $ do
Annex.changeState $ \s -> s
{ Annex.repo = g'
-- Start a separate queue for any changes made
-- with the modified repo.
, Annex.repoqueue = Nothing
}
a t
void $ tryNonAsync Annex.Queue.flush
Annex.changeState $ \s -> s
{ Annex.repo = unmodrepo g (Annex.repo s)
, Annex.repoqueue = Just q
}
either E.throw return v

66
Annex/HashObject.hs Normal file
View file

@ -0,0 +1,66 @@
{- git hash-object interface
-
- Copyright 2016-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.HashObject (
hashFile,
hashBlob,
hashObjectStop,
mkConcurrentHashObjectHandle,
withHashObjectHandle,
) where
import Annex.Common
import qualified Git.HashObject
import qualified Annex
import Git.Types
import Utility.ResourcePool
import Types.Concurrency
import Annex.Concurrent.Utility
hashObjectStop :: Annex ()
hashObjectStop = maybe noop stop =<< Annex.getState Annex.hashobjecthandle
where
stop p = do
liftIO $ freeResourcePool p Git.HashObject.hashObjectStop
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Nothing }
hashFile :: RawFilePath -> Annex Sha
hashFile f = withHashObjectHandle $ \h ->
liftIO $ Git.HashObject.hashFile h f
{- Note that the content will be written to a temp file.
- So it may be faster to use Git.HashObject.hashObject for large
- blob contents. -}
hashBlob :: Git.HashObject.HashableBlob b => b -> Annex Sha
hashBlob content = withHashObjectHandle $ \h ->
liftIO $ Git.HashObject.hashBlob h content
withHashObjectHandle :: (Git.HashObject.HashObjectHandle -> Annex a) -> Annex a
withHashObjectHandle a =
maybe mkpool go =<< Annex.getState Annex.hashobjecthandle
where
go p = withResourcePool p start a
start = inRepo $ Git.HashObject.hashObjectStart True
mkpool = do
-- This only runs in non-concurrent code paths;
-- a concurrent pool is set up earlier when needed.
p <- mkResourcePoolNonConcurrent start
Annex.changeState $ \s -> s { Annex.hashobjecthandle = Just p }
go p
mkConcurrentHashObjectHandle :: Concurrency -> Annex (ResourcePool Git.HashObject.HashObjectHandle)
mkConcurrentHashObjectHandle c =
Annex.getState Annex.hashobjecthandle >>= \case
Just p@(ResourcePool {}) -> return p
_ -> mkResourcePool =<< liftIO (maxHashObjects c)
{- git hash-object is typically CPU bound, and is not likely to be the main
- bottleneck for any command. So limit to the number of CPU cores, maximum,
- while respecting the -Jn value.
-}
maxHashObjects :: Concurrency -> IO Int
maxHashObjects = concurrencyUpToCpus

88
Annex/Hook.hs Normal file
View file

@ -0,0 +1,88 @@
{- git-annex git hooks
-
- Note that it's important that the content of scripts installed by
- git-annex not change, otherwise removing old hooks using an old
- version of the script would fail.
-
- Copyright 2013-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Hook where
import Annex.Common
import qualified Git.Hook as Git
import qualified Annex
import Utility.Shell
import qualified Data.Map as M
preCommitHook :: Git.Hook
preCommitHook = Git.Hook "pre-commit" (mkHookScript "git annex pre-commit .") []
postReceiveHook :: Git.Hook
postReceiveHook = Git.Hook "post-receive"
-- Only run git-annex post-receive when git-annex supports it,
-- to avoid failing if the repository with this hook is used
-- with an older version of git-annex.
(mkHookScript "if git annex post-receive --help >/dev/null 2>&1; then git annex post-receive; fi")
-- This is an old version of the hook script.
[ mkHookScript "git annex post-receive"
]
postCheckoutHook :: Git.Hook
postCheckoutHook = Git.Hook "post-checkout" smudgeHook []
postMergeHook :: Git.Hook
postMergeHook = Git.Hook "post-merge" smudgeHook []
-- Older versions of git-annex didn't support this command, but neither did
-- they support v7 repositories.
smudgeHook :: String
smudgeHook = mkHookScript "git annex smudge --update"
preCommitAnnexHook :: Git.Hook
preCommitAnnexHook = Git.Hook "pre-commit-annex" "" []
postUpdateAnnexHook :: Git.Hook
postUpdateAnnexHook = Git.Hook "post-update-annex" "" []
mkHookScript :: String -> String
mkHookScript s = unlines
[ shebang
, "# automatically configured by git-annex"
, s
]
hookWrite :: Git.Hook -> Annex ()
hookWrite h = unlessM (inRepo $ Git.hookWrite h) $
hookWarning h "already exists, not configuring"
hookUnWrite :: Git.Hook -> Annex ()
hookUnWrite h = unlessM (inRepo $ Git.hookUnWrite h) $
hookWarning h "contents modified; not deleting. Edit it to remove call to git annex."
hookWarning :: Git.Hook -> String -> Annex ()
hookWarning h msg = do
r <- gitRepo
warning $ UnquotedString $
Git.hookName h ++ " hook (" ++ Git.hookFile h r ++ ") " ++ msg
{- Runs a hook. To avoid checking if the hook exists every time,
- the existing hooks are cached. -}
runAnnexHook :: Git.Hook -> Annex ()
runAnnexHook hook = do
m <- Annex.getState Annex.existinghooks
case M.lookup hook m of
Just True -> run
Just False -> noop
Nothing -> do
exists <- inRepo $ Git.hookExists hook
Annex.changeState $ \s -> s
{ Annex.existinghooks = M.insert hook exists m }
when exists run
where
run = unlessM (inRepo $ Git.runHook hook) $ do
h <- fromRepo $ Git.hookFile hook
warning $ UnquotedString $ h ++ " failed"

1106
Annex/Import.hs Normal file

File diff suppressed because it is too large Load diff

425
Annex/Ingest.hs Normal file
View file

@ -0,0 +1,425 @@
{- git-annex content ingestion
-
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Ingest (
LockedDown(..),
LockDownConfig(..),
lockDown,
checkLockedDownWritePerms,
ingestAdd,
ingestAdd',
ingest,
ingest',
finishIngestUnlocked,
cleanOldKeys,
addSymlink,
genSymlink,
makeLink,
addUnlocked,
CheckGitIgnore(..),
gitAddParams,
addAnnexedFile,
addingExistingLink,
) where
import Annex.Common
import Types.KeySource
import Types.FileMatcher
import Backend
import Annex.Content
import Annex.Perms
import Annex.Link
import Annex.MetaData
import Annex.CurrentBranch
import Annex.CheckIgnore
import Logs.Location
import qualified Git
import qualified Annex
import qualified Database.Keys
import Config
import Utility.InodeCache
import Annex.ReplaceFile
import Utility.Tmp
import Utility.CopyFile
import Utility.Touch
import Utility.Metered
import Git.FilePath
import Annex.InodeSentinal
import Annex.AdjustedBranch
import Annex.FileMatcher
import qualified Utility.RawFilePath as R
import System.PosixCompat.Files (fileMode)
data LockedDown = LockedDown
{ lockDownConfig :: LockDownConfig
, keySource :: KeySource
}
deriving (Show)
data LockDownConfig = LockDownConfig
{ lockingFile :: Bool
-- ^ write bit removed during lock down
, hardlinkFileTmpDir :: Maybe RawFilePath
-- ^ hard link to temp directory
, checkWritePerms :: Bool
-- ^ check that write perms are successfully removed
}
deriving (Show)
{- The file that's being ingested is locked down before a key is generated,
- to prevent it from being modified in between. This lock down is not
- perfect at best (and pretty weak at worst). For example, it does not
- guard against files that are already opened for write by another process.
- So, the InodeCache can be used to detect any changes that might be made
- to the file after it was locked down.
-
- When possible, the file is hard linked to a temp directory. This guards
- against some changes, like deletion or overwrite of the file, and
- allows lsof checks to be done more efficiently when adding a lot of files.
-
- Lockdown can fail if a file gets deleted, or if it's unable to remove
- write permissions, and Nothing will be returned.
-}
lockDown :: LockDownConfig-> FilePath -> Annex (Maybe LockedDown)
lockDown cfg file = either
(\e -> warning (UnquotedString (show e)) >> return Nothing)
(return . Just)
=<< lockDown' cfg file
lockDown' :: LockDownConfig -> FilePath -> Annex (Either SomeException LockedDown)
lockDown' cfg file = tryNonAsync $ ifM crippledFileSystem
( nohardlink
, case hardlinkFileTmpDir cfg of
Nothing -> nohardlink
Just tmpdir -> withhardlink tmpdir
)
where
file' = toRawFilePath file
nohardlink = do
setperms
withTSDelta $ liftIO . nohardlink'
nohardlink' delta = do
cache <- genInodeCache file' delta
return $ LockedDown cfg $ KeySource
{ keyFilename = file'
, contentLocation = file'
, inodeCache = cache
}
withhardlink tmpdir = do
setperms
withTSDelta $ \delta -> liftIO $ do
(tmpfile, h) <- openTmpFileIn (fromRawFilePath tmpdir) $
relatedTemplate $ "ingest-" ++ takeFileName file
hClose h
removeWhenExistsWith R.removeLink (toRawFilePath tmpfile)
withhardlink' delta tmpfile
`catchIO` const (nohardlink' delta)
withhardlink' delta tmpfile = do
let tmpfile' = toRawFilePath tmpfile
R.createLink file' tmpfile'
cache <- genInodeCache tmpfile' delta
return $ LockedDown cfg $ KeySource
{ keyFilename = file'
, contentLocation = tmpfile'
, inodeCache = cache
}
setperms = when (lockingFile cfg) $ do
freezeContent file'
when (checkWritePerms cfg) $ do
qp <- coreQuotePath <$> Annex.getGitConfig
maybe noop (giveup . decodeBS . quote qp)
=<< checkLockedDownWritePerms file' file'
checkLockedDownWritePerms :: RawFilePath -> RawFilePath -> Annex (Maybe StringContainingQuotedPath)
checkLockedDownWritePerms file displayfile = checkContentWritePerm file >>= return . \case
Just False -> Just $ "Unable to remove all write permissions from "
<> QuotedPath displayfile
<> " -- perhaps it has an xattr or ACL set."
_ -> Nothing
{- Ingests a locked down file into the annex. Updates the work tree and
- index. -}
ingestAdd :: MeterUpdate -> Maybe LockedDown -> Annex (Maybe Key)
ingestAdd meterupdate ld = ingestAdd' meterupdate ld Nothing
ingestAdd' :: MeterUpdate -> Maybe LockedDown -> Maybe Key -> Annex (Maybe Key)
ingestAdd' _ Nothing _ = return Nothing
ingestAdd' meterupdate ld@(Just (LockedDown cfg source)) mk = do
(mk', mic) <- ingest meterupdate ld mk
case mk' of
Nothing -> return Nothing
Just k -> do
let f = keyFilename source
if lockingFile cfg
then addSymlink f k mic
else do
mode <- liftIO $ catchMaybeIO $
fileMode <$> R.getFileStatus (contentLocation source)
stagePointerFile f mode =<< hashPointerFile k
return (Just k)
{- Ingests a locked down file into the annex. Does not update the working
- tree or the index. -}
ingest :: MeterUpdate -> Maybe LockedDown -> Maybe Key -> Annex (Maybe Key, Maybe InodeCache)
ingest meterupdate ld mk = ingest' Nothing meterupdate ld mk (Restage True)
ingest' :: Maybe Backend -> MeterUpdate -> Maybe LockedDown -> Maybe Key -> Restage -> Annex (Maybe Key, Maybe InodeCache)
ingest' _ _ Nothing _ _ = return (Nothing, Nothing)
ingest' preferredbackend meterupdate (Just (LockedDown cfg source)) mk restage = withTSDelta $ \delta -> do
k <- case mk of
Nothing -> do
backend <- maybe
(chooseBackend $ keyFilename source)
return
preferredbackend
fst <$> genKey source meterupdate backend
Just k -> return k
let src = contentLocation source
ms <- liftIO $ catchMaybeIO $ R.getFileStatus src
mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms
case (mcache, inodeCache source) of
(_, Nothing) -> go k mcache
(Just newc, Just c) | compareStrong c newc -> go k mcache
_ -> failure "changed while it was being added"
where
go key mcache
| lockingFile cfg = golocked key mcache
| otherwise = gounlocked key mcache
golocked key mcache =
tryNonAsync (moveAnnex key naf (contentLocation source)) >>= \case
Right True -> success key mcache
Right False -> giveup "failed to add content to annex"
Left e -> restoreFile (keyFilename source) key e
-- moveAnnex uses the AssociatedFile provided to it to unlock
-- locked files when getting a file in an adjusted branch.
-- That case does not apply here, where we're adding an unlocked
-- file, so provide it nothing.
naf = AssociatedFile Nothing
gounlocked key (Just cache) = do
-- Remove temp directory hard link first because
-- linkToAnnex falls back to copying if a file
-- already has a hard link.
cleanCruft source
cleanOldKeys (keyFilename source) key
linkToAnnex key (keyFilename source) (Just cache) >>= \case
LinkAnnexFailed -> failure "failed to link to annex"
lar -> do
finishIngestUnlocked' key source restage (Just lar)
success key (Just cache)
gounlocked _ _ = failure "failed statting file"
success k mcache = do
genMetaData k (keyFilename source) (fmap inodeCacheToMtime mcache)
return (Just k, mcache)
failure msg = do
warning $ QuotedPath (keyFilename source) <> " " <> UnquotedString msg
cleanCruft source
return (Nothing, Nothing)
finishIngestUnlocked :: Key -> KeySource -> Annex ()
finishIngestUnlocked key source = do
cleanCruft source
finishIngestUnlocked' key source (Restage True) Nothing
finishIngestUnlocked' :: Key -> KeySource -> Restage -> Maybe LinkAnnexResult -> Annex ()
finishIngestUnlocked' key source restage lar = do
Database.Keys.addAssociatedFile key
=<< inRepo (toTopFilePath (keyFilename source))
populateUnlockedFiles key source restage lar
{- Copy to any other unlocked files using the same key.
-
- When linkToAnnex did not have to do anything, the object file
- was already present, and so other unlocked files are already populated,
- and nothing needs to be done here.
-}
populateUnlockedFiles :: Key -> KeySource -> Restage -> Maybe LinkAnnexResult -> Annex ()
populateUnlockedFiles _ _ _ (Just LinkAnnexNoop) = return ()
populateUnlockedFiles key source restage _ = do
obj <- calcRepo (gitAnnexLocation key)
g <- Annex.gitRepo
ingestedf <- flip fromTopFilePath g
<$> inRepo (toTopFilePath (keyFilename source))
afs <- map (`fromTopFilePath` g) <$> Database.Keys.getAssociatedFiles key
forM_ (filter (/= ingestedf) afs) $
populatePointerFile restage key obj
cleanCruft :: KeySource -> Annex ()
cleanCruft source = when (contentLocation source /= keyFilename source) $
liftIO $ removeWhenExistsWith R.removeLink $ contentLocation source
-- If a worktree file was was hard linked to an annex object before,
-- modifying the file would have caused the object to have the wrong
-- content. Clean up from that.
cleanOldKeys :: RawFilePath -> Key -> Annex ()
cleanOldKeys file newkey = do
g <- Annex.gitRepo
topf <- inRepo (toTopFilePath file)
ingestedf <- fromRepo $ fromTopFilePath topf
oldkeys <- filter (/= newkey)
<$> Database.Keys.getAssociatedKey topf
forM_ oldkeys $ \key ->
unlessM (isUnmodified key =<< calcRepo (gitAnnexLocation key)) $ do
caches <- Database.Keys.getInodeCaches key
unlinkAnnex key
fs <- filter (/= ingestedf)
. map (`fromTopFilePath` g)
<$> Database.Keys.getAssociatedFiles key
filterM (`sameInodeCache` caches) fs >>= \case
-- If linkToAnnex fails, the associated
-- file with the content is still present,
-- so no need for any recovery.
(f:_) -> do
ic <- withTSDelta (liftIO . genInodeCache f)
void $ linkToAnnex key f ic
_ -> logStatus key InfoMissing
{- On error, put the file back so it doesn't seem to have vanished.
- This can be called before or after the symlink is in place. -}
restoreFile :: RawFilePath -> Key -> SomeException -> Annex a
restoreFile file key e = do
whenM (inAnnex key) $ do
liftIO $ removeWhenExistsWith R.removeLink file
-- The key could be used by other files too, so leave the
-- content in the annex, and make a copy back to the file.
obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
unlessM (liftIO $ copyFileExternal CopyTimeStamps obj (fromRawFilePath file)) $
warning $ "Unable to restore content of " <> QuotedPath file <> "; it should be located in " <> QuotedPath (toRawFilePath obj)
thawContent file
throwM e
{- Creates the symlink to the annexed content, returns the link target. -}
makeLink :: RawFilePath -> Key -> Maybe InodeCache -> Annex LinkTarget
makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do
l <- calcRepo $ gitAnnexLink file key
replaceWorkTreeFile file' $ makeAnnexLink l
-- touch symlink to have same time as the original file,
-- as provided in the InodeCache
case mcache of
Just c -> liftIO $ touch file (inodeCacheToMtime c) False
Nothing -> noop
return l
where
file' = fromRawFilePath file
{- Creates the symlink to the annexed content, and stages it in git. -}
addSymlink :: RawFilePath -> Key -> Maybe InodeCache -> Annex ()
addSymlink file key mcache = stageSymlink file =<< genSymlink file key mcache
genSymlink :: RawFilePath -> Key -> Maybe InodeCache -> Annex Git.Sha
genSymlink file key mcache = do
linktarget <- makeLink file key mcache
hashSymlink linktarget
{- Parameters to pass to git add, forcing addition of ignored files.
-
- Note that, when git add is being run on an ignored file that is already
- checked in, CheckGitIgnore True has no effect.
-}
gitAddParams :: CheckGitIgnore -> Annex [CommandParam]
gitAddParams (CheckGitIgnore True) = ifM (Annex.getRead Annex.force)
( return [Param "-f"]
, return []
)
gitAddParams (CheckGitIgnore False) = return [Param "-f"]
{- Whether a file should be added unlocked or not. Default is to not,
- unless symlinks are not supported. annex.addunlocked can override that.
- Also, when in an adjusted branch that unlocked files, always add files
- unlocked.
-}
addUnlocked :: AddUnlockedMatcher -> MatchInfo -> Bool -> Annex Bool
addUnlocked matcher mi contentpresent =
((not . coreSymlinks <$> Annex.getGitConfig) <||>
(checkAddUnlockedMatcher matcher mi) <||>
(maybe False go . snd <$> getCurrentBranch)
)
where
go (LinkAdjustment UnlockAdjustment) = True
go (LinkAdjustment LockAdjustment) = False
go (LinkAdjustment FixAdjustment) = False
go (LinkAdjustment UnFixAdjustment) = False
go (PresenceAdjustment _ (Just la)) = go (LinkAdjustment la)
go (PresenceAdjustment _ Nothing) = False
go (LinkPresentAdjustment UnlockPresentAdjustment) = contentpresent
go (LinkPresentAdjustment LockPresentAdjustment) = False
{- Adds a file to the work tree for the key, and stages it in the index.
- The content of the key may be provided in a temp file, which will be
- moved into place. If no content is provided, adds an annex link but does
- not ingest the content.
-
- When the content of the key is not accepted into the annex, returns False.
-}
addAnnexedFile :: AddUnlockedMatcher -> RawFilePath -> Key -> Maybe RawFilePath -> Annex Bool
addAnnexedFile matcher file key mtmp = ifM (addUnlocked matcher mi (isJust mtmp))
( do
mode <- maybe
(pure Nothing)
(\tmp -> liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus tmp)
mtmp
stagePointerFile file mode =<< hashPointerFile key
Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file)
case mtmp of
Just tmp -> ifM (moveAnnex key af tmp)
( linkunlocked mode >> return True
, writepointer mode >> return False
)
Nothing -> ifM (inAnnex key)
( linkunlocked mode >> return True
, writepointer mode >> return True
)
, do
addSymlink file key Nothing
case mtmp of
Just tmp -> moveAnnex key af tmp
Nothing -> return True
)
where
af = AssociatedFile (Just file)
mi = case mtmp of
Just tmp -> MatchingFile $ FileInfo
{ contentFile = tmp
, matchFile = file
, matchKey = Just key
}
Nothing -> keyMatchInfoWithoutContent key file
linkunlocked mode = linkFromAnnex key file mode >>= \case
LinkAnnexFailed -> writepointer mode
_ -> return ()
writepointer mode = liftIO $ writePointerFile file key mode
{- Use with actions that add an already existing annex symlink or pointer
- file. The warning avoids a confusing situation where the file got copied
- from another git-annex repo, probably by accident. -}
addingExistingLink :: RawFilePath -> Key -> Annex a -> Annex a
addingExistingLink f k a = do
unlessM (isKnownKey k <||> inAnnex k) $ do
islink <- isJust <$> isAnnexLink f
warning $
QuotedPath f
<> " is a git-annex "
<> if islink then "symlink." else "pointer file."
<> " Its content is not available in this repository."
<> " (Maybe " <> QuotedPath f <> " was copied from another repository?)"
a

475
Annex/Init.hs Normal file
View file

@ -0,0 +1,475 @@
{- git-annex repository initialization
-
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Init (
checkInitializeAllowed,
ensureInitialized,
autoInitialize,
autoInitialize',
isInitialized,
initialize,
initialize',
uninitialize,
probeCrippledFileSystem,
probeCrippledFileSystem',
) where
import Annex.Common
import qualified Annex
import qualified Git
import qualified Git.Config
import qualified Git.Objects
import Git.Types (fromConfigValue)
import Git.ConfigTypes (SharedRepository(..))
import qualified Annex.Branch
import qualified Database.Fsck
import Logs.UUID
import Logs.Trust.Basic
import Logs.Config
import Types.TrustLevel
import Types.RepoVersion
import Annex.Version
import Annex.Difference
import Annex.UUID
import Annex.Fixup
import Annex.Path
import Config
import Config.Files
import Config.Smudge
import qualified Upgrade.V5.Direct as Direct
import qualified Annex.AdjustedBranch as AdjustedBranch
import Remote.List.Util (remotesChanged)
import Annex.Environment
import Annex.Hook
import Annex.InodeSentinal
import Upgrade
import Annex.Tmp
import Utility.UserInfo
import Annex.Perms
#ifndef mingw32_HOST_OS
import Utility.ThreadScheduler
import qualified Utility.RawFilePath as R
import Utility.FileMode
import System.Posix.User
import qualified Utility.LockFile.Posix as Posix
#endif
import qualified Data.Map as M
import Control.Monad.IO.Class (MonadIO)
#ifndef mingw32_HOST_OS
import System.PosixCompat.Files (ownerReadMode, isNamedPipe)
import Data.Either
import qualified System.FilePath.ByteString as P
import Control.Concurrent.Async
#endif
data InitializeAllowed = InitializeAllowed
checkInitializeAllowed :: (InitializeAllowed -> Annex a) -> Annex a
checkInitializeAllowed a = guardSafeToUseRepo $ noAnnexFileContent' >>= \case
Nothing -> do
checkSqliteWorks
a InitializeAllowed
Just noannexmsg -> do
warning "Initialization prevented by .noannex file (remove the file to override)"
unless (null noannexmsg) $
warning (UnquotedString noannexmsg)
giveup "Not initialized."
initializeAllowed :: Annex Bool
initializeAllowed = isNothing <$> noAnnexFileContent'
noAnnexFileContent' :: Annex (Maybe String)
noAnnexFileContent' = inRepo $
noAnnexFileContent . fmap fromRawFilePath . Git.repoWorkTree
genDescription :: Maybe String -> Annex UUIDDesc
genDescription (Just d) = return $ UUIDDesc $ encodeBS d
genDescription Nothing = do
reldir <- liftIO . relHome . fromRawFilePath
=<< liftIO . absPath
=<< fromRepo Git.repoPath
hostname <- fromMaybe "" <$> liftIO getHostname
let at = if null hostname then "" else "@"
v <- liftIO myUserName
return $ UUIDDesc $ encodeBS $ concat $ case v of
Right username -> [username, at, hostname, ":", reldir]
Left _ -> [hostname, ":", reldir]
initialize :: Annex () -> Maybe String -> Maybe RepoVersion -> Annex ()
initialize startupannex mdescription mversion = checkInitializeAllowed $ \initallowed -> do
{- Has to come before any commits are made as the shared
- clone heuristic expects no local objects. -}
sharedclone <- checkSharedClone
{- This will make the first commit to git, so ensure git is set up
- properly to allow commits when running it. -}
ensureCommit $ Annex.Branch.create
prepUUID
initialize' startupannex mversion initallowed
initSharedClone sharedclone
u <- getUUID
when (u == NoUUID) $
giveup "Failed to read annex.uuid from git config after setting it. This should never happen. Please file a bug report."
{- Avoid overwriting existing description with a default
- description. -}
whenM (pure (isJust mdescription) <||> not . M.member u <$> uuidDescMapRaw) $
describeUUID u =<< genDescription mdescription
-- Everything except for uuid setup, shared clone setup, and initial
-- description.
initialize' :: Annex () -> Maybe RepoVersion -> InitializeAllowed -> Annex ()
initialize' startupannex mversion _initallowed = do
checkLockSupport
checkFifoSupport
checkCrippledFileSystem
unlessM isBareRepo $ do
hookWrite preCommitHook
hookWrite postReceiveHook
setDifferences
unlessM (isJust <$> getVersion) $
setVersion (fromMaybe defaultVersion mversion)
supportunlocked <- annexSupportUnlocked <$> Annex.getGitConfig
if supportunlocked
then configureSmudgeFilter
else deconfigureSmudgeFilter
unlessM isBareRepo $ do
hookWrite postCheckoutHook
hookWrite postMergeHook
AdjustedBranch.checkAdjustedClone >>= \case
AdjustedBranch.InAdjustedClone -> return ()
AdjustedBranch.NotInAdjustedClone ->
ifM (crippledFileSystem <&&> (not <$> isBareRepo))
( AdjustedBranch.adjustToCrippledFileSystem
-- Handle case where this repo was cloned from a
-- direct mode repo
, unlessM isBareRepo
Direct.switchHEADBack
)
propigateSecureHashesOnly
createInodeSentinalFile False
fixupUnusualReposAfterInit
-- This is usually run at Annex startup, but when git-annex was
-- not already initialized, it will not yet have run.
startupannex
uninitialize :: Annex ()
uninitialize = do
-- Remove hooks that are written when initializing.
hookUnWrite preCommitHook
hookUnWrite postReceiveHook
hookUnWrite postCheckoutHook
hookUnWrite postMergeHook
deconfigureSmudgeFilter
removeRepoUUID
removeVersion
{- Gets the version that the repo is initialized with.
-
- To make sure the repo is fully initialized, also checks that it has a
- uuid configured. In the unusual case where one is set and the other is
- not, errors out to avoid running in an inconsistent state.
-}
getInitializedVersion :: Annex (Maybe RepoVersion)
getInitializedVersion = do
um <- (\u -> if u == NoUUID then Nothing else Just u) <$> getUUID
vm <- getVersion
case (um, vm) of
(Just _, Just v) -> return (Just v)
(Nothing, Nothing) -> return Nothing
(Just _, Nothing) -> onemissing "annex.version" "annex.uuid"
(Nothing, Just _) -> onemissing "annex.uuid" "annex.version"
where
onemissing missing have = giveup $ unwords
[ "This repository has " ++ have ++ " set,"
, "but " ++ missing ++ " is not set. Perhaps that"
, "git config was lost. Cannot use the repository"
, "in this state; set back " ++ missing ++ " to fix this."
]
{- Will automatically initialize if there is already a git-annex
- branch from somewhere. Otherwise, require a manual init
- to avoid git-annex accidentally being run in git
- repos that did not intend to use it.
-
- Checks repository version and handles upgrades too.
-}
ensureInitialized :: Annex () -> Annex [Remote] -> Annex ()
ensureInitialized startupannex remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade
where
needsinit = ifM autoInitializeAllowed
( do
tryNonAsync (initialize startupannex Nothing Nothing) >>= \case
Right () -> noop
Left e -> giveup $ show e ++ "\n" ++
"git-annex: automatic initialization failed due to above problems"
autoEnableSpecialRemotes remotelist
, giveup "First run: git-annex init"
)
{- Check if auto-initialize is allowed. -}
autoInitializeAllowed :: Annex Bool
autoInitializeAllowed = Annex.Branch.hasSibling <&&> objectDirNotPresent
objectDirNotPresent :: Annex Bool
objectDirNotPresent = do
d <- fromRawFilePath <$> fromRepo gitAnnexObjectDir
exists <- liftIO $ doesDirectoryExist d
when exists $ guardSafeToUseRepo $
giveup $ unwords $
[ "This repository is not initialized for use"
, "by git-annex, but " ++ d ++ " exists,"
, "which indicates this repository was used by"
, "git-annex before, and may have lost its"
, "annex.uuid and annex.version configs. Either"
, "set back missing configs, or run git-annex init"
, "to initialize with a new uuid."
]
return (not exists)
guardSafeToUseRepo :: Annex a -> Annex a
guardSafeToUseRepo a = ifM (inRepo Git.Config.checkRepoConfigInaccessible)
( do
repopath <- fromRepo Git.repoPath
p <- liftIO $ absPath repopath
giveup $ unlines $
[ "Git refuses to operate in this repository,"
, "probably because it is owned by someone else."
, ""
-- This mirrors git's wording.
, "To add an exception for this directory, call:"
, "\tgit config --global --add safe.directory " ++ fromRawFilePath p
]
, a
)
{- Initialize if it can do so automatically. Avoids failing if it cannot.
-
- Checks repository version and handles upgrades too.
-}
autoInitialize :: Annex () -> Annex [Remote] -> Annex ()
autoInitialize = autoInitialize' autoInitializeAllowed
autoInitialize' :: Annex Bool -> Annex () -> Annex [Remote] -> Annex ()
autoInitialize' check startupannex remotelist =
getInitializedVersion >>= maybe needsinit checkUpgrade
where
needsinit =
whenM (initializeAllowed <&&> check) $ do
initialize startupannex Nothing Nothing
autoEnableSpecialRemotes remotelist
{- Checks if a repository is initialized. Does not check version for upgrade. -}
isInitialized :: Annex Bool
isInitialized = maybe Annex.Branch.hasSibling (const $ return True) =<< getVersion
{- A crippled filesystem is one that does not allow making symlinks,
- or removing write access from files. -}
probeCrippledFileSystem :: Annex Bool
probeCrippledFileSystem = withEventuallyCleanedOtherTmp $ \tmp -> do
(r, warnings) <- probeCrippledFileSystem' tmp
(Just (freezeContent' UnShared))
(Just (thawContent' UnShared))
=<< hasFreezeHook
mapM_ (warning . UnquotedString) warnings
return r
probeCrippledFileSystem'
:: (MonadIO m, MonadCatch m)
=> RawFilePath
-> Maybe (RawFilePath -> m ())
-> Maybe (RawFilePath -> m ())
-> Bool
-> m (Bool, [String])
#ifdef mingw32_HOST_OS
probeCrippledFileSystem' _ _ _ _ = return (True, [])
#else
probeCrippledFileSystem' tmp freezecontent thawcontent hasfreezehook = do
let f = tmp P.</> "gaprobe"
let f' = fromRawFilePath f
liftIO $ writeFile f' ""
r <- probe f'
void $ tryNonAsync $ (fromMaybe (liftIO . allowWrite) thawcontent) f
liftIO $ removeFile f'
return r
where
probe f = catchDefaultIO (True, []) $ do
let f2 = f ++ "2"
liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath f2)
liftIO $ R.createSymbolicLink (toRawFilePath f) (toRawFilePath f2)
liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath f2)
(fromMaybe (liftIO . preventWrite) freezecontent) (toRawFilePath f)
-- Should be unable to write to the file (unless
-- running as root). But some crippled
-- filesystems ignore write bit removals or ignore
-- permissions entirely.
ifM ((== Just False) <$> liftIO (checkContentWritePerm' UnShared (toRawFilePath f) Nothing hasfreezehook))
( return (True, ["Filesystem does not allow removing write bit from files."])
, liftIO $ ifM ((== 0) <$> getRealUserID)
( return (False, [])
, do
r <- catchBoolIO $ do
writeFile f "2"
return True
if r
then return (True, ["Filesystem allows writing to files whose write bit is not set."])
else return (False, [])
)
)
#endif
checkCrippledFileSystem :: Annex ()
checkCrippledFileSystem = whenM probeCrippledFileSystem $ do
warning "Detected a crippled filesystem."
setCrippledFileSystem True
{- Normally git disables core.symlinks itself when the:w
-
- filesystem does not support them. But, even if symlinks are
- supported, we don't use them by default in a crippled
- filesystem. -}
whenM (coreSymlinks <$> Annex.getGitConfig) $ do
warning "Disabling core.symlinks."
setConfig "core.symlinks"
(Git.Config.boolConfig False)
probeLockSupport :: Annex Bool
#ifdef mingw32_HOST_OS
probeLockSupport = return True
#else
probeLockSupport = withEventuallyCleanedOtherTmp $ \tmp -> do
let f = tmp P.</> "lockprobe"
mode <- annexFileMode
annexrunner <- Annex.makeRunner
liftIO $ withAsync (warnstall annexrunner) (const (go f mode))
where
go f mode = do
removeWhenExistsWith R.removeLink f
let locktest = bracket
(Posix.lockExclusive (Just mode) f)
Posix.dropLock
(const noop)
ok <- isRight <$> tryNonAsync locktest
removeWhenExistsWith R.removeLink f
return ok
warnstall annexrunner = do
threadDelaySeconds (Seconds 10)
annexrunner $ do
warning "Probing the filesystem for POSIX fcntl lock support is taking a long time."
warning "(Setting annex.pidlock will avoid this probe.)"
#endif
probeFifoSupport :: Annex Bool
probeFifoSupport = do
#ifdef mingw32_HOST_OS
return False
#else
withEventuallyCleanedOtherTmp $ \tmp -> do
let f = tmp P.</> "gaprobe"
let f2 = tmp P.</> "gaprobe2"
liftIO $ do
removeWhenExistsWith R.removeLink f
removeWhenExistsWith R.removeLink f2
ms <- tryIO $ do
R.createNamedPipe f ownerReadMode
R.createLink f f2
R.getFileStatus f
removeWhenExistsWith R.removeLink f
removeWhenExistsWith R.removeLink f2
return $ either (const False) isNamedPipe ms
#endif
checkLockSupport :: Annex ()
checkLockSupport =
unlessM (annexPidLock <$> Annex.getGitConfig) $
unlessM probeLockSupport $ do
warning "Detected a filesystem without POSIX fcntl lock support."
warning "Enabling annex.pidlock."
setConfig (annexConfig "pidlock") (Git.Config.boolConfig True)
checkFifoSupport :: Annex ()
checkFifoSupport = unlessM probeFifoSupport $ do
warning "Detected a filesystem without fifo support."
warning "Disabling ssh connection caching."
setConfig (annexConfig "sshcaching") (Git.Config.boolConfig False)
{- Sqlite needs the filesystem to support range locking. Some like CIFS
- do not, which will cause sqlite to fail with ErrorBusy. -}
checkSqliteWorks :: Annex ()
checkSqliteWorks = do
u <- getUUID
tryNonAsync (Database.Fsck.openDb u >>= Database.Fsck.closeDb) >>= \case
Right () -> return ()
Left e -> do
showLongNote $ "Detected a filesystem where Sqlite does not work."
showLongNote $ UnquotedString $ "(" ++ show e ++ ")"
showLongNote $ "To work around this problem, you can set annex.dbdir " <>
"to a directory on another filesystem."
showLongNote $ "For example: git config annex.dbdir $HOME/cache/git-annex"
giveup "Not initialized."
checkSharedClone :: Annex Bool
checkSharedClone = inRepo Git.Objects.isSharedClone
initSharedClone :: Bool -> Annex ()
initSharedClone False = return ()
initSharedClone True = do
showLongNote "Repository was cloned with --shared; setting annex.hardlink=true and making repository untrusted."
u <- getUUID
trustSet u UnTrusted
setConfig (annexConfig "hardlink") (Git.Config.boolConfig True)
{- Propagate annex.securehashesonly from then global config to local
- config. This makes a clone inherit a parent's setting, but once
- a repository has a local setting, changes to the global config won't
- affect it. -}
propigateSecureHashesOnly :: Annex ()
propigateSecureHashesOnly =
maybe noop (setConfig "annex.securehashesonly" . fromConfigValue)
=<< getGlobalConfig "annex.securehashesonly"
fixupUnusualReposAfterInit :: Annex ()
fixupUnusualReposAfterInit = do
gc <- Annex.getGitConfig
void $ inRepo $ \r -> fixupUnusualRepos r gc
{- Try to enable any special remotes that are configured to do so.
-
- The enabling is done in a child process to avoid it using stdio.
-
- The remotelist should be Remote.List.remoteList, which cannot
- be imported here due to a dependency loop.
-}
autoEnableSpecialRemotes :: Annex [Remote] -> Annex ()
autoEnableSpecialRemotes remotelist = do
-- Get all existing git remotes to probe for their uuid here,
-- so it is not done inside the child process. Doing it in there
-- could result in password prompts for http credentials,
-- which would then not end up cached in this process's state.
_ <- remotelist
rp <- fromRawFilePath <$> fromRepo Git.repoPath
withNullHandle $ \nullh -> gitAnnexChildProcess "init"
[ Param "--autoenable" ]
(\p -> p
{ std_out = UseHandle nullh
, std_err = UseHandle nullh
, std_in = UseHandle nullh
, cwd = Just rp
}
)
(\_ _ _ pid -> void $ waitForProcess pid)
remotesChanged

112
Annex/InodeSentinal.hs Normal file
View file

@ -0,0 +1,112 @@
{- git-annex inode sentinal file
-
- Copyright 2012-2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.InodeSentinal where
import Annex.Common
import qualified Annex
import Utility.InodeCache
import Annex.Perms
{- If the sendinal shows the inodes have changed, only the size and mtime
- are compared. -}
compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool
compareInodeCaches x y
| compareStrong x y = return True
| otherwise = ifM inodesChanged
( return $ compareWeak x y
, return False
)
compareInodeCachesWith :: Annex InodeComparisonType
compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly )
{- Checks if one of the provided old InodeCache matches the current
- version of a file. -}
sameInodeCache :: RawFilePath -> [InodeCache] -> Annex Bool
sameInodeCache file [] = do
fastDebug "Annex.InodeSentinal" $
fromRawFilePath file ++ " inode cache empty"
return False
sameInodeCache file old = go =<< withTSDelta (liftIO . genInodeCache file)
where
go Nothing = do
fastDebug "Annex.InodeSentinal" $
fromRawFilePath file ++ " not present, cannot compare with inode cache"
return False
go (Just curr) = ifM (elemInodeCaches curr old)
( return True
, do
fastDebug "Annex.InodeSentinal" $
fromRawFilePath file ++ " (" ++ show curr ++ ") does not match inode cache (" ++ show old ++ ")"
return False
)
elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool
elemInodeCaches _ [] = return False
elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l)
( return True
, elemInodeCaches c ls
)
{- Some filesystems get new inodes each time they are mounted.
- In order to work on such a filesystem, a sentinal file is used to detect
- when the inodes have changed.
-
- If the sentinal file does not exist, we have to assume that the
- inodes have changed.
-}
inodesChanged :: Annex Bool
inodesChanged = sentinalInodesChanged <$> sentinalStatus
withTSDelta :: (TSDelta -> Annex a) -> Annex a
withTSDelta a = a =<< getTSDelta
getTSDelta :: Annex TSDelta
#ifdef mingw32_HOST_OS
getTSDelta = sentinalTSDelta <$> sentinalStatus
#else
getTSDelta = pure noTSDelta -- optimisation
#endif
sentinalStatus :: Annex SentinalStatus
sentinalStatus = maybe check return =<< Annex.getState Annex.sentinalstatus
where
check = do
sc <- liftIO . checkSentinalFile =<< annexSentinalFile
Annex.changeState $ \s -> s { Annex.sentinalstatus = Just sc }
return sc
{- The sentinal file is only created when first initializing a repository.
- If there are any annexed objects in the repository already, creating
- the file would invalidate their inode caches. -}
createInodeSentinalFile :: Bool -> Annex ()
createInodeSentinalFile evenwithobjects =
unlessM (alreadyexists <||> hasobjects) $ do
s <- annexSentinalFile
createAnnexDirectory (parentDir (sentinalFile s))
liftIO $ writeSentinalFile s
setAnnexFilePerm (sentinalFile s)
setAnnexFilePerm (sentinalCacheFile s)
where
alreadyexists = liftIO. sentinalFileExists =<< annexSentinalFile
hasobjects
| evenwithobjects = pure False
| otherwise = liftIO . doesDirectoryExist . fromRawFilePath
=<< fromRepo gitAnnexObjectDir
annexSentinalFile :: Annex SentinalFile
annexSentinalFile = do
sentinalfile <- fromRepo gitAnnexInodeSentinal
sentinalcachefile <- fromRepo gitAnnexInodeSentinalCache
return SentinalFile
{ sentinalFile = sentinalfile
, sentinalCacheFile = sentinalcachefile
}

303
Annex/Journal.hs Normal file
View file

@ -0,0 +1,303 @@
{- management of the git-annex journal
-
- The journal is used to queue up changes before they are committed to the
- git-annex branch. Among other things, it ensures that if git-annex is
- interrupted, its recorded data is not lost.
-
- All files in the journal must be a series of lines separated by
- newlines.
-
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE CPP #-}
module Annex.Journal where
import Annex.Common
import qualified Annex
import qualified Git
import Annex.Perms
import Annex.Tmp
import Annex.LockFile
import Annex.BranchState
import Types.BranchState
import Utility.Directory.Stream
import qualified Utility.RawFilePath as R
import qualified Data.Set as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as B
import qualified System.FilePath.ByteString as P
import Data.ByteString.Builder
import Data.Char
class Journalable t where
writeJournalHandle :: Handle -> t -> IO ()
journalableByteString :: t -> L.ByteString
instance Journalable L.ByteString where
writeJournalHandle = L.hPut
journalableByteString = id
-- This is more efficient than the ByteString instance.
instance Journalable Builder where
writeJournalHandle = hPutBuilder
journalableByteString = toLazyByteString
{- When a file in the git-annex branch is changed, this indicates what
- repository UUID (or in some cases, UUIDs) a change is regarding.
-
- Using this lets changes regarding private UUIDs be stored separately
- from the git-annex branch, so its information does not get exposed
- outside the repo.
-}
data RegardingUUID = RegardingUUID [UUID]
regardingPrivateUUID :: RegardingUUID -> Annex Bool
regardingPrivateUUID (RegardingUUID []) = pure False
regardingPrivateUUID (RegardingUUID us) = do
s <- annexPrivateRepos <$> Annex.getGitConfig
return (any (flip S.member s) us)
{- Are any private UUIDs known to exist? If so, extra work has to be done,
- to check for information separately recorded for them, outside the usual
- locations.
-}
privateUUIDsKnown :: Annex Bool
privateUUIDsKnown = privateUUIDsKnown' <$> Annex.getState id
privateUUIDsKnown' :: Annex.AnnexState -> Bool
privateUUIDsKnown' = not . S.null . annexPrivateRepos . Annex.gitconfig
{- Records content for a file in the branch to the journal.
-
- Using the journal, rather than immediately staging content to the index
- avoids git needing to rewrite the index after every change.
-
- The file in the journal is updated atomically. This avoids an
- interrupted write truncating information that was earlier read from the
- file, and so losing data.
-}
setJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex ()
setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do
st <- getState
jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
( return (gitAnnexPrivateJournalDir st)
, return (gitAnnexJournalDir st)
)
-- journal file is written atomically
let jfile = journalFile file
let tmpfile = tmp P.</> jfile
liftIO $ withFile (fromRawFilePath tmpfile) WriteMode $ \h ->
writeJournalHandle h content
let dest = jd P.</> jfile
let mv = do
liftIO $ moveFile tmpfile dest
setAnnexFilePerm dest
-- avoid overhead of creating the journal directory when it already
-- exists
mv `catchIO` (const (createAnnexDirectory jd >> mv))
newtype AppendableJournalFile = AppendableJournalFile (RawFilePath, RawFilePath)
{- If the journal file does not exist, it cannot be appended to, because
- that would overwrite whatever content the file has in the git-annex
- branch. -}
checkCanAppendJournalFile :: JournalLocked -> RegardingUUID -> RawFilePath -> Annex (Maybe AppendableJournalFile)
checkCanAppendJournalFile _jl ru file = do
st <- getState
jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
( return (gitAnnexPrivateJournalDir st)
, return (gitAnnexJournalDir st)
)
let jfile = jd P.</> journalFile file
ifM (liftIO $ R.doesPathExist jfile)
( return (Just (AppendableJournalFile (jd, jfile)))
, return Nothing
)
{- Appends content to an existing journal file.
-
- Appends are not necessarily atomic, though short appends often are.
- So, when this is interrupted, it can leave only part of the content
- written to the file. To deal with that situation, both this and
- getJournalFileStale check if the file ends with a newline, and if
- not discard the incomplete line.
-
- Due to the lack of atomicity, this should not be used when multiple
- lines need to be written to the file as an atomic unit.
-}
appendJournalFile :: Journalable content => JournalLocked -> AppendableJournalFile -> content -> Annex ()
appendJournalFile _jl (AppendableJournalFile (jd, jfile)) content = do
let write = liftIO $ withFile (fromRawFilePath jfile) ReadWriteMode $ \h -> do
sz <- hFileSize h
when (sz /= 0) $ do
hSeek h SeekFromEnd (-1)
lastchar <- B.hGet h 1
unless (lastchar == "\n") $ do
hSeek h AbsoluteSeek 0
goodpart <- L.length . discardIncompleteAppend
<$> L.hGet h (fromIntegral sz)
hSetFileSize h (fromIntegral goodpart)
hSeek h SeekFromEnd 0
writeJournalHandle h content
write `catchIO` (const (createAnnexDirectory jd >> write))
data JournalledContent
= NoJournalledContent
| JournalledContent L.ByteString
| PossiblyStaleJournalledContent L.ByteString
-- ^ This is used when the journalled content may have been
-- supersceded by content in the git-annex branch. The returned
-- content should be combined with content from the git-annex branch.
-- This is particularly the case when a file is in the private
-- journal, which does not get written to the git-annex branch,
-- and so the git-annex branch can contain changes to non-private
-- information that were made after that journal file was written.
{- Gets any journalled content for a file in the branch. -}
getJournalFile :: JournalLocked -> GetPrivate -> RawFilePath -> Annex JournalledContent
getJournalFile _jl = getJournalFileStale
data GetPrivate = GetPrivate Bool
{- Without locking, this is not guaranteed to be the most recent
- content of the file in the journal, so should not be used as a basis for
- making changes to the file.
-
- The file is read strictly so that its content can safely be fed into
- an operation that modifies the file (when getJournalFile calls this).
- The minor loss of laziness doesn't matter much, as the files are not
- very large.
-
- To recover from an append of a line that is interrupted part way through
- (or is in progress when this is called), if the file content does not end
- with a newline, it is truncated back to the previous newline.
-}
getJournalFileStale :: GetPrivate -> RawFilePath -> Annex JournalledContent
getJournalFileStale (GetPrivate getprivate) file = do
st <- Annex.getState id
let repo = Annex.repo st
bs <- getState
liftIO $
if getprivate && privateUUIDsKnown' st
then do
x <- getfrom (gitAnnexJournalDir bs repo)
getfrom (gitAnnexPrivateJournalDir bs repo) >>= \case
Nothing -> return $ case x of
Nothing -> NoJournalledContent
Just b -> JournalledContent b
Just y -> return $ PossiblyStaleJournalledContent $ case x of
Nothing -> y
-- This concacenation is the same as
-- happens in a merge of two
-- git-annex branches.
Just x' -> x' <> y
else getfrom (gitAnnexJournalDir bs repo) >>= return . \case
Nothing -> NoJournalledContent
Just b -> JournalledContent b
where
jfile = journalFile file
getfrom d = catchMaybeIO $
discardIncompleteAppend . L.fromStrict
<$> B.readFile (fromRawFilePath (d P.</> jfile))
-- Note that this forces read of the whole lazy bytestring.
discardIncompleteAppend :: L.ByteString -> L.ByteString
discardIncompleteAppend v
| L.null v = v
| L.last v == nl = v
| otherwise = dropwhileend (/= nl) v
where
nl = fromIntegral (ord '\n')
#if MIN_VERSION_bytestring(0,11,2)
dropwhileend = L.dropWhileEnd
#else
dropwhileend p = L.reverse . L.dropWhile p . L.reverse
#endif
{- List of existing journal files in a journal directory, but without locking,
- may miss new ones just being added, or may have false positives if the
- journal is staged as it is run. -}
getJournalledFilesStale :: (BranchState -> Git.Repo -> RawFilePath) -> Annex [RawFilePath]
getJournalledFilesStale getjournaldir = do
bs <- getState
repo <- Annex.gitRepo
let d = getjournaldir bs repo
fs <- liftIO $ catchDefaultIO [] $
getDirectoryContents (fromRawFilePath d)
return $ filter (`notElem` [".", ".."]) $
map (fileJournal . toRawFilePath) fs
{- Directory handle open on a journal directory. -}
withJournalHandle :: (BranchState -> Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a
withJournalHandle getjournaldir a = do
bs <- getState
repo <- Annex.gitRepo
let d = getjournaldir bs repo
bracket (opendir d) (liftIO . closeDirectory) (liftIO . a)
where
-- avoid overhead of creating the journal directory when it already
-- exists
opendir d = liftIO (openDirectory (fromRawFilePath d))
`catchIO` (const (createAnnexDirectory d >> opendir d))
{- Checks if there are changes in the journal. -}
journalDirty :: (BranchState -> Git.Repo -> RawFilePath) -> Annex Bool
journalDirty getjournaldir = do
st <- getState
d <- fromRawFilePath <$> fromRepo (getjournaldir st)
liftIO $
(not <$> isDirectoryEmpty d)
`catchIO` (const $ doesDirectoryExist d)
{- Produces a filename to use in the journal for a file on the branch.
- The filename does not include the journal directory.
-
- The journal typically won't have a lot of files in it, so the hashing
- used in the branch is not necessary, and all the files are put directly
- in the journal directory.
-}
journalFile :: RawFilePath -> RawFilePath
journalFile file = B.concatMap mangle file
where
mangle c
| P.isPathSeparator c = B.singleton underscore
| c == underscore = B.pack [underscore, underscore]
| otherwise = B.singleton c
underscore = fromIntegral (ord '_')
{- Converts a journal file (relative to the journal dir) back to the
- filename on the branch. -}
fileJournal :: RawFilePath -> RawFilePath
fileJournal = go
where
go b =
let (h, t) = B.break (== underscore) b
in h <> case B.uncons t of
Nothing -> t
Just (_u, t') -> case B.uncons t' of
Nothing -> t'
Just (w, t'')
| w == underscore ->
B.cons underscore (go t'')
| otherwise ->
B.cons P.pathSeparator (go t')
underscore = fromIntegral (ord '_')
{- Sentinal value, only produced by lockJournal; required
- as a parameter by things that need to ensure the journal is
- locked. -}
data JournalLocked = ProduceJournalLocked
{- Runs an action that modifies the journal, using locking to avoid
- contention with other git-annex processes. -}
lockJournal :: (JournalLocked -> Annex a) -> Annex a
lockJournal a = do
lck <- fromRepo gitAnnexJournalLock
withExclusiveLock lck $ a ProduceJournalLocked

476
Annex/Link.hs Normal file
View file

@ -0,0 +1,476 @@
{- git-annex links to content
-
- On file systems that support them, symlinks are used.
-
- On other filesystems, git instead stores the symlink target in a regular
- file.
-
- Pointer files are used instead of symlinks for unlocked files.
-
- Copyright 2013-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP, BangPatterns, OverloadedStrings #-}
module Annex.Link where
import Annex.Common
import qualified Annex
import qualified Annex.Queue
import qualified Git.Queue
import qualified Git.UpdateIndex
import qualified Git.Index
import qualified Git.LockFile
import qualified Git.Env
import qualified Git
import Logs.Restage
import Git.Types
import Git.FilePath
import Git.Config
import Annex.HashObject
import Annex.InodeSentinal
import Annex.PidLock
import Utility.FileMode
import Utility.InodeCache
import Utility.Tmp.Dir
import Utility.CopyFile
import qualified Database.Keys.Handle
import qualified Utility.RawFilePath as R
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L
import qualified System.FilePath.ByteString as P
#ifndef mingw32_HOST_OS
#if MIN_VERSION_unix(2,8,0)
#else
import System.PosixCompat.Files (isSymbolicLink)
#endif
#endif
type LinkTarget = S.ByteString
{- Checks if a file is a link to a key. -}
isAnnexLink :: RawFilePath -> Annex (Maybe Key)
isAnnexLink file = maybe Nothing parseLinkTargetOrPointer <$> getAnnexLinkTarget file
{- Gets the link target of a symlink.
-
- On a filesystem that does not support symlinks, fall back to getting the
- link target by looking inside the file.
-
- Returns Nothing if the file is not a symlink, or not a link to annex
- content.
-}
getAnnexLinkTarget :: RawFilePath -> Annex (Maybe LinkTarget)
getAnnexLinkTarget f = getAnnexLinkTarget' f
=<< (coreSymlinks <$> Annex.getGitConfig)
{- Pass False to force looking inside file, for when git checks out
- symlinks as plain files. -}
getAnnexLinkTarget' :: RawFilePath -> Bool -> Annex (Maybe S.ByteString)
getAnnexLinkTarget' file coresymlinks = if coresymlinks
then check probesymlink $
return Nothing
else check probesymlink $
check probefilecontent $
return Nothing
where
check getlinktarget fallback =
liftIO (catchMaybeIO getlinktarget) >>= \case
Just l
| isLinkToAnnex l -> return (Just l)
| otherwise -> return Nothing
Nothing -> fallback
probesymlink = R.readSymbolicLink file
probefilecontent = withFile (fromRawFilePath file) ReadMode $ \h -> do
s <- S.hGet h maxSymlinkSz
-- If we got the full amount, the file is too large
-- to be a symlink target.
return $ if S.length s == maxSymlinkSz
then mempty
else
-- If there are any NUL or newline
-- characters, or whitespace, we
-- certainly don't have a symlink to a
-- git-annex key.
if any (`S8.elem` s) ("\0\n\r \t" :: [Char])
then mempty
else s
makeAnnexLink :: LinkTarget -> RawFilePath -> Annex ()
makeAnnexLink = makeGitLink
{- Creates a link on disk.
-
- On a filesystem that does not support symlinks, writes the link target
- to a file. Note that git will only treat the file as a symlink if
- it's staged as such, so use addAnnexLink when adding a new file or
- modified link to git.
-}
makeGitLink :: LinkTarget -> RawFilePath -> Annex ()
makeGitLink linktarget file = ifM (coreSymlinks <$> Annex.getGitConfig)
( liftIO $ do
void $ tryIO $ R.removeLink file
R.createSymbolicLink linktarget file
, liftIO $ S.writeFile (fromRawFilePath file) linktarget
)
{- Creates a link on disk, and additionally stages it in git. -}
addAnnexLink :: LinkTarget -> RawFilePath -> Annex ()
addAnnexLink linktarget file = do
makeAnnexLink linktarget file
stageSymlink file =<< hashSymlink linktarget
{- Injects a symlink target into git, returning its Sha. -}
hashSymlink :: LinkTarget -> Annex Sha
hashSymlink = hashBlob . toInternalGitPath
{- Stages a symlink to an annexed object, using a Sha of its target. -}
stageSymlink :: RawFilePath -> Sha -> Annex ()
stageSymlink file sha =
Annex.Queue.addUpdateIndex =<<
inRepo (Git.UpdateIndex.stageSymlink file sha)
{- Injects a pointer file content into git, returning its Sha. -}
hashPointerFile :: Key -> Annex Sha
hashPointerFile key = hashBlob $ formatPointer key
{- Stages a pointer file, using a Sha of its content -}
stagePointerFile :: RawFilePath -> Maybe FileMode -> Sha -> Annex ()
stagePointerFile file mode sha =
Annex.Queue.addUpdateIndex =<<
inRepo (Git.UpdateIndex.stageFile sha treeitemtype file)
where
treeitemtype
| maybe False isExecutable mode = TreeExecutable
| otherwise = TreeFile
writePointerFile :: RawFilePath -> Key -> Maybe FileMode -> IO ()
writePointerFile file k mode = do
S.writeFile (fromRawFilePath file) (formatPointer k)
maybe noop (R.setFileMode file) mode
newtype Restage = Restage Bool
{- Restage pointer file. This is used after updating a worktree file
- when content is added/removed, to prevent git status from showing
- it as modified.
-
- The InodeCache is for the worktree file. It is used to detect when
- the worktree file is changed by something else before git update-index
- gets to look at it.
-
- Asks git to refresh its index information for the file.
- That in turn runs the clean filter on the file; when the clean
- filter produces the same pointer that was in the index before, git
- realizes that the file has not actually been modified.
-
- Note that, if the pointer file is staged for deletion, or has different
- content than the current worktree content staged, this won't change
- that. So it's safe to call at any time and any situation.
-
- If the index is known to be locked (eg, git add has run git-annex),
- that would fail. Restage False will prevent the index being updated,
- and will store it in the restage log. Displays a message to help the
- user understand why the file will appear to be modified.
-
- This uses the git queue, so the update is not performed immediately,
- and this can be run multiple times cheaply. Using the git queue also
- prevents building up too large a number of updates when many files
- are being processed. It's also recorded in the restage log so that,
- if the process is interrupted before the git queue is fulushed, the
- restage will be taken care of later.
-}
restagePointerFile :: Restage -> RawFilePath -> InodeCache -> Annex ()
restagePointerFile (Restage False) f orig = do
flip writeRestageLog orig =<< inRepo (toTopFilePath f)
toplevelWarning True $ unableToRestage $ Just f
restagePointerFile (Restage True) f orig = do
flip writeRestageLog orig =<< inRepo (toTopFilePath f)
-- Avoid refreshing the index if run by the
-- smudge clean filter, because git uses that when
-- it's already refreshing the index, probably because
-- this very action is running. Running it again would likely
-- deadlock.
unlessM (Annex.getState Annex.insmudgecleanfilter) $
Annex.Queue.addFlushAction restagePointerFileRunner [f]
restagePointerFileRunner :: Git.Queue.FlushActionRunner Annex
restagePointerFileRunner =
Git.Queue.FlushActionRunner "restagePointerFiles" $ \r _fs ->
restagePointerFiles r
-- Restage all files in the restage log that have not been modified.
--
-- Other changes to the files may have been staged before this
-- gets a chance to run. To avoid a race with any staging of
-- changes, first lock the index file. Then run git update-index
-- on all still-unmodified files, using a copy of the index file,
-- to bypass the lock. Then replace the old index file with the new
-- updated index file.
restagePointerFiles :: Git.Repo -> Annex ()
restagePointerFiles r = unlessM (Annex.getState Annex.insmudgecleanfilter) $ do
-- Flush any queued changes to the keys database, so they
-- are visible to child processes.
-- The database is closed because that may improve behavior
-- when run in Windows's WSL1, which has issues with
-- multiple writers to SQL databases.
liftIO . Database.Keys.Handle.closeDbHandle
=<< Annex.getRead Annex.keysdbhandle
realindex <- liftIO $ Git.Index.currentIndexFile r
numsz@(numfiles, _) <- calcnumsz
let lock = fromRawFilePath (Git.Index.indexFileLock realindex)
lockindex = liftIO $ catchMaybeIO $ Git.LockFile.openLock' lock
unlockindex = liftIO . maybe noop Git.LockFile.closeLock
showwarning = warning $ unableToRestage Nothing
go Nothing = showwarning
go (Just _) = withtmpdir $ \tmpdir -> do
tsd <- getTSDelta
let tmpindex = toRawFilePath (tmpdir </> "index")
let replaceindex = liftIO $ moveFile tmpindex realindex
let updatetmpindex = do
r' <- liftIO $ Git.Env.addGitEnv r Git.Index.indexEnv
=<< Git.Index.indexEnvVal tmpindex
configfilterprocess numsz $
runupdateindex tsd r' replaceindex
return True
ok <- liftIO (createLinkOrCopy realindex tmpindex)
<&&> catchBoolIO updatetmpindex
unless ok showwarning
when (numfiles > 0) $
bracket lockindex unlockindex go
where
withtmpdir = withTmpDirIn (fromRawFilePath $ Git.localGitDir r) "annexindex"
isunmodified tsd f orig =
genInodeCache f tsd >>= return . \case
Nothing -> False
Just new -> compareStrong orig new
{- Avoid git warning about CRLF munging -}
avoidcrlfwarning r' = r' { gitGlobalOpts = gitGlobalOpts r' ++
[ Param "-c"
, Param $ "core.safecrlf=" ++ boolConfig False
] }
runupdateindex tsd r' replaceindex =
runsGitAnnexChildProcessViaGit' (avoidcrlfwarning r') $ \r'' ->
Git.UpdateIndex.refreshIndex r'' $ \feeder -> do
let atend = do
-- wait for index write
liftIO $ feeder Nothing
replaceindex
streamRestageLog atend $ \topf ic -> do
let f = fromTopFilePath topf r''
liftIO $ whenM (isunmodified tsd f ic) $
feedupdateindex f feeder
{- update-index is documented as picky about "./file" and it
- fails on "../../repo/path/file" when cwd is not in the repo
- being acted on. Avoid these problems with an absolute path.
-}
feedupdateindex f feeder = do
absf <- absPath f
feeder (Just absf)
calcnumsz = calcRestageLog (0, 0) $ \(_f, ic) (numfiles, sizefiles) ->
(numfiles+1, sizefiles + inodeCacheFileSize ic)
{- filter.annex.process configured to use git-annex filter-process
- is sometimes faster and sometimes slower than using
- git-annex smudge. The latter is run once per file, while
- the former has the content of files piped to it.
-}
filterprocessfaster :: (Integer, FileSize) -> Bool
filterprocessfaster (numfiles, sizefiles) =
let estimate_enabled = sizefiles `div` 191739611
estimate_disabled = numfiles `div` 7
in estimate_enabled <= estimate_disabled
{- This disables filter.annex.process if it's set when it would
- probably not be faster to use it. Unfortunately, simply
- passing -c filter.annex.process= also prevents git from
- running the smudge filter, so .git/config has to be modified
- to disable it. The modification is reversed at the end. In
- case this process is terminated early, the next time this
- runs it will take care of reversing the modification.
-}
configfilterprocess numsz = bracket setup cleanup . const
where
setup
| filterprocessfaster numsz = return Nothing
| otherwise = fromRepo (Git.Config.getMaybe ck) >>= \case
Nothing -> return Nothing
Just v -> do
void $ inRepo (Git.Config.change ckd (fromConfigValue v))
void $ inRepo (Git.Config.unset ck)
return (Just v)
cleanup (Just v) = do
void $ inRepo $ Git.Config.change ck (fromConfigValue v)
void $ inRepo (Git.Config.unset ckd)
cleanup Nothing = fromRepo (Git.Config.getMaybe ckd) >>= \case
Nothing -> return ()
Just v -> do
whenM (isNothing <$> fromRepo (Git.Config.getMaybe ck)) $
void $ inRepo (Git.Config.change ck (fromConfigValue v))
void $ inRepo (Git.Config.unset ckd)
ck = ConfigKey "filter.annex.process"
ckd = ConfigKey "filter.annex.process-temp-disabled"
unableToRestage :: Maybe RawFilePath -> StringContainingQuotedPath
unableToRestage mf =
"git status will show " <> maybe "some files" QuotedPath mf
<> " to be modified, since content availability has changed"
<> " and git-annex was unable to update the index."
<> " This is only a cosmetic problem affecting git status; git add,"
<> " git commit, etc won't be affected."
<> " To fix the git status display, you can run:"
<> " git-annex restage"
{- Parses a symlink target or a pointer file to a Key.
-
- Makes sure that the pointer file is valid, including not being longer
- than the maximum allowed size of a valid pointer file, and that any
- subsequent lines after the first contain the validPointerLineTag.
- If a valid pointer file gets some other data appended to it, it should
- never be considered valid, unless that data happened to itself be a
- valid pointer file.
-}
parseLinkTargetOrPointer :: S.ByteString -> Maybe Key
parseLinkTargetOrPointer = either (const Nothing) id
. parseLinkTargetOrPointer'
data InvalidAppendedPointerFile = InvalidAppendedPointerFile
parseLinkTargetOrPointer' :: S.ByteString -> Either InvalidAppendedPointerFile (Maybe Key)
parseLinkTargetOrPointer' b =
let (firstline, rest) = S8.span (/= '\n') b
in case parsekey $ droptrailing '\r' firstline of
Just k
| S.length b > maxValidPointerSz -> Left InvalidAppendedPointerFile
| restvalid (dropleading '\n' rest) -> Right (Just k)
| otherwise -> Left InvalidAppendedPointerFile
Nothing -> Right Nothing
where
parsekey l
| isLinkToAnnex l = fileKey $ snd $ S8.breakEnd pathsep l
| otherwise = Nothing
restvalid r
| S.null r = True
| otherwise =
let (l, r') = S8.span (/= '\n') r
in validPointerLineTag `S.isInfixOf` l
&& (not (S8.null r') && S8.head r' == '\n')
&& restvalid (S8.tail r')
dropleading c l
| S.null l = l
| S8.head l == c = S8.tail l
| otherwise = l
droptrailing c l
| S.null l = l
| S8.last l == c = S8.init l
| otherwise = l
pathsep '/' = True
#ifdef mingw32_HOST_OS
pathsep '\\' = True
#endif
pathsep _ = False
{- Avoid looking at more of the lazy ByteString than necessary since it
- could be reading from a large file that is not a pointer file. -}
parseLinkTargetOrPointerLazy :: L.ByteString -> Maybe Key
parseLinkTargetOrPointerLazy = either (const Nothing) id
. parseLinkTargetOrPointerLazy'
parseLinkTargetOrPointerLazy' :: L.ByteString -> Either InvalidAppendedPointerFile (Maybe Key)
parseLinkTargetOrPointerLazy' b =
let b' = L.take (fromIntegral maxPointerSz) b
in parseLinkTargetOrPointer' (L.toStrict b')
formatPointer :: Key -> S.ByteString
formatPointer k = prefix <> keyFile k <> nl
where
prefix = toInternalGitPath $ P.pathSeparator `S.cons` objectDir
nl = S8.singleton '\n'
{- Maximum size of a file that could be a pointer to a key.
- Check to avoid buffering really big files in git into
- memory when reading files that may be pointers.
-
- 8192 bytes is plenty for a pointer to a key. This adds some additional
- padding to allow for pointer files that have lines of additional data
- after the key.
-
- One additional byte is used to detect when a valid pointer file
- got something else appended to it.
-}
maxPointerSz :: Int
maxPointerSz = maxValidPointerSz + 1
{- Maximum size of a valid pointer files is 32kb. -}
maxValidPointerSz :: Int
maxValidPointerSz = 32768
maxSymlinkSz :: Int
maxSymlinkSz = 8192
{- Checks if a worktree file is a pointer to a key.
-
- Unlocked files whose content is present are not detected by this.
-
- It's possible, though unlikely, that an annex symlink points to
- an object that looks like a pointer file. Or that a non-annex
- symlink does. Avoids a false positive in those cases.
- -}
isPointerFile :: RawFilePath -> IO (Maybe Key)
isPointerFile f = catchDefaultIO Nothing $
#if defined(mingw32_HOST_OS)
withFile (fromRawFilePath f) ReadMode readhandle
#else
#if MIN_VERSION_unix(2,8,0)
let open = do
fd <- openFd (fromRawFilePath f) ReadOnly
(defaultFileFlags { nofollow = True })
fdToHandle fd
in bracket open hClose readhandle
#else
ifM (isSymbolicLink <$> R.getSymbolicLinkStatus f)
( return Nothing
, withFile (fromRawFilePath f) ReadMode readhandle
)
#endif
#endif
where
readhandle h = parseLinkTargetOrPointer <$> S.hGet h maxPointerSz
{- Checks a symlink target or pointer file first line to see if it
- appears to point to annexed content.
-
- We only look for paths inside the .git directory, and not at the .git
- directory itself, because GIT_DIR may cause a directory name other
- than .git to be used.
-}
isLinkToAnnex :: S.ByteString -> Bool
isLinkToAnnex s = p `S.isInfixOf` s
#ifdef mingw32_HOST_OS
-- '/' is used inside pointer files on Windows, not the native '\'
|| p' `S.isInfixOf` s
#endif
where
p = P.pathSeparator `S.cons` objectDir
#ifdef mingw32_HOST_OS
p' = toInternalGitPath p
#endif
{- String that must appear on every line of a valid pointer file. -}
validPointerLineTag :: S.ByteString
validPointerLineTag = "/annex/"

757
Annex/Locations.hs Normal file
View file

@ -0,0 +1,757 @@
{- git-annex file locations
-
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Locations (
keyFile,
fileKey,
keyPaths,
keyPath,
annexDir,
objectDir,
gitAnnexLocation,
gitAnnexLocation',
gitAnnexLocationDepth,
gitAnnexLink,
gitAnnexLinkCanonical,
gitAnnexContentLock,
gitAnnexContentRetentionTimestamp,
gitAnnexContentRetentionTimestampLock,
gitAnnexContentLockLock,
gitAnnexInodeSentinal,
gitAnnexInodeSentinalCache,
annexLocationsBare,
annexLocationsNonBare,
annexLocation,
exportAnnexObjectLocation,
gitAnnexDir,
gitAnnexObjectDir,
gitAnnexTmpOtherDir,
gitAnnexTmpOtherLock,
gitAnnexTmpOtherDirOld,
gitAnnexTmpWatcherDir,
gitAnnexTmpObjectDir,
gitAnnexTmpObjectLocation,
gitAnnexTmpWorkDir,
gitAnnexBadDir,
gitAnnexBadLocation,
gitAnnexUnusedLog,
gitAnnexKeysDbDir,
gitAnnexKeysDbLock,
gitAnnexKeysDbIndexCache,
gitAnnexFsckState,
gitAnnexFsckDbDir,
gitAnnexFsckDbDirOld,
gitAnnexFsckDbLock,
gitAnnexFsckResultsLog,
gitAnnexUpgradeLog,
gitAnnexUpgradeLock,
gitAnnexSmudgeLog,
gitAnnexSmudgeLock,
gitAnnexRestageLog,
gitAnnexRestageLogOld,
gitAnnexRestageLock,
gitAnnexAdjustedBranchUpdateLog,
gitAnnexAdjustedBranchUpdateLock,
gitAnnexMigrateLog,
gitAnnexMigrateLock,
gitAnnexMigrationsLog,
gitAnnexMigrationsLock,
gitAnnexMoveLog,
gitAnnexMoveLock,
gitAnnexExportDir,
gitAnnexExportDbDir,
gitAnnexExportLock,
gitAnnexExportUpdateLock,
gitAnnexExportExcludeLog,
gitAnnexImportDir,
gitAnnexImportLog,
gitAnnexContentIdentifierDbDir,
gitAnnexContentIdentifierLock,
gitAnnexImportFeedDbDir,
gitAnnexImportFeedDbLock,
gitAnnexScheduleState,
gitAnnexTransferDir,
gitAnnexCredsDir,
gitAnnexWebCertificate,
gitAnnexWebPrivKey,
gitAnnexFeedStateDir,
gitAnnexFeedState,
gitAnnexMergeDir,
gitAnnexJournalDir,
gitAnnexPrivateJournalDir,
gitAnnexJournalLock,
gitAnnexGitQueueLock,
gitAnnexIndex,
gitAnnexPrivateIndex,
gitAnnexIndexStatus,
gitAnnexViewIndex,
gitAnnexViewLog,
gitAnnexMergedRefs,
gitAnnexIgnoredRefs,
gitAnnexPidFile,
gitAnnexPidLockFile,
gitAnnexDaemonStatusFile,
gitAnnexDaemonLogFile,
gitAnnexFuzzTestLogFile,
gitAnnexHtmlShim,
gitAnnexUrlFile,
gitAnnexTmpCfgFile,
gitAnnexSshDir,
gitAnnexRemotesDir,
gitAnnexAssistantDefaultDir,
HashLevels(..),
hashDirMixed,
hashDirLower,
preSanitizeKeyName,
reSanitizeKeyName,
) where
import Data.Char
import Data.Default
import qualified Data.ByteString.Char8 as S8
import qualified System.FilePath.ByteString as P
import Common
import Key
import Types.UUID
import Types.GitConfig
import Types.Difference
import Types.BranchState
import Types.Export
import qualified Git
import qualified Git.Types as Git
import Git.FilePath
import Annex.DirHashes
import Annex.Fixup
import qualified Utility.RawFilePath as R
{- Conventions:
-
- Functions ending in "Dir" should always return values ending with a
- trailing path separator. Most code does not rely on that, but a few
- things do.
-
- Everything else should not end in a trailing path separator.
-
- Only functions (with names starting with "git") that build a path
- based on a git repository should return full path relative to the git
- repository. Everything else returns path segments.
-}
{- The directory git annex uses for local state, relative to the .git
- directory -}
annexDir :: RawFilePath
annexDir = P.addTrailingPathSeparator "annex"
{- The directory git annex uses for locally available object content,
- relative to the .git directory -}
objectDir :: RawFilePath
objectDir = P.addTrailingPathSeparator $ annexDir P.</> "objects"
{- Annexed file's possible locations relative to the .git directory
- in a non-bare repository.
-
- Normally it is hashDirMixed. However, it's always possible that a
- bare repository was converted to non-bare, or that the cripped
- filesystem setting changed, so still need to check both. -}
annexLocationsNonBare :: GitConfig -> Key -> [RawFilePath]
annexLocationsNonBare config key =
map (annexLocation config key) [hashDirMixed, hashDirLower]
{- Annexed file's possible locations relative to a bare repository. -}
annexLocationsBare :: GitConfig -> Key -> [RawFilePath]
annexLocationsBare config key =
map (annexLocation config key) [hashDirLower, hashDirMixed]
annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> RawFilePath
annexLocation config key hasher = objectDir P.</> keyPath key (hasher $ objectHashLevels config)
{- For exportree remotes with annexobjects=true, objects are stored
- in this location as well as in the exported tree. -}
exportAnnexObjectLocation :: GitConfig -> Key -> ExportLocation
exportAnnexObjectLocation gc k =
mkExportLocation $
".git" P.</> annexLocation gc k hashDirLower
{- Number of subdirectories from the gitAnnexObjectDir
- to the gitAnnexLocation. -}
gitAnnexLocationDepth :: GitConfig -> Int
gitAnnexLocationDepth config = hashlevels + 1
where
HashLevels hashlevels = objectHashLevels config
{- Annexed object's location in a repository.
-
- When there are multiple possible locations, returns the one where the
- file is actually present.
-
- When the file is not present, returns the location where the file should
- be stored.
-}
gitAnnexLocation :: Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexLocation = gitAnnexLocation' R.doesPathExist
gitAnnexLocation' :: (RawFilePath -> IO Bool) -> Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexLocation' checker key r config = gitAnnexLocation'' key r config
(annexCrippledFileSystem config)
(coreSymlinks config)
checker
(Git.localGitDir r)
gitAnnexLocation'' :: Key -> Git.Repo -> GitConfig -> Bool -> Bool -> (RawFilePath -> IO Bool) -> RawFilePath -> IO RawFilePath
gitAnnexLocation'' key r config crippled symlinkssupported checker gitdir
{- Bare repositories default to hashDirLower for new
- content, as it's more portable. But check all locations. -}
| Git.repoIsLocalBare r = checkall annexLocationsBare
{- If the repository is configured to only use lower, no need
- to check both. -}
| hasDifference ObjectHashLower (annexDifferences config) =
only hashDirLower
{- Repositories on crippled filesystems use same layout as bare
- repos for new content, unless symlinks are supported too. -}
| crippled = if symlinkssupported
then checkall annexLocationsNonBare
else checkall annexLocationsBare
| otherwise = checkall annexLocationsNonBare
where
only = return . inrepo . annexLocation config key
checkall f = check $ map inrepo $ f config key
inrepo d = gitdir P.</> d
check locs@(l:_) = fromMaybe l <$> firstM checker locs
check [] = error "internal"
{- Calculates a symlink target to link a file to an annexed object. -}
gitAnnexLink :: RawFilePath -> Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexLink file key r config = do
currdir <- R.getCurrentDirectory
let absfile = absNormPathUnix currdir file
let gitdir = getgitdir currdir
loc <- gitAnnexLocation'' key r config False False (\_ -> return True) gitdir
toInternalGitPath <$> relPathDirToFile (parentDir absfile) loc
where
getgitdir currdir
{- This special case is for git submodules on filesystems not
- supporting symlinks; generate link target that will
- work portably. -}
| not (coreSymlinks config) && needsSubmoduleFixup r =
absNormPathUnix currdir (Git.repoPath r P.</> ".git")
| otherwise = Git.localGitDir r
absNormPathUnix d p = toInternalGitPath $
absPathFrom (toInternalGitPath d) (toInternalGitPath p)
{- Calculates a symlink target as would be used in a typical git
- repository, with .git in the top of the work tree. -}
gitAnnexLinkCanonical :: RawFilePath -> Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexLinkCanonical file key r config = gitAnnexLink file key r' config'
where
r' = case r of
Git.Repo { Git.location = l@Git.Local { Git.worktree = Just wt } } ->
r { Git.location = l { Git.gitdir = wt P.</> ".git" } }
_ -> r
config' = config
{ annexCrippledFileSystem = False
, coreSymlinks = True
}
{- File used to lock a key's content. -}
gitAnnexContentLock :: Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexContentLock key r config = do
loc <- gitAnnexLocation key r config
return $ loc <> ".lck"
{- File used to indicate a key's content should not be dropped until after
- a specified time. -}
gitAnnexContentRetentionTimestamp :: Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexContentRetentionTimestamp key r config = do
loc <- gitAnnexLocation key r config
return $ loc <> ".rtm"
{- Lock file for gitAnnexContentRetentionTimestamp -}
gitAnnexContentRetentionTimestampLock :: Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexContentRetentionTimestampLock key r config = do
loc <- gitAnnexLocation key r config
return $ loc <> ".rtl"
{- Lock that is held when taking the gitAnnexContentLock to support the v10
- upgrade.
-
- This uses the gitAnnexInodeSentinal file, because it needs to be a file
- that exists in the repository, even when it's an old v8 repository that
- is mounted read-only. The gitAnnexInodeSentinal is created by git-annex
- init, so should already exist.
-}
gitAnnexContentLockLock :: Git.Repo -> RawFilePath
gitAnnexContentLockLock = gitAnnexInodeSentinal
gitAnnexInodeSentinal :: Git.Repo -> RawFilePath
gitAnnexInodeSentinal r = gitAnnexDir r P.</> "sentinal"
gitAnnexInodeSentinalCache :: Git.Repo -> RawFilePath
gitAnnexInodeSentinalCache r = gitAnnexInodeSentinal r <> ".cache"
{- The annex directory of a repository. -}
gitAnnexDir :: Git.Repo -> RawFilePath
gitAnnexDir r = P.addTrailingPathSeparator $ Git.localGitDir r P.</> annexDir
{- The part of the annex directory where file contents are stored. -}
gitAnnexObjectDir :: Git.Repo -> RawFilePath
gitAnnexObjectDir r = P.addTrailingPathSeparator $
Git.localGitDir r P.</> objectDir
{- .git/annex/tmp/ is used for temp files for key's contents -}
gitAnnexTmpObjectDir :: Git.Repo -> RawFilePath
gitAnnexTmpObjectDir r = P.addTrailingPathSeparator $
gitAnnexDir r P.</> "tmp"
{- .git/annex/othertmp/ is used for other temp files -}
gitAnnexTmpOtherDir :: Git.Repo -> RawFilePath
gitAnnexTmpOtherDir r = P.addTrailingPathSeparator $
gitAnnexDir r P.</> "othertmp"
{- Lock file for gitAnnexTmpOtherDir. -}
gitAnnexTmpOtherLock :: Git.Repo -> RawFilePath
gitAnnexTmpOtherLock r = gitAnnexDir r P.</> "othertmp.lck"
{- .git/annex/misctmp/ was used by old versions of git-annex and is still
- used during initialization -}
gitAnnexTmpOtherDirOld :: Git.Repo -> RawFilePath
gitAnnexTmpOtherDirOld r = P.addTrailingPathSeparator $
gitAnnexDir r P.</> "misctmp"
{- .git/annex/watchtmp/ is used by the watcher and assistant -}
gitAnnexTmpWatcherDir :: Git.Repo -> RawFilePath
gitAnnexTmpWatcherDir r = P.addTrailingPathSeparator $
gitAnnexDir r P.</> "watchtmp"
{- The temp file to use for a given key's content. -}
gitAnnexTmpObjectLocation :: Key -> Git.Repo -> RawFilePath
gitAnnexTmpObjectLocation key r = gitAnnexTmpObjectDir r P.</> keyFile key
{- Given a temp file such as gitAnnexTmpObjectLocation, makes a name for a
- subdirectory in the same location, that can be used as a work area
- when receiving the key's content.
-
- There are ordering requirements for creating these directories;
- use Annex.Content.withTmpWorkDir to set them up.
-}
gitAnnexTmpWorkDir :: RawFilePath -> RawFilePath
gitAnnexTmpWorkDir p =
let (dir, f) = P.splitFileName p
-- Using a prefix avoids name conflict with any other keys.
in dir P.</> "work." <> f
{- .git/annex/bad/ is used for bad files found during fsck -}
gitAnnexBadDir :: Git.Repo -> RawFilePath
gitAnnexBadDir r = P.addTrailingPathSeparator $ gitAnnexDir r P.</> "bad"
{- The bad file to use for a given key. -}
gitAnnexBadLocation :: Key -> Git.Repo -> RawFilePath
gitAnnexBadLocation key r = gitAnnexBadDir r P.</> keyFile key
{- .git/annex/foounused is used to number possibly unused keys -}
gitAnnexUnusedLog :: RawFilePath -> Git.Repo -> RawFilePath
gitAnnexUnusedLog prefix r = gitAnnexDir r P.</> (prefix <> "unused")
{- .git/annex/keysdb/ contains a database of information about keys. -}
gitAnnexKeysDbDir :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexKeysDbDir r c = fromMaybe (gitAnnexDir r) (annexDbDir c) P.</> "keysdb"
{- Lock file for the keys database. -}
gitAnnexKeysDbLock :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexKeysDbLock r c = gitAnnexKeysDbDir r c <> ".lck"
{- Contains the stat of the last index file that was
- reconciled with the keys database. -}
gitAnnexKeysDbIndexCache :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexKeysDbIndexCache r c = gitAnnexKeysDbDir r c <> ".cache"
{- .git/annex/fsck/uuid/ is used to store information about incremental
- fscks. -}
gitAnnexFsckDir :: UUID -> Git.Repo -> Maybe GitConfig -> RawFilePath
gitAnnexFsckDir u r mc = case annexDbDir =<< mc of
Nothing -> go (gitAnnexDir r)
Just d -> go d
where
go d = d P.</> "fsck" P.</> fromUUID u
{- used to store information about incremental fscks. -}
gitAnnexFsckState :: UUID -> Git.Repo -> RawFilePath
gitAnnexFsckState u r = gitAnnexFsckDir u r Nothing P.</> "state"
{- Directory containing database used to record fsck info. -}
gitAnnexFsckDbDir :: UUID -> Git.Repo -> GitConfig -> RawFilePath
gitAnnexFsckDbDir u r c = gitAnnexFsckDir u r (Just c) P.</> "fsckdb"
{- Directory containing old database used to record fsck info. -}
gitAnnexFsckDbDirOld :: UUID -> Git.Repo -> GitConfig -> RawFilePath
gitAnnexFsckDbDirOld u r c = gitAnnexFsckDir u r (Just c) P.</> "db"
{- Lock file for the fsck database. -}
gitAnnexFsckDbLock :: UUID -> Git.Repo -> GitConfig -> RawFilePath
gitAnnexFsckDbLock u r c = gitAnnexFsckDir u r (Just c) P.</> "fsck.lck"
{- .git/annex/fsckresults/uuid is used to store results of git fscks -}
gitAnnexFsckResultsLog :: UUID -> Git.Repo -> RawFilePath
gitAnnexFsckResultsLog u r =
gitAnnexDir r P.</> "fsckresults" P.</> fromUUID u
{- .git/annex/upgrade.log is used to record repository version upgrades. -}
gitAnnexUpgradeLog :: Git.Repo -> RawFilePath
gitAnnexUpgradeLog r = gitAnnexDir r P.</> "upgrade.log"
gitAnnexUpgradeLock :: Git.Repo -> RawFilePath
gitAnnexUpgradeLock r = gitAnnexDir r P.</> "upgrade.lck"
{- .git/annex/smudge.log is used to log smudged worktree files that need to
- be updated. -}
gitAnnexSmudgeLog :: Git.Repo -> RawFilePath
gitAnnexSmudgeLog r = gitAnnexDir r P.</> "smudge.log"
gitAnnexSmudgeLock :: Git.Repo -> RawFilePath
gitAnnexSmudgeLock r = gitAnnexDir r P.</> "smudge.lck"
{- .git/annex/restage.log is used to log worktree files that need to be
- restaged in git -}
gitAnnexRestageLog :: Git.Repo -> RawFilePath
gitAnnexRestageLog r = gitAnnexDir r P.</> "restage.log"
{- .git/annex/restage.old is used while restaging files in git -}
gitAnnexRestageLogOld :: Git.Repo -> RawFilePath
gitAnnexRestageLogOld r = gitAnnexDir r P.</> "restage.old"
gitAnnexRestageLock :: Git.Repo -> RawFilePath
gitAnnexRestageLock r = gitAnnexDir r P.</> "restage.lck"
{- .git/annex/adjust.log is used to log when the adjusted branch needs to
- be updated. -}
gitAnnexAdjustedBranchUpdateLog :: Git.Repo -> RawFilePath
gitAnnexAdjustedBranchUpdateLog r = gitAnnexDir r P.</> "adjust.log"
gitAnnexAdjustedBranchUpdateLock :: Git.Repo -> RawFilePath
gitAnnexAdjustedBranchUpdateLock r = gitAnnexDir r P.</> "adjust.lck"
{- .git/annex/migrate.log is used to log migrations before committing them. -}
gitAnnexMigrateLog :: Git.Repo -> RawFilePath
gitAnnexMigrateLog r = gitAnnexDir r P.</> "migrate.log"
gitAnnexMigrateLock :: Git.Repo -> RawFilePath
gitAnnexMigrateLock r = gitAnnexDir r P.</> "migrate.lck"
{- .git/annex/migrations.log is used to log committed migrations. -}
gitAnnexMigrationsLog :: Git.Repo -> RawFilePath
gitAnnexMigrationsLog r = gitAnnexDir r P.</> "migrations.log"
gitAnnexMigrationsLock :: Git.Repo -> RawFilePath
gitAnnexMigrationsLock r = gitAnnexDir r P.</> "migrations.lck"
{- .git/annex/move.log is used to log moves that are in progress,
- to better support resuming an interrupted move. -}
gitAnnexMoveLog :: Git.Repo -> RawFilePath
gitAnnexMoveLog r = gitAnnexDir r P.</> "move.log"
gitAnnexMoveLock :: Git.Repo -> RawFilePath
gitAnnexMoveLock r = gitAnnexDir r P.</> "move.lck"
{- .git/annex/export/ is used to store information about
- exports to special remotes. -}
gitAnnexExportDir :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexExportDir r c = fromMaybe (gitAnnexDir r) (annexDbDir c) P.</> "export"
{- Directory containing database used to record export info. -}
gitAnnexExportDbDir :: UUID -> Git.Repo -> GitConfig -> RawFilePath
gitAnnexExportDbDir u r c =
gitAnnexExportDir r c P.</> fromUUID u P.</> "exportdb"
{- Lock file for export database. -}
gitAnnexExportLock :: UUID -> Git.Repo -> GitConfig -> RawFilePath
gitAnnexExportLock u r c = gitAnnexExportDbDir u r c <> ".lck"
{- Lock file for updating the export database with information from the
- repository. -}
gitAnnexExportUpdateLock :: UUID -> Git.Repo -> GitConfig -> RawFilePath
gitAnnexExportUpdateLock u r c = gitAnnexExportDbDir u r c <> ".upl"
{- Log file used to keep track of files that were in the tree exported to a
- remote, but were excluded by its preferred content settings. -}
gitAnnexExportExcludeLog :: UUID -> Git.Repo -> RawFilePath
gitAnnexExportExcludeLog u r = gitAnnexDir r P.</> "export.ex" P.</> fromUUID u
{- Directory containing database used to record remote content ids.
-
- (This used to be "cid", but a problem with the database caused it to
- need to be rebuilt with a new name.)
-}
gitAnnexContentIdentifierDbDir :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexContentIdentifierDbDir r c =
fromMaybe (gitAnnexDir r) (annexDbDir c) P.</> "cidsdb"
{- Lock file for writing to the content id database. -}
gitAnnexContentIdentifierLock :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexContentIdentifierLock r c = gitAnnexContentIdentifierDbDir r c <> ".lck"
{- .git/annex/import/ is used to store information about
- imports from special remotes. -}
gitAnnexImportDir :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexImportDir r c = fromMaybe (gitAnnexDir r) (annexDbDir c) P.</> "import"
{- File containing state about the last import done from a remote. -}
gitAnnexImportLog :: UUID -> Git.Repo -> GitConfig -> RawFilePath
gitAnnexImportLog u r c =
gitAnnexImportDir r c P.</> fromUUID u P.</> "log"
{- Directory containing database used by importfeed. -}
gitAnnexImportFeedDbDir :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexImportFeedDbDir r c =
fromMaybe (gitAnnexDir r) (annexDbDir c) P.</> "importfeed"
{- Lock file for writing to the importfeed database. -}
gitAnnexImportFeedDbLock :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexImportFeedDbLock r c = gitAnnexImportFeedDbDir r c <> ".lck"
{- .git/annex/schedulestate is used to store information about when
- scheduled jobs were last run. -}
gitAnnexScheduleState :: Git.Repo -> RawFilePath
gitAnnexScheduleState r = gitAnnexDir r P.</> "schedulestate"
{- .git/annex/creds/ is used to store credentials to access some special
- remotes. -}
gitAnnexCredsDir :: Git.Repo -> RawFilePath
gitAnnexCredsDir r = P.addTrailingPathSeparator $ gitAnnexDir r P.</> "creds"
{- .git/annex/certificate.pem and .git/annex/key.pem are used by the webapp
- when HTTPS is enabled -}
gitAnnexWebCertificate :: Git.Repo -> FilePath
gitAnnexWebCertificate r = fromRawFilePath $ gitAnnexDir r P.</> "certificate.pem"
gitAnnexWebPrivKey :: Git.Repo -> FilePath
gitAnnexWebPrivKey r = fromRawFilePath $ gitAnnexDir r P.</> "privkey.pem"
{- .git/annex/feeds/ is used to record per-key (url) state by importfeed -}
gitAnnexFeedStateDir :: Git.Repo -> RawFilePath
gitAnnexFeedStateDir r = P.addTrailingPathSeparator $
gitAnnexDir r P.</> "feedstate"
gitAnnexFeedState :: Key -> Git.Repo -> RawFilePath
gitAnnexFeedState k r = gitAnnexFeedStateDir r P.</> keyFile k
{- .git/annex/merge/ is used as a empty work tree for merges in
- adjusted branches. -}
gitAnnexMergeDir :: Git.Repo -> FilePath
gitAnnexMergeDir r = fromRawFilePath $
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "merge"
{- .git/annex/transfer/ is used to record keys currently
- being transferred, and other transfer bookkeeping info. -}
gitAnnexTransferDir :: Git.Repo -> RawFilePath
gitAnnexTransferDir r =
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "transfer"
{- .git/annex/journal/ is used to journal changes made to the git-annex
- branch -}
gitAnnexJournalDir :: BranchState -> Git.Repo -> RawFilePath
gitAnnexJournalDir st r = P.addTrailingPathSeparator $
case alternateJournal st of
Nothing -> gitAnnexDir r P.</> "journal"
Just d -> d
{- .git/annex/journal.private/ is used to journal changes regarding private
- repositories. -}
gitAnnexPrivateJournalDir :: BranchState -> Git.Repo -> RawFilePath
gitAnnexPrivateJournalDir st r = P.addTrailingPathSeparator $
case alternateJournal st of
Nothing -> gitAnnexDir r P.</> "journal-private"
Just d -> d
{- Lock file for the journal. -}
gitAnnexJournalLock :: Git.Repo -> RawFilePath
gitAnnexJournalLock r = gitAnnexDir r P.</> "journal.lck"
{- Lock file for flushing a git queue that writes to the git index or
- other git state that should only have one writer at a time. -}
gitAnnexGitQueueLock :: Git.Repo -> RawFilePath
gitAnnexGitQueueLock r = gitAnnexDir r P.</> "gitqueue.lck"
{- .git/annex/index is used to stage changes to the git-annex branch -}
gitAnnexIndex :: Git.Repo -> RawFilePath
gitAnnexIndex r = gitAnnexDir r P.</> "index"
{- .git/annex/index-private is used to store information that is not to
- be exposed to the git-annex branch. -}
gitAnnexPrivateIndex :: Git.Repo -> RawFilePath
gitAnnexPrivateIndex r = gitAnnexDir r P.</> "index-private"
{- Holds the ref of the git-annex branch that the index was last updated to.
-
- The .lck in the name is a historical accident; this is not used as a
- lock. -}
gitAnnexIndexStatus :: Git.Repo -> RawFilePath
gitAnnexIndexStatus r = gitAnnexDir r P.</> "index.lck"
{- The index file used to generate a filtered branch view._-}
gitAnnexViewIndex :: Git.Repo -> RawFilePath
gitAnnexViewIndex r = gitAnnexDir r P.</> "viewindex"
{- File containing a log of recently accessed views. -}
gitAnnexViewLog :: Git.Repo -> RawFilePath
gitAnnexViewLog r = gitAnnexDir r P.</> "viewlog"
{- List of refs that have already been merged into the git-annex branch. -}
gitAnnexMergedRefs :: Git.Repo -> RawFilePath
gitAnnexMergedRefs r = gitAnnexDir r P.</> "mergedrefs"
{- List of refs that should not be merged into the git-annex branch. -}
gitAnnexIgnoredRefs :: Git.Repo -> RawFilePath
gitAnnexIgnoredRefs r = gitAnnexDir r P.</> "ignoredrefs"
{- Pid file for daemon mode. -}
gitAnnexPidFile :: Git.Repo -> RawFilePath
gitAnnexPidFile r = gitAnnexDir r P.</> "daemon.pid"
{- Pid lock file for pidlock mode -}
gitAnnexPidLockFile :: Git.Repo -> RawFilePath
gitAnnexPidLockFile r = gitAnnexDir r P.</> "pidlock"
{- Status file for daemon mode. -}
gitAnnexDaemonStatusFile :: Git.Repo -> FilePath
gitAnnexDaemonStatusFile r = fromRawFilePath $
gitAnnexDir r P.</> "daemon.status"
{- Log file for daemon mode. -}
gitAnnexDaemonLogFile :: Git.Repo -> RawFilePath
gitAnnexDaemonLogFile r = gitAnnexDir r P.</> "daemon.log"
{- Log file for fuzz test. -}
gitAnnexFuzzTestLogFile :: Git.Repo -> FilePath
gitAnnexFuzzTestLogFile r = fromRawFilePath $
gitAnnexDir r P.</> "fuzztest.log"
{- Html shim file used to launch the webapp. -}
gitAnnexHtmlShim :: Git.Repo -> RawFilePath
gitAnnexHtmlShim r = gitAnnexDir r P.</> "webapp.html"
{- File containing the url to the webapp. -}
gitAnnexUrlFile :: Git.Repo -> RawFilePath
gitAnnexUrlFile r = gitAnnexDir r P.</> "url"
{- Temporary file used to edit configuriation from the git-annex branch. -}
gitAnnexTmpCfgFile :: Git.Repo -> RawFilePath
gitAnnexTmpCfgFile r = gitAnnexDir r P.</> "config.tmp"
{- .git/annex/ssh/ is used for ssh connection caching -}
gitAnnexSshDir :: Git.Repo -> RawFilePath
gitAnnexSshDir r = P.addTrailingPathSeparator $ gitAnnexDir r P.</> "ssh"
{- .git/annex/remotes/ is used for remote-specific state. -}
gitAnnexRemotesDir :: Git.Repo -> RawFilePath
gitAnnexRemotesDir r =
P.addTrailingPathSeparator $ gitAnnexDir r P.</> "remotes"
{- This is the base directory name used by the assistant when making
- repositories, by default. -}
gitAnnexAssistantDefaultDir :: FilePath
gitAnnexAssistantDefaultDir = "annex"
{- Sanitizes a String that will be used as part of a Key's keyName,
- dealing with characters that cause problems.
-
- This is used when a new Key is initially being generated, eg by genKey.
- Unlike keyFile and fileKey, it does not need to be a reversible
- escaping. Also, it's ok to change this to add more problematic
- characters later. Unlike changing keyFile, which could result in the
- filenames used for existing keys changing and contents getting lost.
-
- It is, however, important that the input and output of this function
- have a 1:1 mapping, to avoid two different inputs from mapping to the
- same key.
-}
preSanitizeKeyName :: String -> String
preSanitizeKeyName = preSanitizeKeyName' False
preSanitizeKeyName' :: Bool -> String -> String
preSanitizeKeyName' resanitize = concatMap escape
where
escape c
| isAsciiUpper c || isAsciiLower c || isDigit c = [c]
| c `elem` ['.', '-', '_'] = [c] -- common, assumed safe
| c `elem` ['/', '%', ':'] = [c] -- handled by keyFile
-- , is safe and uncommon, so will be used to escape
-- other characters. By itself, it is escaped to
-- doubled form.
| c == ',' = if not resanitize
then ",,"
else ","
| otherwise = ',' : show (ord c)
{- Converts a keyName that has been santizied with an old version of
- preSanitizeKeyName to be sanitized with the new version. -}
reSanitizeKeyName :: String -> String
reSanitizeKeyName = preSanitizeKeyName' True
{- Converts a key into a filename fragment without any directory.
-
- Escape "/" in the key name, to keep a flat tree of files and avoid
- issues with keys containing "/../" or ending with "/" etc.
-
- "/" is escaped to "%" because it's short and rarely used, and resembles
- a slash
- "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping
- is one to one.
- ":" is escaped to "&c", because it seemed like a good idea at the time.
-
- Changing what this function escapes and how is not a good idea, as it
- can cause existing objects to get lost.
-}
keyFile :: Key -> RawFilePath
keyFile k =
let b = serializeKey' k
in if S8.any (`elem` ['&', '%', ':', '/']) b
then S8.concatMap esc b
else b
where
esc '&' = "&a"
esc '%' = "&s"
esc ':' = "&c"
esc '/' = "%"
esc c = S8.singleton c
{- Reverses keyFile, converting a filename fragment (ie, the basename of
- the symlink target) into a key. -}
fileKey :: RawFilePath -> Maybe Key
fileKey = deserializeKey' . S8.intercalate "/" . map go . S8.split '%'
where
go = S8.concat . unescafterfirst . S8.split '&'
unescafterfirst [] = []
unescafterfirst (b:bs) = b : map (unesc . S8.uncons) bs
unesc :: Maybe (Char, S8.ByteString) -> S8.ByteString
unesc Nothing = mempty
unesc (Just ('c', b)) = S8.cons ':' b
unesc (Just ('s', b)) = S8.cons '%' b
unesc (Just ('a', b)) = S8.cons '&' b
unesc (Just (c, b)) = S8.cons c b
{- A location to store a key on a special remote that uses a filesystem.
- A directory hash is used, to protect against filesystems that dislike
- having many items in a single directory.
-
- The file is put in a directory with the same name, this allows
- write-protecting the directory to avoid accidental deletion of the file.
-}
keyPath :: Key -> Hasher -> RawFilePath
keyPath key hasher = hasher key P.</> f P.</> f
where
f = keyFile key
{- All possible locations to store a key in a special remote
- using different directory hashes.
-
- This is compatible with the annexLocationsNonBare and annexLocationsBare,
- for interoperability between special remotes and git-annex repos.
-}
keyPaths :: Key -> [RawFilePath]
keyPaths key = map (\h -> keyPath key (h def)) dirHashes

113
Annex/LockFile.hs Normal file
View file

@ -0,0 +1,113 @@
{- git-annex lock files.
-
- Copyright 2012-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.LockFile (
lockFileCached,
unlockFile,
getLockCache,
fromLockCache,
withSharedLock,
withExclusiveLock,
takeExclusiveLock,
tryExclusiveLock,
) where
import Annex.Common
import Annex
import Types.LockCache
import Annex.Perms
import Annex.LockPool
import qualified Data.Map as M
import qualified System.FilePath.ByteString as P
{- Create a specified lock file, and takes a shared lock, which is retained
- in the cache. -}
lockFileCached :: RawFilePath -> Annex ()
lockFileCached file = go =<< fromLockCache file
where
go (Just _) = noop -- already locked
go Nothing = do
#ifndef mingw32_HOST_OS
mode <- annexFileMode
lockhandle <- lockShared (Just mode) file
#else
lockhandle <- liftIO $ waitToLock $ lockShared file
#endif
changeLockCache $ M.insert file lockhandle
unlockFile :: RawFilePath -> Annex ()
unlockFile file = maybe noop go =<< fromLockCache file
where
go lockhandle = do
liftIO $ dropLock lockhandle
changeLockCache $ M.delete file
getLockCache :: Annex LockCache
getLockCache = getState lockcache
fromLockCache :: RawFilePath -> Annex (Maybe LockHandle)
fromLockCache file = M.lookup file <$> getLockCache
changeLockCache :: (LockCache -> LockCache) -> Annex ()
changeLockCache a = do
m <- getLockCache
changeState $ \s -> s { lockcache = a m }
{- Runs an action with a shared lock held. If an exclusive lock is held,
- blocks until it becomes free. -}
withSharedLock :: RawFilePath -> Annex a -> Annex a
withSharedLock lockfile a = debugLocks $ do
createAnnexDirectory $ P.takeDirectory lockfile
mode <- annexFileMode
bracket (lock mode lockfile) (liftIO . dropLock) (const a)
where
#ifndef mingw32_HOST_OS
lock mode = lockShared (Just mode)
#else
lock _mode = liftIO . waitToLock . lockShared
#endif
{- Runs an action with an exclusive lock held. If the lock is already
- held, blocks until it becomes free. -}
withExclusiveLock :: RawFilePath -> Annex a -> Annex a
withExclusiveLock lockfile a = bracket
(takeExclusiveLock lockfile)
(liftIO . dropLock)
(const a)
{- Takes an exclusive lock, blocking until it's free. -}
takeExclusiveLock :: RawFilePath -> Annex LockHandle
takeExclusiveLock lockfile = debugLocks $ do
createAnnexDirectory $ P.takeDirectory lockfile
mode <- annexFileMode
lock mode lockfile
where
#ifndef mingw32_HOST_OS
lock mode = lockExclusive (Just mode)
#else
lock _mode = liftIO . waitToLock . lockExclusive
#endif
{- Tries to take an exclusive lock and run an action. If the lock is
- already held, returns Nothing. -}
tryExclusiveLock :: RawFilePath -> Annex a -> Annex (Maybe a)
tryExclusiveLock lockfile a = debugLocks $ do
createAnnexDirectory $ P.takeDirectory lockfile
mode <- annexFileMode
bracket (lock mode lockfile) (liftIO . unlock) go
where
#ifndef mingw32_HOST_OS
lock mode = tryLockExclusive (Just mode)
#else
lock _mode = liftIO . lockExclusive
#endif
unlock = maybe noop dropLock
go Nothing = return Nothing
go (Just _) = Just <$> a

17
Annex/LockPool.hs Normal file
View file

@ -0,0 +1,17 @@
{- Wraps Utility.LockPool, making pid locks be used when git-annex is so
- configured.
-
- Copyright 2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.LockPool (module X) where
#ifndef mingw32_HOST_OS
import Annex.LockPool.PosixOrPid as X
#else
import Utility.LockPool.Windows as X
#endif

View file

@ -0,0 +1,93 @@
{- Wraps Utility.LockPool, making pid locks be used when git-annex is so
- configured.
-
- Copyright 2015-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.LockPool.PosixOrPid (
LockFile,
LockHandle,
lockShared,
lockExclusive,
tryLockShared,
tryLockExclusive,
dropLock,
checkLocked,
LockStatus(..),
getLockStatus,
checkSaneLock,
) where
import Common
import Types
import qualified Annex
import qualified Utility.LockPool.Posix as Posix
import qualified Utility.LockPool.PidLock as Pid
import qualified Utility.LockPool.LockHandle as H
import Utility.FileMode
import Utility.LockPool.LockHandle (LockHandle, dropLock)
import Utility.LockFile.Posix (openLockFile)
import Utility.LockPool.STM (LockFile, LockMode(..))
import Utility.LockFile.LockStatus
import Config (pidLockFile)
import Messages (warning)
import Git.Quote
import System.Posix
lockShared :: Maybe ModeSetter -> LockFile -> Annex LockHandle
lockShared m f = pidLock m f LockShared $ Posix.lockShared m f
lockExclusive :: Maybe ModeSetter -> LockFile -> Annex LockHandle
lockExclusive m f = pidLock m f LockExclusive $ Posix.lockExclusive m f
tryLockShared :: Maybe ModeSetter -> LockFile -> Annex (Maybe LockHandle)
tryLockShared m f = tryPidLock m f LockShared $ Posix.tryLockShared m f
tryLockExclusive :: Maybe ModeSetter -> LockFile -> Annex (Maybe LockHandle)
tryLockExclusive m f = tryPidLock m f LockExclusive $ Posix.tryLockExclusive m f
checkLocked :: LockFile -> Annex (Maybe Bool)
checkLocked f = Posix.checkLocked f `pidLockCheck` checkpid
where
checkpid pidlock = Pid.checkLocked pidlock >>= \case
-- Only return true when the posix lock file exists.
Just _ -> Posix.checkLocked f
Nothing -> return Nothing
getLockStatus :: LockFile -> Annex LockStatus
getLockStatus f = Posix.getLockStatus f
`pidLockCheck` Pid.getLockStatus
checkSaneLock :: LockFile -> LockHandle -> Annex Bool
checkSaneLock f h = H.checkSaneLock f h
`pidLockCheck` flip Pid.checkSaneLock h
pidLockCheck :: IO a -> (LockFile -> IO a) -> Annex a
pidLockCheck posixcheck pidcheck = debugLocks $
liftIO . maybe posixcheck pidcheck =<< pidLockFile
pidLock :: Maybe ModeSetter -> LockFile -> LockMode -> IO LockHandle -> Annex LockHandle
pidLock m f lockmode posixlock = debugLocks $ go =<< pidLockFile
where
go Nothing = liftIO posixlock
go (Just pidlock) = do
timeout <- annexPidLockTimeout <$> Annex.getGitConfig
liftIO $ dummyPosixLock m f
Pid.waitLock f lockmode timeout pidlock (warning . UnquotedString)
tryPidLock :: Maybe ModeSetter -> LockFile -> LockMode -> IO (Maybe LockHandle) -> Annex (Maybe LockHandle)
tryPidLock m f lockmode posixlock = debugLocks $ liftIO . go =<< pidLockFile
where
go Nothing = posixlock
go (Just pidlock) = do
dummyPosixLock m f
Pid.tryLock f lockmode pidlock
-- The posix lock file is created even when using pid locks, in order to
-- avoid complicating any code that might expect to be able to see that
-- lock file. But, it's not locked.
dummyPosixLock :: Maybe ModeSetter -> LockFile -> IO ()
dummyPosixLock m f = bracket (openLockFile ReadLock m f) closeFd (const noop)

74
Annex/Magic.hs Normal file
View file

@ -0,0 +1,74 @@
{- Interface to libmagic
-
- Copyright 2019-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.Magic (
Magic,
MimeType,
MimeEncoding,
initMagicMime,
getMagicMimeType,
getMagicMimeEncoding,
) where
import Types.Mime
import Control.Monad.IO.Class
#ifdef WITH_MAGICMIME
import Magic
import Utility.Env
import Control.Concurrent
import System.IO.Unsafe (unsafePerformIO)
import Common
#else
type Magic = ()
#endif
initMagicMime :: IO (Maybe Magic)
#ifdef WITH_MAGICMIME
initMagicMime = catchMaybeIO $ do
m <- magicOpen [MagicMime]
liftIO $ getEnv "GIT_ANNEX_DIR" >>= \case
Nothing -> magicLoadDefault m
Just d -> magicLoad m
(d </> "magic" </> "magic.mgc")
return m
#else
initMagicMime = return Nothing
#endif
getMagicMime :: Magic -> FilePath -> IO (Maybe (MimeType, MimeEncoding))
#ifdef WITH_MAGICMIME
getMagicMime m f = Just . parse <$> magicConcurrentSafe (magicFile m f)
where
parse s =
let (mimetype, rest) = separate (== ';') s
in case rest of
(' ':'c':'h':'a':'r':'s':'e':'t':'=':mimeencoding) ->
(mimetype, mimeencoding)
_ -> (mimetype, "")
#else
getMagicMime _ _ = return Nothing
#endif
getMagicMimeType :: MonadIO m => Magic -> FilePath -> m (Maybe MimeType)
getMagicMimeType m f = liftIO $ fmap fst <$> getMagicMime m f
getMagicMimeEncoding :: MonadIO m => Magic -> FilePath -> m(Maybe MimeEncoding)
getMagicMimeEncoding m f = liftIO $ fmap snd <$> getMagicMime m f
#ifdef WITH_MAGICMIME
{-# NOINLINE mutex #-}
mutex :: MVar ()
mutex = unsafePerformIO $ newMVar ()
-- Work around a bug, the library is not concurrency safe and will
-- sometimes access the wrong memory if multiple ones are called at the
-- same time.
magicConcurrentSafe :: IO a -> IO a
magicConcurrentSafe = bracket_ (takeMVar mutex) (putMVar mutex ())
#endif

121
Annex/MetaData.hs Normal file
View file

@ -0,0 +1,121 @@
{- git-annex metadata
-
- Copyright 2014-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.MetaData (
genMetaData,
dateMetaData,
parseModMeta,
parseMetaDataMatcher,
module X
) where
import Annex.Common
import qualified Annex
import Types.MetaData as X
import Annex.MetaData.StandardFields as X
import Logs.MetaData
import Annex.CatFile
import Utility.Glob
import qualified Data.Set as S
import qualified Data.Text as T
import Data.Time.Calendar
import Data.Time.Clock
import Data.Time.Clock.POSIX
import Text.Read
{- Adds metadata for a file that has just been ingested into the
- annex, but has not yet been committed to git.
-
- When the file has been modified, the metadata is copied over
- from the old key to the new key. Note that it looks at the old key as
- committed to HEAD -- the new key may or may not have already been staged
- in the index.
-
- Also, can generate new metadata, if configured to do so.
-}
genMetaData :: Key -> RawFilePath -> Maybe POSIXTime -> Annex ()
genMetaData key file mmtime = do
catKeyFileHEAD file >>= \case
Nothing -> noop
Just oldkey ->
-- Have to copy first, before adding any
-- more metadata, because copyMetaData does not
-- preserve any metadata already on key.
whenM (copyMetaData oldkey key <&&> (not <$> onlydatemeta oldkey)) $
warncopied
whenM (annexGenMetaData <$> Annex.getGitConfig) $
case mmtime of
Just mtime -> do
old <- getCurrentMetaData key
addMetaData key $
dateMetaData (posixSecondsToUTCTime mtime) old
Nothing -> noop
where
warncopied = warning $ UnquotedString $
"Copied metadata from old version of " ++ fromRawFilePath file ++ " to new version. " ++
"If you don't want this copied metadata, run: git annex metadata --remove-all " ++ fromRawFilePath file
-- If the only fields copied were date metadata, and they'll
-- be overwritten with the current mtime, no need to warn about
-- copying.
onlydatemeta oldkey = ifM (annexGenMetaData <$> Annex.getGitConfig)
( null . filter (not . isDateMetaField . fst) . fromMetaData
<$> getCurrentMetaData oldkey
, return False
)
{- Generates metadata for a file's date stamp.
-
- Any date fields in the old metadata will be overwritten.
-
- Note that the returned MetaData does not contain all the input MetaData,
- only changes to add the date fields. -}
dateMetaData :: UTCTime -> MetaData -> MetaData
dateMetaData mtime old = modMeta old $
(SetMeta yearMetaField $ S.singleton $ toMetaValue $ encodeBS $ show y)
`ComposeModMeta`
(SetMeta monthMetaField $ S.singleton $ toMetaValue $ encodeBS $ show m)
`ComposeModMeta`
(SetMeta dayMetaField $ S.singleton $ toMetaValue $ encodeBS $ show d)
where
(y, m, d) = toGregorian $ utctDay mtime
{- Parses field=value, field+=value, field-=value, field?=value -}
parseModMeta :: String -> Either String ModMeta
parseModMeta p = case lastMaybe f of
Just '+' -> AddMeta <$> mkMetaField (T.pack f') <*> v
Just '-' -> DelMeta <$> mkMetaField (T.pack f') <*> (Just <$> v)
Just '?' -> MaybeSetMeta <$> mkMetaField (T.pack f') <*> v
_ -> SetMeta <$> mkMetaField (T.pack f) <*> (S.singleton <$> v)
where
(f, sv) = separate (== '=') p
f' = beginning f
v = pure (toMetaValue (encodeBS sv))
{- Parses field=value, field<value, field<=value, field>value, field>=value -}
parseMetaDataMatcher :: String -> Either String (MetaField, MetaValue -> Bool)
parseMetaDataMatcher p = (,)
<$> mkMetaField (T.pack f)
<*> pure matcher
where
(f, op_v) = break (`elem` "=<>") p
matcher = case op_v of
('=':v) -> checkglob v
('<':'=':v) -> checkcmp (<=) (<=) v
('<':v) -> checkcmp (<) (<) v
('>':'=':v) -> checkcmp (>=) (>=) v
('>':v) -> checkcmp (>) (>) v
_ -> checkglob ""
checkglob v =
let cglob = compileGlob v CaseInsensitive (GlobFilePath False)
in matchGlob cglob . decodeBS . fromMetaValue
checkcmp cmp cmp' v mv' =
let v' = decodeBS (fromMetaValue mv')
in case (doubleval v, doubleval v') of
(Just d, Just d') -> d' `cmp` d
_ -> v' `cmp'` v
doubleval v = readMaybe v :: Maybe Double

View file

@ -0,0 +1,67 @@
{- git-annex metadata, standard fields
-
- Copyright 2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.MetaData.StandardFields (
tagMetaField,
yearMetaField,
monthMetaField,
dayMetaField,
isDateMetaField,
lastChangedField,
mkLastChangedField,
isLastChangedField,
itemIdField
) where
import Types.MetaData
import qualified Data.Text as T
import Data.Monoid
import Prelude
tagMetaField :: MetaField
tagMetaField = mkMetaFieldUnchecked "tag"
yearMetaField :: MetaField
yearMetaField = mkMetaFieldUnchecked "year"
monthMetaField :: MetaField
monthMetaField = mkMetaFieldUnchecked "month"
dayMetaField :: MetaField
dayMetaField = mkMetaFieldUnchecked "day"
isDateMetaField :: MetaField -> Bool
isDateMetaField f
| f == yearMetaField = True
| f == monthMetaField = True
| f == dayMetaField = True
| otherwise = False
lastChangedField :: MetaField
lastChangedField = mkMetaFieldUnchecked lastchanged
mkLastChangedField :: MetaField -> MetaField
mkLastChangedField f = mkMetaFieldUnchecked (fromMetaField f <> lastchangedSuffix)
isLastChangedField :: MetaField -> Bool
isLastChangedField f
| f == lastChangedField = True
| otherwise = lastchanged `T.isSuffixOf` s && s /= lastchangedSuffix
where
s = fromMetaField f
lastchanged :: T.Text
lastchanged = "lastchanged"
lastchangedSuffix :: T.Text
lastchangedSuffix = "-lastchanged"
itemIdField :: MetaField
itemIdField = mkMetaFieldUnchecked "itemid"

44
Annex/Multicast.hs Normal file
View file

@ -0,0 +1,44 @@
{- git-annex multicast receive callback
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Multicast where
import Annex.Path
import Utility.Env
import Utility.PartialPrelude
import System.Process
import System.IO
import GHC.IO.Handle.FD
import Control.Applicative
import Prelude
multicastReceiveEnv :: String
multicastReceiveEnv = "GIT_ANNEX_MULTICAST_RECEIVE"
multicastCallbackEnv :: IO (FilePath, [(String, String)], Handle)
multicastCallbackEnv = do
gitannex <- programPath
-- This will even work on Windows
(rfd, wfd) <- createPipeFd
rh <- fdToHandle rfd
environ <- addEntry multicastReceiveEnv (show wfd) <$> getEnvironment
return (gitannex, environ, rh)
-- This is run when uftpd has received a file. Rather than move
-- the file into the annex here, which would require starting up the
-- Annex monad, parsing git config, and verifying the content, simply
-- output to the specified FD the filename. This keeps the time
-- that uftpd is not receiving the next file as short as possible.
runMulticastReceive :: [String] -> String -> IO ()
runMulticastReceive ("-I":_sessionid:fs) hs = case readish hs of
Just fd -> do
h <- fdToHandle fd
mapM_ (hPutStrLn h) fs
hClose h
Nothing -> return ()
runMulticastReceive _ _ = return ()

108
Annex/Notification.hs Normal file
View file

@ -0,0 +1,108 @@
{- git-annex desktop notifications
-
- Copyright 2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-}
{-# LANGUAGE CPP #-}
module Annex.Notification (NotifyWitness, noNotification, notifyTransfer, notifyDrop) where
import Annex.Common
import Types.Transfer
#ifdef WITH_DBUS_NOTIFICATIONS
import qualified Annex
import Types.DesktopNotify
import qualified DBus.Notify as Notify
import qualified DBus.Client
#endif
-- Witness that notification has happened.
data NotifyWitness = NotifyWitness
-- Only use when no notification should be done.
noNotification :: NotifyWitness
noNotification = NotifyWitness
{- Wrap around an action that performs a transfer, which may run multiple
- attempts. Displays notification when supported and when the user asked
- for it. -}
notifyTransfer :: Transferrable t => Observable v => Direction -> t -> (NotifyWitness -> Annex v) -> Annex v
#ifdef WITH_DBUS_NOTIFICATIONS
notifyTransfer direction t a = case descTransfrerrable t of
Nothing -> a NotifyWitness
Just desc -> do
wanted <- Annex.getRead Annex.desktopnotify
if (notifyStart wanted || notifyFinish wanted)
then do
client <- liftIO DBus.Client.connectSession
startnotification <- liftIO $ if notifyStart wanted
then Just <$> Notify.notify client (startedTransferNote direction desc)
else pure Nothing
res <- a NotifyWitness
let ok = observeBool res
when (notifyFinish wanted) $ liftIO $ void $ maybe
(Notify.notify client $ finishedTransferNote ok direction desc)
(\n -> Notify.replace client n $ finishedTransferNote ok direction desc)
startnotification
return res
else a NotifyWitness
#else
notifyTransfer _ _ a = a NotifyWitness
#endif
notifyDrop :: AssociatedFile -> Bool -> Annex ()
notifyDrop (AssociatedFile Nothing) _ = noop
#ifdef WITH_DBUS_NOTIFICATIONS
notifyDrop (AssociatedFile (Just f)) ok = do
wanted <- Annex.getRead Annex.desktopnotify
when (notifyFinish wanted) $ liftIO $ do
client <- DBus.Client.connectSession
void $ Notify.notify client (droppedNote ok (fromRawFilePath f))
#else
notifyDrop (AssociatedFile (Just _)) _ = noop
#endif
#ifdef WITH_DBUS_NOTIFICATIONS
startedTransferNote :: Direction -> String -> Notify.Note
startedTransferNote Upload = mkNote Notify.Transfer Notify.Low iconUpload
"Uploading"
startedTransferNote Download = mkNote Notify.Transfer Notify.Low iconDownload
"Downloading"
finishedTransferNote :: Bool -> Direction -> String -> Notify.Note
finishedTransferNote False Upload = mkNote Notify.TransferError Notify.Normal iconFailure
"Failed to upload"
finishedTransferNote False Download = mkNote Notify.TransferError Notify.Normal iconFailure
"Failed to download"
finishedTransferNote True Upload = mkNote Notify.TransferComplete Notify.Low iconSuccess
"Finished uploading"
finishedTransferNote True Download = mkNote Notify.TransferComplete Notify.Low iconSuccess
"Finished downloading"
droppedNote :: Bool -> String -> Notify.Note
droppedNote False = mkNote Notify.TransferError Notify.Normal iconFailure
"Failed to drop"
droppedNote True = mkNote Notify.TransferComplete Notify.Low iconSuccess
"Dropped"
iconUpload, iconDownload, iconFailure, iconSuccess :: String
iconUpload = "network-transmit"
iconDownload = "network-receive"
iconFailure = "dialog-error"
iconSuccess = "git-annex" -- Is there a standard icon for success/completion?
mkNote :: Notify.Category -> Notify.UrgencyLevel -> String -> String -> FilePath -> Notify.Note
mkNote category urgency icon desc path = Notify.blankNote
{ Notify.appName = "git-annex"
, Notify.appImage = Just (Notify.Icon icon)
, Notify.summary = desc ++ " " ++ path
, Notify.hints =
[ Notify.Category category
, Notify.Urgency urgency
, Notify.SuppressSound True
]
}
#endif

406
Annex/NumCopies.hs Normal file
View file

@ -0,0 +1,406 @@
{- git-annex numcopies configuration and checking
-
- Copyright 2014-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE ScopedTypeVariables, DeriveDataTypeable, OverloadedStrings #-}
module Annex.NumCopies (
module Types.NumCopies,
module Logs.NumCopies,
getFileNumMinCopies,
getSafestNumMinCopies,
getSafestNumMinCopies',
getGlobalFileNumCopies,
getNumCopies,
getMinCopies,
deprecatedNumCopies,
defaultNumCopies,
numCopiesCheck,
numCopiesCheck',
numCopiesCheck'',
numCopiesCount,
verifyEnoughCopiesToDrop,
verifiableCopies,
UnVerifiedCopy(..),
) where
import Annex.Common
import qualified Annex
import Annex.SafeDropProof
import Types.NumCopies
import Logs.NumCopies
import Logs.Trust
import Logs.Cluster
import Annex.CheckAttr
import qualified Remote
import qualified Types.Remote as Remote
import Annex.Content
import Annex.UUID
import Annex.CatFile
import qualified Database.Keys
import Control.Exception
import qualified Control.Monad.Catch as MC
import Data.Typeable
import qualified Data.Set as S
import qualified Data.Map as M
defaultNumCopies :: NumCopies
defaultNumCopies = configuredNumCopies 1
defaultMinCopies :: MinCopies
defaultMinCopies = configuredMinCopies 1
fromSourcesOr :: v -> [Annex (Maybe v)] -> Annex v
fromSourcesOr v = fromMaybe v <$$> getM id
{- The git config annex.numcopies is deprecated. -}
deprecatedNumCopies :: Annex (Maybe NumCopies)
deprecatedNumCopies = annexNumCopies <$> Annex.getGitConfig
{- Value forced on the command line by --numcopies. -}
getForcedNumCopies :: Annex (Maybe NumCopies)
getForcedNumCopies = Annex.getRead Annex.forcenumcopies
{- Value forced on the command line by --mincopies. -}
getForcedMinCopies :: Annex (Maybe MinCopies)
getForcedMinCopies = Annex.getRead Annex.forcemincopies
{- NumCopies value from any of the non-.gitattributes configuration
- sources. -}
getNumCopies :: Annex NumCopies
getNumCopies = fromSourcesOr defaultNumCopies
[ getForcedNumCopies
, getGlobalNumCopies
, deprecatedNumCopies
]
{- MinCopies value from any of the non-.gitattributes configuration
- sources. -}
getMinCopies :: Annex MinCopies
getMinCopies = fromSourcesOr defaultMinCopies
[ getForcedMinCopies
, getGlobalMinCopies
]
{- NumCopies and MinCopies value for a file, from any configuration source,
- including .gitattributes. -}
getFileNumMinCopies :: RawFilePath -> Annex (NumCopies, MinCopies)
getFileNumMinCopies f = do
fnumc <- getForcedNumCopies
fminc <- getForcedMinCopies
case (fnumc, fminc) of
(Just numc, Just minc) -> return (numc, minc)
(Just numc, Nothing) -> do
minc <- fromSourcesOr defaultMinCopies
[ snd <$> getNumMinCopiesAttr f
, getGlobalMinCopies
]
return (numc, minc)
(Nothing, Just minc) -> do
numc <- fromSourcesOr defaultNumCopies
[ fst <$> getNumMinCopiesAttr f
, getGlobalNumCopies
, deprecatedNumCopies
]
return (numc, minc)
(Nothing, Nothing) -> do
let fallbacknum = fromSourcesOr defaultNumCopies
[ getGlobalNumCopies
, deprecatedNumCopies
]
let fallbackmin = fromSourcesOr defaultMinCopies
[ getGlobalMinCopies
]
getNumMinCopiesAttr f >>= \case
(Just numc, Just minc) ->
return (numc, minc)
(Just numc, Nothing) -> (,)
<$> pure numc
<*> fallbackmin
(Nothing, Just minc) -> (,)
<$> fallbacknum
<*> pure minc
(Nothing, Nothing) -> (,)
<$> fallbacknum
<*> fallbackmin
{- Gets the highest NumCopies and MinCopies value for all files
- associated with a key. Provide any known associated file;
- the rest are looked up from the database.
-
- Using this when dropping, rather than getFileNumMinCopies
- avoids dropping one file that has a smaller value violating
- the value set for another file that uses the same content.
-}
getSafestNumMinCopies :: AssociatedFile -> Key -> Annex (NumCopies, MinCopies)
getSafestNumMinCopies afile k =
Database.Keys.getAssociatedFilesIncluding afile k
>>= getSafestNumMinCopies' afile k
getSafestNumMinCopies' :: AssociatedFile -> Key -> [RawFilePath] -> Annex (NumCopies, MinCopies)
getSafestNumMinCopies' afile k fs = do
l <- mapM getFileNumMinCopies fs
let l' = zip l fs
(,)
<$> findmax fst l' getNumCopies
<*> findmax snd l' getMinCopies
where
-- Some associated files in the keys database may no longer
-- correspond to files in the repository.
-- (But the AssociatedFile passed to this is known to be
-- an associated file, which may not be in the keys database
-- yet, so checking it is skipped.)
stillassociated f
| AssociatedFile (Just f) == afile = return True
| otherwise = catKeyFile f >>= \case
Just k' | k' == k -> return True
_ -> return False
-- Avoid calling stillassociated on every file; just make sure
-- that the one with the highest value is still associated.
findmax _ [] fallback = fallback
findmax getv l fallback = do
let n = maximum (map (getv . fst) l)
let (maxls, l') = partition (\(x, _) -> getv x == n) l
ifM (anyM stillassociated (map snd maxls))
( return n
, findmax getv l' fallback
)
{- This is the globally visible numcopies value for a file. So it does
- not include local configuration in the git config or command line
- options. -}
getGlobalFileNumCopies :: RawFilePath -> Annex NumCopies
getGlobalFileNumCopies f = fromSourcesOr defaultNumCopies
[ fst <$> getNumMinCopiesAttr f
, getGlobalNumCopies
]
getNumMinCopiesAttr :: RawFilePath -> Annex (Maybe NumCopies, Maybe MinCopies)
getNumMinCopiesAttr file =
checkAttrs ["annex.numcopies", "annex.mincopies"] file >>= \case
(n:m:[]) -> return
( configuredNumCopies <$> readish n
, configuredMinCopies <$> readish m
)
_ -> error "internal"
{- Checks if numcopies are satisfied for a file by running a comparison
- between the number of (not untrusted) copies that are
- believed to exist, and the configured value.
-
- This is good enough for everything except dropping the file, which
- requires active verification of the copies.
-}
numCopiesCheck :: RawFilePath -> Key -> (Int -> Int -> v) -> Annex v
numCopiesCheck file key vs = do
have <- trustExclude UnTrusted =<< Remote.keyLocations key
numCopiesCheck' file vs have
numCopiesCheck' :: RawFilePath -> (Int -> Int -> v) -> [UUID] -> Annex v
numCopiesCheck' file vs have = do
needed <- fst <$> getFileNumMinCopies file
let nhave = numCopiesCount have
explain (ActionItemTreeFile file) $ Just $ UnquotedString $
"has " ++ show nhave ++ " " ++ pluralCopies nhave ++
", and the configured annex.numcopies is " ++ show needed
return $ numCopiesCheck'' have vs needed
numCopiesCheck'' :: [UUID] -> (Int -> Int -> v) -> NumCopies -> v
numCopiesCheck'' have vs needed =
let nhave = numCopiesCount have
in nhave `vs` fromNumCopies needed
{- When a key is logged as present in a node of the cluster,
- the cluster's UUID will also be in the list, but is not a
- distinct copy.
-}
numCopiesCount :: [UUID] -> Int
numCopiesCount = length . filter (not . isClusterUUID)
data UnVerifiedCopy = UnVerifiedRemote Remote | UnVerifiedHere
deriving (Ord, Eq)
{- Verifies that enough copies of a key exist among the listed remotes,
- to safely drop it, running an action with a proof if so, and
- printing an informative message if not.
-
- Note that the proof is checked to still be valid at the current time
- before running the action, but when dropping the key may take some time,
- the proof's time may need to be checked again.
-}
verifyEnoughCopiesToDrop
:: String -- message to print when there are no known locations
-> Key
-> Maybe UUID -- repo dropping from
-> Maybe ContentRemovalLock
-> NumCopies
-> MinCopies
-> [UUID] -- repos to skip considering (generally untrusted remotes)
-> [VerifiedCopy] -- copies already verified to exist
-> [UnVerifiedCopy] -- places to check to see if they have copies
-> (SafeDropProof -> Annex a) -- action to perform the drop
-> Annex a -- action to perform when unable to drop
-> Annex a
verifyEnoughCopiesToDrop nolocmsg key dropfrom removallock neednum needmin skip preverified tocheck dropaction nodropaction =
helper [] [] preverified (nub tocheck) []
where
helper bad missing have [] lockunsupported =
liftIO (mkSafeDropProof neednum needmin have removallock) >>= \case
Right proof -> checkprooftime proof
Left stillhave -> do
notEnoughCopies key dropfrom neednum needmin stillhave (skip++missing) bad nolocmsg lockunsupported
nodropaction
helper bad missing have (c:cs) lockunsupported
| isSafeDrop neednum needmin have removallock =
liftIO (mkSafeDropProof neednum needmin have removallock) >>= \case
Right proof -> checkprooftime proof
Left stillhave -> helper bad missing stillhave (c:cs) lockunsupported
| otherwise = case c of
UnVerifiedHere -> lockContentShared key Nothing contverified
UnVerifiedRemote r
-- Skip cluster uuids because locking is
-- not supported with them, instead will
-- lock individual nodes.
| isClusterUUID (Remote.uuid r) -> helper bad missing have cs lockunsupported
| otherwise -> checkremote r contverified $
let lockunsupported' = r : lockunsupported
in Remote.hasKey r key >>= \case
Right True -> helper bad missing (mkVerifiedCopy RecentlyVerifiedCopy r : have) cs lockunsupported'
Left _ -> helper (r:bad) missing have cs lockunsupported'
Right False -> helper bad (Remote.uuid r:missing) have cs lockunsupported'
where
contverified vc = helper bad missing (vc : have) cs lockunsupported
checkremote r cont fallback = case Remote.lockContent r of
Just lockcontent -> do
-- The remote's lockContent will throw an exception
-- when it is unable to lock, in which case the
-- fallback should be run.
--
-- On the other hand, the continuation could itself
-- throw an exception (ie, the eventual drop action
-- fails), and in this case we don't want to run the
-- fallback since part of the drop action may have
-- already been performed.
--
-- Differentiate between these two sorts
-- of exceptions by using DropException.
let a = lockcontent key $ \v ->
cont v `catchNonAsync` (throw . DropException)
a `MC.catches`
[ MC.Handler (\ (e :: AsyncException) -> throwM e)
, MC.Handler (\ (e :: SomeAsyncException) -> throwM e)
, MC.Handler (\ (DropException e') -> throwM e')
, MC.Handler (\ (_e :: SomeException) -> fallback)
]
Nothing -> fallback
checkprooftime proof =
ifM (liftIO $ checkSafeDropProofEndTime (Just proof))
( dropaction proof
, do
safeDropProofExpired
nodropaction
)
data DropException = DropException SomeException
deriving (Typeable, Show)
instance Exception DropException
notEnoughCopies :: Key -> Maybe UUID -> NumCopies -> MinCopies -> [VerifiedCopy] -> [UUID] -> [Remote] -> String -> [Remote] -> Annex ()
notEnoughCopies key dropfrom neednum needmin have skip bad nolocmsg lockunsupported = do
showNote "unsafe"
if length have < fromNumCopies neednum
then showLongNote $ UnquotedString $
if fromNumCopies neednum == 1
then "Could not verify the existence of the 1 necessary copy."
else "Could only verify the existence of " ++
show (length have) ++ " out of " ++ show (fromNumCopies neednum) ++
" necessary " ++ pluralCopies (fromNumCopies neednum) ++ "."
else do
showLongNote $ UnquotedString $ "Unable to lock down " ++ show (fromMinCopies needmin) ++
" " ++ pluralCopies (fromMinCopies needmin) ++
" of file necessary to safely drop it."
if null lockunsupported
then showLongNote "(This could have happened because of a concurrent drop, or because a remote has too old a version of git-annex-shell installed.)"
else showLongNote $ UnquotedString $ "These remotes do not support locking: "
++ Remote.listRemoteNames lockunsupported
Remote.showTriedRemotes bad
-- When dropping from a cluster, don't suggest making the nodes of
-- the cluster available
clusternodes <- case mkClusterUUID =<< dropfrom of
Nothing -> pure []
Just cu -> do
clusters <- getClusters
pure $ maybe [] (map fromClusterNodeUUID . S.toList) $
M.lookup cu (clusterUUIDs clusters)
let excludeset = S.fromList $ map toUUID have++skip++clusternodes
-- Don't suggest making a cluster available when dropping from its
-- node.
let exclude u
| u `S.member` excludeset = pure True
| otherwise = case (dropfrom, mkClusterUUID u) of
(Just dropfrom', Just cu) -> do
clusters <- getClusters
pure $ case M.lookup cu (clusterUUIDs clusters) of
Just nodes ->
ClusterNodeUUID dropfrom'
`S.member` nodes
Nothing -> False
_ -> pure False
Remote.showLocations True key exclude nolocmsg
pluralCopies :: Int -> String
pluralCopies 1 = "copy"
pluralCopies _ = "copies"
{- Finds locations of a key that can be used to get VerifiedCopies,
- in order to allow dropping the key.
-
- Provide a list of UUIDs that the key is being dropped from.
- The returned lists will exclude any of those UUIDs.
-
- The return lists also exclude any repositories that are untrusted,
- since those should not be used for verification.
-
- When dropping from a cluster UUID, its nodes are excluded.
-
- Cluster UUIDs are also excluded since locking a key on a cluster
- is done by locking on individual nodes.
-
- The UnVerifiedCopy list is cost ordered.
- The VerifiedCopy list contains repositories that are trusted to
- contain the key.
-}
verifiableCopies :: Key -> [UUID] -> Annex ([UnVerifiedCopy], [VerifiedCopy])
verifiableCopies key exclude = do
locs <- filter (not . isClusterUUID) <$> Remote.keyLocations key
(remotes, trusteduuids) <- Remote.remoteLocations (Remote.IncludeIgnored False) locs
=<< trustGet Trusted
clusternodes <- if any isClusterUUID exclude
then do
clusters <- getClusters
pure $ concatMap (getclusternodes clusters) exclude
else pure []
untrusteduuids <- trustGet UnTrusted
let exclude' = exclude ++ untrusteduuids ++ clusternodes
let remotes' = Remote.remotesWithoutUUID remotes (exclude' ++ trusteduuids)
let verified = map (mkVerifiedCopy TrustedCopy) $
filter (`notElem` exclude') trusteduuids
u <- getUUID
let herec = if u `elem` locs && u `notElem` exclude'
then [UnVerifiedHere]
else []
return (herec ++ map UnVerifiedRemote remotes', verified)
where
getclusternodes clusters u = case mkClusterUUID u of
Just cu -> maybe [] (map fromClusterNodeUUID . S.toList) $
M.lookup cu (clusterUUIDs clusters)
Nothing -> []

129
Annex/Path.hs Normal file
View file

@ -0,0 +1,129 @@
{- git-annex program path
-
- Copyright 2013-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Path (
programPath,
readProgramFile,
gitAnnexChildProcess,
gitAnnexChildProcessParams,
gitAnnexDaemonizeParams,
cleanStandaloneEnvironment,
) where
import Annex.Common
import Config.Files
import Utility.Env
import Annex.PidLock
import qualified Annex
import System.Environment (getExecutablePath, getArgs, getProgName)
{- A fully qualified path to the currently running git-annex program.
-
- getExecutablePath is used when possible. On OSs it supports
- well, it returns the complete path to the program. But, on other OSs,
- it might return just the basename. Fall back to reading the programFile,
- or searching for the command name in PATH.
-
- The standalone build runs git-annex via ld.so, and defeats
- getExecutablePath. It sets GIT_ANNEX_DIR to the location of the
- standalone build directory, and there are wrapper scripts for git-annex
- and git-annex-shell in that directory.
-}
programPath :: IO FilePath
programPath = go =<< getEnv "GIT_ANNEX_DIR"
where
go (Just dir) = do
name <- getProgName
return (dir </> name)
go Nothing = do
exe <- getExecutablePath
p <- if isAbsolute exe
then return exe
else fromMaybe exe <$> readProgramFile
maybe cannotFindProgram return =<< searchPath p
{- Returns the path for git-annex that is recorded in the programFile. -}
readProgramFile :: IO (Maybe FilePath)
readProgramFile = do
programfile <- programFile
headMaybe . lines <$> readFile programfile
cannotFindProgram :: IO a
cannotFindProgram = do
f <- programFile
giveup $ "cannot find git-annex program in PATH or in " ++ f
{- Runs a git-annex child process.
-
- Like runsGitAnnexChildProcessViaGit, when pid locking is in use,
- this takes the pid lock, while running it, and sets an env var
- that prevents the child process trying to take the pid lock,
- to avoid it deadlocking.
-}
gitAnnexChildProcess
:: String
-> [CommandParam]
-> (CreateProcess -> CreateProcess)
-> (Maybe Handle -> Maybe Handle -> Maybe Handle -> ProcessHandle -> IO a)
-> Annex a
gitAnnexChildProcess subcmd ps f a = do
cmd <- liftIO programPath
ps' <- gitAnnexChildProcessParams subcmd ps
pidLockChildProcess cmd ps' f a
{- Parameters to pass to a git-annex child process to run a subcommand
- with some parameters.
-
- Includes -c values that were passed on the git-annex command line
- or due to options like --debug being enabled.
-}
gitAnnexChildProcessParams :: String -> [CommandParam] -> Annex [CommandParam]
gitAnnexChildProcessParams subcmd ps = do
cps <- gitAnnexGitConfigOverrides
force <- Annex.getRead Annex.force
let cps' = if force
then Param "--force" : cps
else cps
return (Param subcmd : cps' ++ ps)
gitAnnexGitConfigOverrides :: Annex [CommandParam]
gitAnnexGitConfigOverrides = concatMap (\c -> [Param "-c", Param c])
<$> Annex.getGitConfigOverrides
{- Parameters to pass to git-annex when re-running the current command
- to daemonize it. Used with Utility.Daemon.daemonize. -}
gitAnnexDaemonizeParams :: Annex [CommandParam]
gitAnnexDaemonizeParams = do
-- This includes -c parameters passed to git, as well as ones
-- passed to git-annex.
cps <- gitAnnexGitConfigOverrides
-- Get every parameter git-annex was run with.
ps <- liftIO getArgs
return (map Param ps ++ cps)
{- Returns a cleaned up environment that lacks path and other settings
- used to make the standalone builds use their bundled libraries and programs.
- Useful when calling programs not included in the standalone builds.
-
- For a non-standalone build, returns Nothing.
-}
cleanStandaloneEnvironment :: IO (Maybe [(String, String)])
cleanStandaloneEnvironment = clean <$> getEnvironment
where
clean environ
| null vars = Nothing
| otherwise = Just $ catMaybes $ map (restoreorig environ) environ
where
vars = words $ fromMaybe "" $
lookup "GIT_ANNEX_STANDLONE_ENV" environ
restoreorig oldenviron p@(k, _v)
| k `elem` vars = case lookup ("ORIG_" ++ k) oldenviron of
(Just v')
| not (null v') -> Just (k, v')
_ -> Nothing
| otherwise = Just p

374
Annex/Perms.hs Normal file
View file

@ -0,0 +1,374 @@
{- git-annex file permissions
-
- Copyright 2012-2023 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Perms (
FileMode,
setAnnexFilePerm,
setAnnexDirPerm,
resetAnnexFilePerm,
annexFileMode,
createAnnexDirectory,
createWorkTreeDirectory,
freezeContent,
freezeContent',
freezeContent'',
checkContentWritePerm,
checkContentWritePerm',
thawContent,
thawContent',
createContentDir,
freezeContentDir,
thawContentDir,
modifyContentDir,
modifyContentDirWhenExists,
withShared,
hasFreezeHook,
hasThawHook,
) where
import Annex.Common
import Utility.FileMode
import Git
import Git.ConfigTypes
import qualified Annex
import Annex.Version
import Types.RepoVersion
import Config
import Utility.Directory.Create
import qualified Utility.RawFilePath as R
import System.PosixCompat.Files (fileMode, intersectFileModes, nullFileMode, groupWriteMode, ownerWriteMode, ownerReadMode, groupReadMode, otherReadMode, stdFileMode, ownerExecuteMode, groupExecuteMode, otherExecuteMode, setGroupIDMode)
withShared :: (SharedRepository -> Annex a) -> Annex a
withShared a = a =<< coreSharedRepository <$> Annex.getGitConfig
setAnnexFilePerm :: RawFilePath -> Annex ()
setAnnexFilePerm = setAnnexPerm False
setAnnexDirPerm :: RawFilePath -> Annex ()
setAnnexDirPerm = setAnnexPerm True
{- Sets appropriate file mode for a file or directory in the annex,
- other than the content files and content directory. Normally,
- don't change the mode, but with core.sharedRepository set,
- allow the group to write, etc. -}
setAnnexPerm :: Bool -> RawFilePath -> Annex ()
setAnnexPerm isdir file = setAnnexPerm' Nothing isdir >>= \go -> liftIO (go file)
setAnnexPerm' :: Maybe ([FileMode] -> FileMode -> FileMode) -> Bool -> Annex (RawFilePath -> IO ())
setAnnexPerm' modef isdir = ifM crippledFileSystem
( return (const noop)
, withShared $ \s -> return $ \file -> go s file
)
where
go GroupShared file = void $ tryIO $ modifyFileMode file $ modef' $
groupSharedModes ++
if isdir then [ ownerExecuteMode, groupExecuteMode ] else []
go AllShared file = void $ tryIO $ modifyFileMode file $ modef' $
readModes ++
[ ownerWriteMode, groupWriteMode ] ++
if isdir then executeModes else []
go UnShared file = case modef of
Nothing -> noop
Just f -> void $ tryIO $
modifyFileMode file $ f []
go (UmaskShared n) file = void $ tryIO $ R.setFileMode file $
if isdir then umaskSharedDirectory n else n
modef' = fromMaybe addModes modef
resetAnnexFilePerm :: RawFilePath -> Annex ()
resetAnnexFilePerm = resetAnnexPerm False
{- Like setAnnexPerm, but ignores the current mode of the file entirely,
- and sets the same mode that the umask would result in when creating a
- new file.
-
- Useful eg, after creating a temporary file with locked down modes,
- which is going to be moved to a non-temporary location and needs
- usual modes.
-}
resetAnnexPerm :: Bool -> RawFilePath -> Annex ()
resetAnnexPerm isdir file = unlessM crippledFileSystem $ do
defmode <- liftIO defaultFileMode
let modef moremodes _oldmode = addModes moremodes defmode
setAnnexPerm' (Just modef) isdir >>= \go -> liftIO (go file)
{- Creates a ModeSetter which can be used for creating a file in the annex
- (other than content files, which are locked down more). -}
annexFileMode :: Annex ModeSetter
annexFileMode = do
modesetter <- setAnnexPerm' Nothing False
withShared (\s -> pure $ mk s modesetter)
where
mk GroupShared = ModeSetter stdFileMode
mk AllShared = ModeSetter stdFileMode
mk UnShared = ModeSetter stdFileMode
mk (UmaskShared mode) = ModeSetter mode
{- Creates a directory inside the gitAnnexDir (or possibly the dbdir),
- creating any parent directories up to and including the gitAnnexDir.
- Makes directories with appropriate permissions. -}
createAnnexDirectory :: RawFilePath -> Annex ()
createAnnexDirectory dir = do
top <- parentDir <$> fromRepo gitAnnexDir
tops <- annexDbDir <$> Annex.getGitConfig >>= return . \case
Nothing -> [top]
Just dbdir -> [top, parentDir (parentDir dbdir)]
createDirectoryUnder' tops dir createdir
where
createdir p = do
liftIO $ R.createDirectory p
setAnnexDirPerm p
{- Create a directory in the git work tree, creating any parent
- directories up to the top of the work tree.
-
- Uses default permissions.
-}
createWorkTreeDirectory :: RawFilePath -> Annex ()
createWorkTreeDirectory dir = do
fromRepo repoWorkTree >>= liftIO . \case
Just wt -> createDirectoryUnder [wt] dir
-- Should never happen, but let whatever tries to write
-- to the directory be what throws an exception, as that
-- will be clearer than an exception from here.
Nothing -> noop
{- Normally, blocks writing to an annexed file, and modifies file
- permissions to allow reading it.
-
- Before v9, when core.sharedRepository is set, the write bits are not
- removed from the file, but instead the appropriate group write bits
- are set. This is necessary to let other users in the group lock the file.
- v9 improved this by using separate lock files, so the content file does
- not need to be writable when using it.
-
- In a shared repository, the current user may not be able to change
- a file owned by another user, so failure to change modes is ignored.
-
- Note that, on Linux, xattrs can sometimes prevent removing
- certain permissions from a file with chmod. (Maybe some ACLs too?)
- In such a case, this will return with the file still having some mode
- it should not normally have. checkContentWritePerm can detect when
- that happens with write permissions.
-}
freezeContent :: RawFilePath -> Annex ()
freezeContent file =
withShared $ \sr -> freezeContent' sr file
freezeContent' :: SharedRepository -> RawFilePath -> Annex ()
freezeContent' sr file = freezeContent'' sr file =<< getVersion
freezeContent'' :: SharedRepository -> RawFilePath -> Maybe RepoVersion -> Annex ()
freezeContent'' sr file rv = do
fastDebug "Annex.Perms" ("freezing content " ++ fromRawFilePath file)
unlessM crippledFileSystem $ go sr
freezeHook file
where
go UnShared = liftIO $ nowriteadd [ownerReadMode]
go GroupShared = if versionNeedsWritableContentFiles rv
then liftIO $ ignoresharederr $ modmode $ addModes
[ownerReadMode, groupReadMode, ownerWriteMode, groupWriteMode]
else liftIO $ ignoresharederr $
nowriteadd [ownerReadMode, groupReadMode]
go AllShared = if versionNeedsWritableContentFiles rv
then liftIO $ ignoresharederr $ modmode $ addModes
(readModes ++ writeModes)
else liftIO $ ignoresharederr $
nowriteadd readModes
go (UmaskShared n) = if versionNeedsWritableContentFiles rv
-- Assume that the configured mode includes write bits
-- for all users who should be able to lock the file, so
-- don't need to add any write modes.
then liftIO $ ignoresharederr $ modmode $ const n
else liftIO $ ignoresharederr $ modmode $ const $
removeModes writeModes n
ignoresharederr = void . tryIO
modmode = modifyFileMode file
nowriteadd readmodes = modmode $
removeModes writeModes .
addModes readmodes
{- Checks if the write permissions are as freezeContent should set them.
-
- When the repository is shared, the user may not be able to change
- permissions of a file owned by another user. So if the permissions seem
- wrong, but the repository is shared, returns Nothing. If the permissions
- are wrong otherwise, returns Just False.
-
- When there is a freeze hook, it may prevent write in some way other than
- permissions. One use of a freeze hook is when the filesystem does not
- support removing write permissions, so when there is such a hook
- write permissions are ignored.
-}
checkContentWritePerm :: RawFilePath -> Annex (Maybe Bool)
checkContentWritePerm file = ifM crippledFileSystem
( return (Just True)
, do
rv <- getVersion
hasfreezehook <- hasFreezeHook
withShared $ \sr ->
liftIO $ checkContentWritePerm' sr file rv hasfreezehook
)
checkContentWritePerm' :: SharedRepository -> RawFilePath -> Maybe RepoVersion -> Bool -> IO (Maybe Bool)
checkContentWritePerm' sr file rv hasfreezehook
| hasfreezehook = return (Just True)
| otherwise = case sr of
UnShared -> want Just (excludemodes writeModes)
GroupShared
| versionNeedsWritableContentFiles rv -> want sharedret
(includemodes [ownerWriteMode, groupWriteMode])
| otherwise -> want sharedret (excludemodes writeModes)
AllShared
| versionNeedsWritableContentFiles rv ->
want sharedret (includemodes writeModes)
| otherwise -> want sharedret (excludemodes writeModes)
UmaskShared n
| versionNeedsWritableContentFiles rv -> want sharedret
(\havemode -> havemode == n)
| otherwise -> want sharedret
(\havemode -> havemode == removeModes writeModes n)
where
want mk f = catchMaybeIO (fileMode <$> R.getFileStatus file)
>>= return . \case
Just havemode -> mk (f havemode)
Nothing -> mk True
includemodes l havemode = havemode == combineModes (havemode:l)
excludemodes l havemode = all (\m -> intersectFileModes m havemode == nullFileMode) l
sharedret True = Just True
sharedret False = Nothing
{- Allows writing to an annexed file that freezeContent was called on
- before. -}
thawContent :: RawFilePath -> Annex ()
thawContent file = withShared $ \sr -> thawContent' sr file
thawContent' :: SharedRepository -> RawFilePath -> Annex ()
thawContent' sr file = do
fastDebug "Annex.Perms" ("thawing content " ++ fromRawFilePath file)
thawPerms (go sr) (thawHook file)
where
go GroupShared = liftIO $ void $ tryIO $ groupWriteRead file
go AllShared = liftIO $ void $ tryIO $ groupWriteRead file
go UnShared = liftIO $ allowWrite file
go (UmaskShared n) = liftIO $ void $ tryIO $ R.setFileMode file n
{- Runs an action that thaws a file's permissions. This will probably
- fail on a crippled filesystem. But, if file modes are supported on a
- crippled filesystem, the file may be frozen, so try to thaw its
- permissions. -}
thawPerms :: Annex () -> Annex () -> Annex ()
thawPerms a hook = ifM crippledFileSystem
( hook >> void (tryNonAsync a)
, hook >> a
)
{- Blocks writing to the directory an annexed file is in, to prevent the
- file accidentally being deleted. However, if core.sharedRepository
- is set, this is not done, since the group must be allowed to delete the
- file without eing able to thaw the directory.
-}
freezeContentDir :: RawFilePath -> Annex ()
freezeContentDir file = do
fastDebug "Annex.Perms" ("freezing content directory " ++ fromRawFilePath dir)
unlessM crippledFileSystem $ withShared go
freezeHook dir
where
dir = parentDir file
go UnShared = liftIO $ preventWrite dir
go GroupShared = liftIO $ void $ tryIO $ groupWriteRead dir
go AllShared = liftIO $ void $ tryIO $ groupWriteRead dir
go (UmaskShared n) = liftIO $ void $ tryIO $ R.setFileMode dir $
umaskSharedDirectory $
-- If n includes group or other write mode, leave them set
-- to allow them to delete the file without being able to
-- thaw the directory.
removeModes [ownerWriteMode] n
thawContentDir :: RawFilePath -> Annex ()
thawContentDir file = do
fastDebug "Annex.Perms" ("thawing content directory " ++ fromRawFilePath dir)
thawPerms (withShared (liftIO . go)) (thawHook dir)
where
dir = parentDir file
go UnShared = allowWrite dir
go GroupShared = allowWrite dir
go AllShared = allowWrite dir
go (UmaskShared n) = R.setFileMode dir n
{- Makes the directory tree to store an annexed file's content,
- with appropriate permissions on each level. -}
createContentDir :: RawFilePath -> Annex ()
createContentDir dest = do
unlessM (liftIO $ R.doesPathExist dir) $
createAnnexDirectory dir
-- might have already existed with restricted perms
thawHook dir
unlessM crippledFileSystem $ liftIO $ allowWrite dir
where
dir = parentDir dest
{- Creates the content directory for a file if it doesn't already exist,
- or thaws it if it does, then runs an action to modify a file in the
- directory, and finally, freezes the content directory. -}
modifyContentDir :: RawFilePath -> Annex a -> Annex a
modifyContentDir f a = do
createContentDir f -- also thaws it
v <- tryNonAsync a
freezeContentDir f
either throwM return v
{- Like modifyContentDir, but avoids creating the content directory if it
- does not already exist. In that case, the action will probably fail. -}
modifyContentDirWhenExists :: RawFilePath -> Annex a -> Annex a
modifyContentDirWhenExists f a = do
thawContentDir f
v <- tryNonAsync a
freezeContentDir f
either throwM return v
hasFreezeHook :: Annex Bool
hasFreezeHook = isJust . annexFreezeContentCommand <$> Annex.getGitConfig
hasThawHook :: Annex Bool
hasThawHook = isJust . annexThawContentCommand <$> Annex.getGitConfig
freezeHook :: RawFilePath -> Annex ()
freezeHook p = maybe noop go =<< annexFreezeContentCommand <$> Annex.getGitConfig
where
go basecmd = void $ liftIO $
boolSystem "sh" [Param "-c", Param $ gencmd basecmd]
gencmd = massReplace [ ("%path", shellEscape (fromRawFilePath p)) ]
thawHook :: RawFilePath -> Annex ()
thawHook p = maybe noop go =<< annexThawContentCommand <$> Annex.getGitConfig
where
go basecmd = void $ liftIO $
boolSystem "sh" [Param "-c", Param $ gencmd basecmd]
gencmd = massReplace [ ("%path", shellEscape (fromRawFilePath p)) ]
{- Calculate mode to use for a directory from the mode to use for a file.
-
- This corresponds to git's handling of core.sharedRepository=0xxx
-}
umaskSharedDirectory :: FileMode -> FileMode
umaskSharedDirectory n = flip addModes n $ map snd $ filter fst
[ (isset ownerReadMode, ownerExecuteMode)
, (isset groupReadMode, groupExecuteMode)
, (isset otherReadMode, otherExecuteMode)
, (isset groupReadMode || isset groupWriteMode, setGroupIDMode)
]
where
isset v = checkMode v n

131
Annex/PidLock.hs Normal file
View file

@ -0,0 +1,131 @@
{- Pid locking support.
-
- Copyright 2014-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.PidLock where
import Annex.Common
import Git
#ifndef mingw32_HOST_OS
import Git.Env
import Annex.GitOverlay
import qualified Utility.LockFile.PidLock as PidF
import qualified Utility.LockPool.PidLock as PidP
import Utility.LockPool (dropLock)
import Utility.Env
import Config
#endif
{- When pid locking is in use, this tries to take the pid lock (unless
- the process already has it), and if successful, holds it while
- running the child process. The child process is run with an env var
- set, which prevents it from trying to take the pid lock itself.
-
- This way, any locking the parent does will not get in the way of
- the child. The child is assumed to not do any locking that conflicts
- with the parent, but if it did happen to do that, it would be noticed
- when git-annex is used without pid locking.
-
- If another process is already holding the pid lock, the child process
- is still run, but without setting the env var, so it can try to take the
- pid lock itself, and fail however is appropriate for it in that
- situation.
-}
pidLockChildProcess
:: FilePath
-> [CommandParam]
-> (CreateProcess -> CreateProcess)
-> (Maybe Handle -> Maybe Handle -> Maybe Handle -> ProcessHandle -> IO a)
-> Annex a
pidLockChildProcess cmd ps f a = do
let p = f (proc cmd (toCommand ps))
let gonopidlock = withCreateProcess p a
#ifndef mingw32_HOST_OS
pidLockFile >>= liftIO . \case
Nothing -> gonopidlock
Just pidlock -> bracket
(setup pidlock)
cleanup
(go gonopidlock p pidlock)
where
setup pidlock = fmap fst <$> PidP.tryLock' pidlock
cleanup (Just h) = dropLock h
cleanup Nothing = return ()
go gonopidlock _ _ Nothing = gonopidlock
go _ p pidlock (Just _h) = do
v <- PidF.pidLockEnv pidlock
baseenv <- case env p of
Nothing -> getEnvironment
Just baseenv -> pure baseenv
let p' = p { env = Just ((v, PidF.pidLockEnvValue) : baseenv) }
withCreateProcess p' a
#else
liftIO gonopidlock
#endif
{- Wrap around actions that may run a git-annex child process via a git
- command.
-
- This is like pidLockChildProcess, but rather than running a process
- itself, it runs the action with a modified Annex state that passes the
- necessary env var when running git.
-}
runsGitAnnexChildProcessViaGit :: Annex a -> Annex a
#ifndef mingw32_HOST_OS
runsGitAnnexChildProcessViaGit a = pidLockFile >>= \case
Nothing -> a
Just pidlock -> bracket (setup pidlock) cleanup (go pidlock)
where
setup pidlock = liftIO $ fmap fst <$> PidP.tryLock' pidlock
cleanup (Just h) = liftIO $ dropLock h
cleanup Nothing = return ()
go _ Nothing = a
go pidlock (Just _h) = do
v <- liftIO $ PidF.pidLockEnv pidlock
let addenv g = do
g' <- liftIO $ addGitEnv g v PidF.pidLockEnvValue
return (g', ())
let rmenv oldg g
| any (\(k, _) -> k == v) (fromMaybe [] (Git.gitEnv oldg)) = g
| otherwise =
let e' = case Git.gitEnv g of
Just e -> Just (delEntry v e)
Nothing -> Nothing
in g { Git.gitEnv = e' }
withAltRepo addenv rmenv (const a)
#else
runsGitAnnexChildProcessViaGit a = a
#endif
{- Like runsGitAnnexChildProcessViaGit, but the Annex state is not
- modified. Instead the input Repo's state is modified to set the
- necessary env var when git is run in that Repo.
-}
runsGitAnnexChildProcessViaGit' :: Git.Repo -> (Git.Repo -> Annex a) -> Annex a
#ifndef mingw32_HOST_OS
runsGitAnnexChildProcessViaGit' r a = pidLockFile >>= \case
Nothing -> a r
Just pidlock -> bracketIO (setup pidlock) cleanup (go pidlock)
where
setup pidlock = fmap fst <$> PidP.tryLock' pidlock
cleanup (Just h) = dropLock h
cleanup Nothing = return ()
go _ Nothing = a r
go pidlock (Just _h) = do
v <- liftIO $ PidF.pidLockEnv pidlock
r' <- liftIO $ addGitEnv r v PidF.pidLockEnvValue
a r'
#else
runsGitAnnexChildProcessViaGit' r a = a r
#endif

370
Annex/Proxy.hs Normal file
View file

@ -0,0 +1,370 @@
{- proxying
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Proxy where
import Annex.Common
import qualified Annex
import qualified Remote
import qualified Types.Remote as Remote
import qualified Remote.Git
import P2P.Proxy
import P2P.Protocol
import P2P.IO
import Remote.Helper.Ssh (openP2PShellConnection', closeP2PShellConnection)
import Annex.Concurrent
import Annex.Tmp
import Annex.Verify
import Annex.UUID
import Logs.Proxy
import Logs.Cluster
import Logs.UUID
import Logs.Location
import Utility.Tmp.Dir
import Utility.Metered
import Git.Types
import qualified Database.Export as Export
import Control.Concurrent.STM
import Control.Concurrent.Async
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified System.FilePath.ByteString as P
import qualified Data.Map as M
import qualified Data.Set as S
proxyRemoteSide :: ProtocolVersion -> Bypass -> Remote -> Annex RemoteSide
proxyRemoteSide clientmaxversion bypass r
| Remote.remotetype r == Remote.Git.remote =
proxyGitRemoteSide clientmaxversion bypass r
| otherwise =
proxySpecialRemoteSide clientmaxversion r
proxyGitRemoteSide :: ProtocolVersion -> Bypass -> Remote -> Annex RemoteSide
proxyGitRemoteSide clientmaxversion bypass r = mkRemoteSide r $
openP2PShellConnection' r clientmaxversion bypass >>= \case
Just conn@(OpenConnection (remoterunst, remoteconn, _)) ->
return $ Just
( remoterunst
, remoteconn
, void $ liftIO $ closeP2PShellConnection conn
)
_ -> return Nothing
proxySpecialRemoteSide :: ProtocolVersion -> Remote -> Annex RemoteSide
proxySpecialRemoteSide clientmaxversion r = mkRemoteSide r $ do
let protoversion = min clientmaxversion maxProtocolVersion
remoterunst <- Serving (Remote.uuid r) Nothing <$>
liftIO (newTVarIO protoversion)
ihdl <- liftIO newEmptyTMVarIO
ohdl <- liftIO newEmptyTMVarIO
iwaitv <- liftIO newEmptyTMVarIO
owaitv <- liftIO newEmptyTMVarIO
iclosedv <- liftIO newEmptyTMVarIO
oclosedv <- liftIO newEmptyTMVarIO
exportdb <- ifM (Remote.isExportSupported r)
( Just <$> Export.openDb (Remote.uuid r)
, pure Nothing
)
worker <- liftIO . async =<< forkState
(proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv exportdb)
let remoteconn = P2PConnection
{ connRepo = Nothing
, connCheckAuth = const False
, connIhdl = P2PHandleTMVar ihdl (Just iwaitv) iclosedv
, connOhdl = P2PHandleTMVar ohdl (Just owaitv) oclosedv
, connIdent = ConnIdent (Just (Remote.name r))
}
let closeremoteconn = do
liftIO $ atomically $ putTMVar oclosedv ()
join $ liftIO (wait worker)
maybe noop Export.closeDb exportdb
return $ Just
( remoterunst
, remoteconn
, closeremoteconn
)
-- Proxy for the special remote, speaking the P2P protocol.
proxySpecialRemote
:: ProtocolVersion
-> Remote
-> TMVar (Either L.ByteString Message)
-> TMVar (Either L.ByteString Message)
-> TMVar ()
-> TMVar ()
-> Maybe Export.ExportHandle
-> Annex ()
proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv mexportdb = go
where
go :: Annex ()
go = liftIO receivemessage >>= \case
Just (CHECKPRESENT k) -> do
tryNonAsync (Remote.checkPresent r k) >>= \case
Right True -> liftIO $ sendmessage SUCCESS
Right False -> liftIO $ sendmessage FAILURE
Left err -> liftIO $ propagateerror err
go
Just (LOCKCONTENT _) -> do
-- Special remotes do not support locking content.
liftIO $ sendmessage FAILURE
go
Just (REMOVE k) -> do
tryNonAsync (Remote.removeKey r Nothing k) >>= \case
Right () -> liftIO $ sendmessage SUCCESS
Left err -> liftIO $ propagateerror err
go
Just (PUT (ProtoAssociatedFile af) k) -> do
proxyput af k
go
Just (GET offset (ProtoAssociatedFile af) k) -> do
proxyget offset af k
go
Just (BYPASS _) -> go
Just (CONNECT _) ->
-- Not supported and the protocol ends here.
liftIO $ sendmessage $ CONNECTDONE (ExitFailure 1)
Just NOTIFYCHANGE -> do
liftIO $ sendmessage $
ERROR "NOTIFYCHANGE unsupported for a special remote"
go
Just _ -> giveup "protocol error"
Nothing -> return ()
receivemessage = liftIO (atomically recv) >>= \case
Right (Right m) -> return (Just m)
Right (Left _b) -> giveup "unexpected ByteString received from P2P MVar"
Left () -> return Nothing
where
recv =
(Right <$> takeTMVar ohdl)
`orElse`
(Left <$> readTMVar oclosedv)
receivebytestring = atomically recv >>= \case
Right (Left b) -> return (Just b)
Right (Right _m) -> giveup "did not receive ByteString from P2P MVar"
Left () -> return Nothing
where
recv =
(Right <$> takeTMVar ohdl)
`orElse`
(Left <$> readTMVar oclosedv)
sendmessage m = atomically $ putTMVar ihdl (Right m)
sendbytestring b = atomically $ putTMVar ihdl (Left b)
propagateerror err = sendmessage $ ERROR $
"proxied special remote reports: " ++ show err
-- Not using gitAnnexTmpObjectLocation because there might be
-- several concurrent GET and PUTs of the same key being proxied
-- from this special remote or others, and each needs to happen
-- independently. Also, this key is not getting added into the
-- local annex objects.
withproxytmpfile k a = withOtherTmp $ \othertmpdir ->
withTmpDirIn (fromRawFilePath othertmpdir) "proxy" $ \tmpdir ->
a (toRawFilePath tmpdir P.</> keyFile k)
-- Verify the content received from the client, to avoid bad content
-- being stored in the special remote.
proxyput af k = do
liftIO $ sendmessage $ PUT_FROM (Offset 0)
withproxytmpfile k $ \tmpfile -> do
let store = tryNonAsync (storeput k af (decodeBS tmpfile)) >>= \case
Right () -> liftIO $ sendmessage SUCCESS
Left err -> liftIO $ propagateerror err
liftIO receivemessage >>= \case
Just (DATA (Len len)) -> do
iv <- startVerifyKeyContentIncrementally Remote.AlwaysVerify k
h <- liftIO $ openFile (fromRawFilePath tmpfile) WriteMode
gotall <- liftIO $ receivetofile iv h len
liftIO $ hClose h
verified <- if gotall
then fst <$> finishVerifyKeyContentIncrementally' True iv
else pure False
if protoversion > ProtocolVersion 1
then liftIO receivemessage >>= \case
Just (VALIDITY Valid)
| verified -> store
| otherwise -> liftIO $ sendmessage FAILURE
Just (VALIDITY Invalid) ->
liftIO $ sendmessage FAILURE
_ -> giveup "protocol error"
else store
_ -> giveup "protocol error"
liftIO $ removeWhenExistsWith removeFile (fromRawFilePath tmpfile)
storeput k af tmpfile = case mexportdb of
Just exportdb -> liftIO (Export.getExportTree exportdb k) >>= \case
[] -> storeputkey k af tmpfile
locs -> do
havelocs <- liftIO $ S.fromList
<$> Export.getExportedLocation exportdb k
let locs' = filter (`S.notMember` havelocs) locs
forM_ locs' $ \loc ->
storeputexport exportdb k loc tmpfile
liftIO $ Export.flushDbQueue exportdb
Nothing -> storeputkey k af tmpfile
storeputkey k af tmpfile =
Remote.storeKey r k af (Just tmpfile) nullMeterUpdate
storeputexport exportdb k loc tmpfile = do
Remote.storeExport (Remote.exportActions r) tmpfile k loc nullMeterUpdate
liftIO $ Export.addExportedLocation exportdb k loc
receivetofile iv h n = liftIO receivebytestring >>= \case
Just b -> do
liftIO $ atomically $
putTMVar owaitv ()
`orElse`
readTMVar oclosedv
n' <- storetofile iv h n (L.toChunks b)
-- Normally all the data is sent in a single
-- lazy bytestring. However, when the special
-- remote is a node in a cluster, a PUT is
-- streamed to it in multiple chunks.
if n' == 0
then return True
else receivetofile iv h n'
Nothing -> return False
storetofile _ _ n [] = pure n
storetofile iv h n (b:bs) = do
writeVerifyChunk iv h b
storetofile iv h (n - fromIntegral (B.length b)) bs
proxyget offset af k = withproxytmpfile k $ \tmpfile -> do
-- Don't verify the content from the remote,
-- because the client will do its own verification.
let vc = Remote.NoVerify
tryNonAsync (Remote.retrieveKeyFile r k af (fromRawFilePath tmpfile) nullMeterUpdate vc) >>= \case
Right _ -> liftIO $ senddata offset tmpfile
Left err -> liftIO $ propagateerror err
senddata (Offset offset) f = do
size <- fromIntegral <$> getFileSize f
let n = max 0 (size - offset)
sendmessage $ DATA (Len n)
withBinaryFile (fromRawFilePath f) ReadMode $ \h -> do
hSeek h AbsoluteSeek offset
sendbs =<< L.hGetContents h
-- Important to keep the handle open until
-- the client responds. The bytestring
-- could still be lazily streaming out to
-- the client.
waitclientresponse
where
sendbs bs = do
sendbytestring bs
when (protoversion > ProtocolVersion 0) $
sendmessage (VALIDITY Valid)
waitclientresponse =
receivemessage >>= \case
Just SUCCESS -> return ()
Just FAILURE -> return ()
Just _ -> giveup "protocol error"
Nothing -> return ()
{- Check if this repository can proxy for a specified remote uuid,
- and if so enable proxying for it. -}
checkCanProxy :: UUID -> UUID -> Annex Bool
checkCanProxy remoteuuid myuuid = do
myproxies <- M.lookup myuuid <$> getProxies
checkCanProxy' myproxies remoteuuid >>= \case
Right v -> do
Annex.changeState $ \st -> st { Annex.proxyremote = Just v }
return True
Left Nothing -> return False
Left (Just err) -> giveup err
checkCanProxy' :: Maybe (S.Set Proxy) -> UUID -> Annex (Either (Maybe String) (Either ClusterUUID Remote))
checkCanProxy' Nothing _ = return (Left Nothing)
checkCanProxy' (Just proxies) remoteuuid =
case filter (\p -> proxyRemoteUUID p == remoteuuid) (S.toList proxies) of
[] -> notconfigured
ps -> case mkClusterUUID remoteuuid of
Just cu -> proxyforcluster cu
Nothing -> proxyfor ps
where
proxyfor ps = do
rs <- concat . Remote.byCost <$> Remote.remoteList
myclusters <- annexClusters <$> Annex.getGitConfig
case canProxyForRemote rs ps myclusters remoteuuid of
Nothing -> notconfigured
Just r -> return (Right (Right r))
proxyforcluster cu = do
clusters <- getClusters
if M.member cu (clusterUUIDs clusters)
then return (Right (Left cu))
else notconfigured
notconfigured = M.lookup remoteuuid <$> uuidDescMap >>= \case
Just desc -> return $ Left $ Just $
"not configured to proxy for repository " ++ fromUUIDDesc desc
Nothing -> return $ Left Nothing
{- Remotes that this repository is configured to proxy for.
-
- When there are multiple remotes that access the same repository,
- this picks the lowest cost one that is configured to be used as a proxy.
-}
proxyForRemotes :: Annex [Remote]
proxyForRemotes = do
myuuid <- getUUID
(M.lookup myuuid <$> getProxies) >>= \case
Nothing -> return []
Just myproxies -> do
let myproxies' = S.toList myproxies
rs <- concat . Remote.byCost <$> Remote.remoteList
myclusters <- annexClusters <$> Annex.getGitConfig
return $ mapMaybe (canProxyForRemote rs myproxies' myclusters . Remote.uuid) rs
-- Only proxy for a remote when the git configuration allows it.
-- This is important to prevent changes to the git-annex branch
-- causing unexpected proxying for remotes.
canProxyForRemote
:: [Remote] -- ^ must be sorted by cost
-> [Proxy]
-> M.Map RemoteName ClusterUUID
-> UUID
-> (Maybe Remote)
canProxyForRemote rs myproxies myclusters remoteuuid =
headMaybe $ filter canproxy rs
where
canproxy r =
sameuuid r &&
proxyisconfigured r &&
any (isproxyfor r) myproxies
sameuuid r = Remote.uuid r == remoteuuid
isproxyfor r p =
proxyRemoteUUID p == remoteuuid &&
Remote.name r == proxyRemoteName p
proxyisconfigured r
| remoteAnnexProxy (Remote.gitconfig r) = True
-- Proxy for remotes that are configured as cluster nodes.
| any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ Remote.gitconfig r) = True
-- Proxy for a remote when it is proxied by another remote
-- which is itself configured as a cluster gateway.
| otherwise = case remoteAnnexProxiedBy (Remote.gitconfig r) of
Just proxyuuid -> not $ null $
concatMap (remoteAnnexClusterGateway . Remote.gitconfig) $
filter (\p -> Remote.uuid p == proxyuuid) rs
Nothing -> False
mkProxyMethods :: ProxyMethods
mkProxyMethods = ProxyMethods
{ removedContent = \u k -> logChange k u InfoMissing
, addedContent = \u k -> logChange k u InfoPresent
}

97
Annex/Queue.hs Normal file
View file

@ -0,0 +1,97 @@
{- git-annex command queue
-
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
module Annex.Queue (
addCommand,
addFlushAction,
addUpdateIndex,
flush,
flushWhenFull,
size,
get,
mergeFrom,
) where
import Annex.Common
import Annex hiding (new)
import Annex.LockFile
import qualified Git.Queue
import qualified Git.UpdateIndex
{- Adds a git command to the queue. -}
addCommand :: [CommandParam] -> String -> [CommandParam] -> [FilePath] -> Annex ()
addCommand commonparams command params files = do
q <- get
store =<< flushWhenFull =<<
(Git.Queue.addCommand commonparams command params files q =<< gitRepo)
addFlushAction :: Git.Queue.FlushActionRunner Annex -> [RawFilePath] -> Annex ()
addFlushAction runner files = do
q <- get
store =<< flushWhenFull =<<
(Git.Queue.addFlushAction runner files q =<< gitRepo)
{- Adds an update-index stream to the queue. -}
addUpdateIndex :: Git.UpdateIndex.Streamer -> Annex ()
addUpdateIndex streamer = do
q <- get
store =<< flushWhenFull =<<
(Git.Queue.addUpdateIndex streamer q =<< gitRepo)
{- Runs the queue if it is full. -}
flushWhenFull :: Git.Queue.Queue Annex -> Annex (Git.Queue.Queue Annex)
flushWhenFull q
| Git.Queue.full q = flush' q
| otherwise = return q
{- Runs (and empties) the queue. -}
flush :: Annex ()
flush = do
q <- get
unless (0 == Git.Queue.size q) $ do
store =<< flush' q
{- When there are multiple worker threads, each has its own queue.
- And of course multiple git-annex processes may be running each with its
- own queue.
-
- But, flushing two queues at the same time could lead to failures due to
- git locking files. So, only one queue is allowed to flush at a time.
-}
flush' :: Git.Queue.Queue Annex -> Annex (Git.Queue.Queue Annex)
flush' q = do
lck <- fromRepo gitAnnexGitQueueLock
withExclusiveLock lck $ do
showStoringStateAction
Git.Queue.flush q =<< gitRepo
{- Gets the size of the queue. -}
size :: Annex Int
size = Git.Queue.size <$> get
get :: Annex (Git.Queue.Queue Annex)
get = maybe new return =<< getState repoqueue
new :: Annex (Git.Queue.Queue Annex)
new = do
sz <- annexQueueSize <$> getGitConfig
q <- liftIO $ Git.Queue.new sz Nothing
store q
return q
store :: Git.Queue.Queue Annex -> Annex ()
store q = changeState $ \s -> s { repoqueue = Just q }
mergeFrom :: AnnexState -> Annex ()
mergeFrom st = case repoqueue st of
Nothing -> noop
Just newq -> do
q <- get
let !q' = Git.Queue.merge q newq
store =<< flushWhenFull q'

View file

@ -0,0 +1,96 @@
{- git-annex remote tracking branches
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.RemoteTrackingBranch
( RemoteTrackingBranch
, mkRemoteTrackingBranch
, fromRemoteTrackingBranch
, setRemoteTrackingBranch
, makeRemoteTrackingBranchMergeCommit
, makeRemoteTrackingBranchMergeCommit'
, getRemoteTrackingBranchImportHistory
) where
import Annex.Common
import Annex.CatFile
import qualified Annex
import Git.Types
import qualified Git.Ref
import qualified Git.Branch
import Git.History
import qualified Types.Remote as Remote
import qualified Data.Set as S
newtype RemoteTrackingBranch = RemoteTrackingBranch
{ fromRemoteTrackingBranch :: Ref }
deriving (Show, Eq)
{- Makes a remote tracking branch corresponding to a local branch.
- Note that the local branch does not need to exist yet. -}
mkRemoteTrackingBranch :: Remote -> Branch -> RemoteTrackingBranch
mkRemoteTrackingBranch remote ref = RemoteTrackingBranch $
Git.Ref.underBase ("refs/remotes/" ++ Remote.name remote) ref
{- Set remote tracking branch to point to a commit. -}
setRemoteTrackingBranch :: RemoteTrackingBranch -> Sha -> Annex ()
setRemoteTrackingBranch tb commit =
inRepo $ Git.Branch.update' (fromRemoteTrackingBranch tb) commit
{- Makes a merge commit that preserves the import history of the
- RemoteTrackingBranch, while grafting new git history into it.
-
- The second parent of the merge commit is the past history of the
- RemoteTrackingBranch as imported from a remote. When importing a
- history of trees from a remote, commits can be synthesized from
- them, but such commits won't have the same sha due to eg date differing.
- But since we know that the second parent consists entirely of such
- import commits, they can be reused when updating the
- RemoteTrackingBranch.
-}
makeRemoteTrackingBranchMergeCommit :: RemoteTrackingBranch -> Sha -> Annex Sha
makeRemoteTrackingBranchMergeCommit tb commitsha =
-- Check if the tracking branch exists.
inRepo (Git.Ref.sha (fromRemoteTrackingBranch tb)) >>= \case
Nothing -> return commitsha
Just _ -> inRepo (getHistoryToDepth 1 (fromRemoteTrackingBranch tb)) >>= \case
Nothing -> return commitsha
Just (History hc _) -> case historyCommitParents hc of
[_, importhistory] -> do
treesha <- maybe
(giveup $ "Unable to cat commit " ++ fromRef commitsha)
commitTree
<$> catCommit commitsha
makeRemoteTrackingBranchMergeCommit' commitsha importhistory treesha
-- Earlier versions of git-annex did not
-- make the merge commit, or perhaps
-- something else changed where the
-- tracking branch pointed.
_ -> return commitsha
makeRemoteTrackingBranchMergeCommit' :: Sha -> Sha -> Sha -> Annex Sha
makeRemoteTrackingBranchMergeCommit' commitsha importedhistory treesha = do
cmode <- annexCommitMode <$> Annex.getGitConfig
inRepo $ Git.Branch.commitTree
cmode
["remote tracking branch"]
[commitsha, importedhistory]
treesha
{- When makeRemoteTrackingBranchMergeCommit was used, this finds the
- import history, starting from the second parent of the merge commit.
-}
getRemoteTrackingBranchImportHistory :: History HistoryCommit -> Maybe (History HistoryCommit)
getRemoteTrackingBranchImportHistory (History hc s) =
case historyCommitParents hc of
[_, importhistory] -> go importhistory (S.toList s)
_ -> Nothing
where
go _ [] = Nothing
go i (h@(History hc' _):hs)
| historyCommit hc' == i = Just h
| otherwise = go i hs

87
Annex/ReplaceFile.hs Normal file
View file

@ -0,0 +1,87 @@
{- git-annex file replacing
-
- Copyright 2013-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.ReplaceFile (
replaceGitAnnexDirFile,
replaceGitDirFile,
replaceWorkTreeFile,
replaceFile,
replaceFile',
) where
import Annex.Common
import Annex.Tmp
import Annex.Perms
import Git
import Utility.Tmp.Dir
import Utility.Directory.Create
#ifndef mingw32_HOST_OS
import Utility.Path.Max
#endif
{- replaceFile on a file located inside the gitAnnexDir. -}
replaceGitAnnexDirFile :: FilePath -> (RawFilePath -> Annex a) -> Annex a
replaceGitAnnexDirFile = replaceFile createAnnexDirectory
{- replaceFile on a file located inside the .git directory. -}
replaceGitDirFile :: FilePath -> (RawFilePath -> Annex a) -> Annex a
replaceGitDirFile = replaceFile $ \dir -> do
top <- fromRepo localGitDir
liftIO $ createDirectoryUnder [top] dir
{- replaceFile on a worktree file. -}
replaceWorkTreeFile :: FilePath -> (RawFilePath -> Annex a) -> Annex a
replaceWorkTreeFile = replaceFile createWorkTreeDirectory
{- Replaces a possibly already existing file with a new version,
- atomically, by running an action.
-
- The action is passed the name of temp file, in a temp directory,
- which it can write to, and once done the temp file is moved into place
- and anything else in the temp directory is deleted.
-
- The action can throw an exception, in which case the temp directory
- will be deleted, and the existing file will be preserved.
-
- Throws an IO exception when it was unable to replace the file.
-
- The createdirectory action is only run when moving the file into place
- fails, and can create any parent directory structure needed.
-}
replaceFile :: (RawFilePath -> Annex ()) -> FilePath -> (RawFilePath -> Annex a) -> Annex a
replaceFile createdirectory file action = replaceFile' createdirectory file (const True) action
replaceFile' :: (RawFilePath -> Annex ()) -> FilePath -> (a -> Bool) -> (RawFilePath -> Annex a) -> Annex a
replaceFile' createdirectory file checkres action = withOtherTmp $ \othertmpdir -> do
let othertmpdir' = fromRawFilePath othertmpdir
#ifndef mingw32_HOST_OS
-- Use part of the filename as the template for the temp
-- directory. This does not need to be unique, but it
-- makes it more clear what this temp directory is for.
filemax <- liftIO $ fileNameLengthLimit othertmpdir'
let basetmp = take (filemax `div` 2) (takeFileName file)
#else
-- Windows has limits on the whole path length, so keep
-- it short.
let basetmp = "t"
#endif
withTmpDirIn othertmpdir' basetmp $ \tmpdir -> do
let tmpfile = toRawFilePath (tmpdir </> basetmp)
r <- action tmpfile
when (checkres r) $
replaceFileFrom tmpfile (toRawFilePath file) createdirectory
return r
replaceFileFrom :: RawFilePath -> RawFilePath -> (RawFilePath -> Annex ()) -> Annex ()
replaceFileFrom src dest createdirectory = go `catchIO` fallback
where
go = liftIO $ moveFile src dest
fallback _ = do
createdirectory (parentDir dest)
go

34
Annex/SafeDropProof.hs Normal file
View file

@ -0,0 +1,34 @@
{- git-annex safe drop proof
-
- Copyright 2014-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.SafeDropProof (
SafeDropProof,
safeDropProofEndTime,
safeDropProofExpired,
checkSafeDropProofEndTime,
) where
import Annex.Common
import Types.NumCopies
import Data.Time.Clock.POSIX
safeDropProofExpired :: Annex ()
safeDropProofExpired = do
showNote "unsafe"
showLongNote $ UnquotedString
"Dropping took too long, and locks may have expired."
checkSafeDropProofEndTime :: Maybe SafeDropProof -> IO Bool
checkSafeDropProofEndTime p = case safeDropProofEndTime =<< p of
Nothing -> return True
Just endtime -> do
now <- getPOSIXTime
return (endtime > now)

135
Annex/SpecialRemote.hs Normal file
View file

@ -0,0 +1,135 @@
{- git-annex special remote configuration
-
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.SpecialRemote (
module Annex.SpecialRemote,
module Annex.SpecialRemote.Config
) where
import Annex.Common
import Annex.SpecialRemote.Config
import Types.Remote (RemoteConfig, SetupStage(..), typename, setup)
import Types.GitConfig
import Types.ProposedAccepted
import Config
import Remote.List
import Logs.Remote
import Logs.Trust
import qualified Types.Remote as Remote
import Git.Types (RemoteName)
import Utility.SafeOutput
import qualified Data.Map as M
{- See if there's an existing special remote with this name.
-
- Remotes that are not dead come first in the list
- when a name appears multiple times. -}
findExisting :: RemoteName -> Annex [(UUID, RemoteConfig, Maybe (ConfigFrom UUID))]
findExisting name = do
(a, b) <- findExisting' name
return (a++b)
{- Dead remotes with the name are in the second list, all others in the
- first list. -}
findExisting' :: RemoteName -> Annex ([(UUID, RemoteConfig, Maybe (ConfigFrom UUID))], [(UUID, RemoteConfig, Maybe (ConfigFrom UUID))])
findExisting' name = do
t <- trustMap
partition (\(u, _, _) -> M.lookup u t /= Just DeadTrusted)
. findByRemoteConfig (\c -> lookupName c == Just name)
<$> Logs.Remote.remoteConfigMap
newConfig
:: RemoteName
-> Maybe (Sameas UUID)
-> RemoteConfig
-- ^ configuration provided by the user
-> M.Map UUID RemoteConfig
-- ^ configuration of other special remotes, to inherit from
-- when sameas is used
-> RemoteConfig
newConfig name sameas fromuser m = case sameas of
Nothing -> M.insert nameField (Proposed name) fromuser
Just (Sameas u) -> addSameasInherited m $ M.fromList
[ (sameasNameField, Proposed name)
, (sameasUUIDField, Proposed (fromUUID u))
] `M.union` fromuser
specialRemoteMap :: Annex (M.Map UUID RemoteName)
specialRemoteMap = do
m <- Logs.Remote.remoteConfigMap
return $ specialRemoteNameMap m
specialRemoteNameMap :: M.Map UUID RemoteConfig -> M.Map UUID RemoteName
specialRemoteNameMap = M.fromList . mapMaybe go . M.toList
where
go (u, c) = case lookupName c of
Nothing -> Nothing
Just n -> Just (u, n)
{- find the remote type -}
findType :: RemoteConfig -> Either String RemoteType
findType config = maybe unspecified (specified . fromProposedAccepted) $
M.lookup typeField config
where
unspecified = Left "Specify the type of remote with type="
specified s = case filter (findtype s) remoteTypes of
[] -> Left $ "Unknown remote type " ++ s
++ " (pick from: "
++ intercalate " " (map typename remoteTypes)
++ ")"
(t:_) -> Right t
findtype s i = typename i == s
autoEnable :: Annex ()
autoEnable = do
m <- autoEnableable
enabled <- getenabledremotes
forM_ (M.toList m) $ \(cu, c) -> unless (cu `M.member` enabled) $ do
let u = case findSameasUUID c of
Just (Sameas u') -> u'
Nothing -> cu
case (lookupName c, findType c) of
-- Avoid auto-enabling when the name contains a
-- control character, because git does not avoid
-- displaying control characters in the name of a
-- remote, and an attacker could leverage
-- autoenabling it as part of an attack.
(Just name, Right t) | safeOutput name == name -> do
showSideAction $ UnquotedString $ "Auto enabling special remote " ++ name
dummycfg <- liftIO dummyRemoteGitConfig
tryNonAsync (setup t (AutoEnable c) (Just u) Nothing c dummycfg) >>= \case
Left e -> warning (UnquotedString (show e))
Right (_c, _u) ->
when (cu /= u) $
setConfig (remoteAnnexConfig c "config-uuid") (fromUUID cu)
_ -> return ()
where
getenabledremotes = M.fromList
. map (\r -> (getcu r, r))
<$> remoteList
getcu r = fromMaybe
(Remote.uuid r)
(remoteAnnexConfigUUID (Remote.gitconfig r))
autoEnableable :: Annex (M.Map UUID RemoteConfig)
autoEnableable = do
tm <- trustMap
(M.filterWithKey (notdead tm) . M.filter configured)
<$> remoteConfigMap
where
configured c = fromMaybe False $
trueFalseParser' . fromProposedAccepted
=<< M.lookup autoEnableField c
notdead tm cu c =
let u = case findSameasUUID c of
Just (Sameas u') -> u'
Nothing -> cu
in lookupTrust' u tm /= DeadTrusted

View file

@ -0,0 +1,321 @@
{- git-annex special remote configuration
-
- Copyright 2019-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Annex.SpecialRemote.Config where
import Common
import Types.Remote (configParser)
import Types
import Types.UUID
import Types.ProposedAccepted
import Types.RemoteConfig
import Types.GitConfig
import Config.Cost
import qualified Data.Map as M
import qualified Data.Set as S
import Text.Read
import Data.Typeable
import GHC.Stack
newtype Sameas t = Sameas t
deriving (Show)
newtype ConfigFrom t = ConfigFrom t
deriving (Show)
{- The name of a configured remote is stored in its config using this key. -}
nameField :: RemoteConfigField
nameField = Accepted "name"
{- The name of a sameas remote is stored using this key instead.
- This prevents old versions of git-annex getting confused. -}
sameasNameField :: RemoteConfigField
sameasNameField = Accepted "sameas-name"
lookupName :: RemoteConfig -> Maybe String
lookupName c = fmap fromProposedAccepted $
M.lookup nameField c <|> M.lookup sameasNameField c
instance RemoteNameable RemoteConfig where
getRemoteName c = fromMaybe "" (lookupName c)
{- The uuid that a sameas remote is the same as is stored in this key. -}
sameasUUIDField :: RemoteConfigField
sameasUUIDField = Accepted "sameas-uuid"
{- The type of a remote is stored in its config using this key. -}
typeField :: RemoteConfigField
typeField = Accepted "type"
autoEnableField :: RemoteConfigField
autoEnableField = Accepted "autoenable"
costField :: RemoteConfigField
costField = Accepted "cost"
encryptionField :: RemoteConfigField
encryptionField = Accepted "encryption"
macField :: RemoteConfigField
macField = Accepted "mac"
cipherField :: RemoteConfigField
cipherField = Accepted "cipher"
cipherkeysField :: RemoteConfigField
cipherkeysField = Accepted "cipherkeys"
pubkeysField :: RemoteConfigField
pubkeysField = Accepted "pubkeys"
chunkField :: RemoteConfigField
chunkField = Accepted "chunk"
chunksizeField :: RemoteConfigField
chunksizeField = Accepted "chunksize"
embedCredsField :: RemoteConfigField
embedCredsField = Accepted "embedcreds"
preferreddirField :: RemoteConfigField
preferreddirField = Accepted "preferreddir"
exportTreeField :: RemoteConfigField
exportTreeField = Accepted "exporttree"
importTreeField :: RemoteConfigField
importTreeField = Accepted "importtree"
versioningField :: RemoteConfigField
versioningField = Accepted "versioning"
exportTree :: ParsedRemoteConfig -> Bool
exportTree = fromMaybe False . getRemoteConfigValue exportTreeField
importTree :: ParsedRemoteConfig -> Bool
importTree = fromMaybe False . getRemoteConfigValue importTreeField
isVersioning :: ParsedRemoteConfig -> Bool
isVersioning = fromMaybe False . getRemoteConfigValue versioningField
annexObjectsField :: RemoteConfigField
annexObjectsField = Accepted "annexobjects"
annexObjects :: ParsedRemoteConfig -> Bool
annexObjects = fromMaybe False . getRemoteConfigValue annexObjectsField
{- Parsers for fields that are common to all special remotes. -}
commonFieldParsers :: [RemoteConfigFieldParser]
commonFieldParsers =
[ optionalStringParser nameField
(FieldDesc "name for the special remote")
, optionalStringParser sameasNameField HiddenField
, optionalStringParser sameasUUIDField HiddenField
, autoEnableFieldParser
, costParser costField
(FieldDesc "default cost of this special remote")
, optionalStringParser preferreddirField
(FieldDesc "directory whose content is preferred")
] ++ essentialFieldParsers
{- Parsers for fields that are common to all special remotes, and are
- also essential to include in eg, annex:: urls. -}
essentialFieldParsers :: [RemoteConfigFieldParser]
essentialFieldParsers =
[ optionalStringParser typeField
(FieldDesc "type of special remote")
, yesNoParser exportTreeField (Just False)
(FieldDesc "export trees of files to this remote")
, yesNoParser importTreeField (Just False)
(FieldDesc "import trees of files from this remote")
, yesNoParser annexObjectsField (Just False)
(FieldDesc "store other objects in remote along with exported trees")
]
autoEnableFieldParser :: RemoteConfigFieldParser
autoEnableFieldParser = trueFalseParser autoEnableField (Just False)
(FieldDesc "automatically enable special remote")
{- A remote with sameas-uuid set will inherit these values from the config
- of that uuid. These values cannot be overridden in the remote's config. -}
sameasInherits :: S.Set RemoteConfigField
sameasInherits = S.fromList
-- encryption configuration is necessarily the same for two
-- remotes that access the same data store
[ encryptionField
, macField
, cipherField
, cipherkeysField
, pubkeysField
-- legacy chunking was either enabled or not, so has to be the same
-- across configs for remotes that access the same data
, chunksizeField
-- (new-style chunking does not have that limitation)
-- but there is no benefit to picking a different chunk size
-- for the sameas remote, since it's reading whatever chunks were
-- stored
, chunkField
]
{- Each RemoteConfig that has a sameas-uuid inherits some fields
- from it. Such fields can only be set by inheritance; the RemoteConfig
- cannot provide values from them. -}
addSameasInherited :: M.Map UUID RemoteConfig -> RemoteConfig -> RemoteConfig
addSameasInherited m c = case findSameasUUID c of
Nothing -> c
Just (Sameas sameasuuid) -> case M.lookup sameasuuid m of
Nothing -> c
Just parentc ->
M.withoutKeys c sameasInherits
`M.union`
M.restrictKeys parentc sameasInherits
findSameasUUID :: RemoteConfig -> Maybe (Sameas UUID)
findSameasUUID c = Sameas . toUUID . fromProposedAccepted
<$> M.lookup sameasUUIDField c
{- Remove any fields inherited from a sameas-uuid. When storing a
- RemoteConfig, those fields don't get stored, since they were already
- inherited. -}
removeSameasInherited :: RemoteConfig -> RemoteConfig
removeSameasInherited c = case M.lookup sameasUUIDField c of
Nothing -> c
Just _ -> M.withoutKeys c sameasInherits
{- Finds remote uuids with matching RemoteConfig. -}
findByRemoteConfig :: (RemoteConfig -> Bool) -> M.Map UUID RemoteConfig -> [(UUID, RemoteConfig, Maybe (ConfigFrom UUID))]
findByRemoteConfig matching = map sameasuuid . filter (matching . snd) . M.toList
where
sameasuuid (u, c) = case M.lookup sameasUUIDField c of
Nothing -> (u, c, Nothing)
Just u' -> (toUUID (fromProposedAccepted u'), c, Just (ConfigFrom u))
{- Extracts a value from ParsedRemoteConfig. -}
getRemoteConfigValue :: HasCallStack => Typeable v => RemoteConfigField -> ParsedRemoteConfig -> Maybe v
getRemoteConfigValue f (ParsedRemoteConfig m _) = case M.lookup f m of
Just (RemoteConfigValue v) -> case cast v of
Just v' -> Just v'
Nothing -> error $ unwords
[ "getRemoteConfigValue"
, fromProposedAccepted f
, "found value of unexpected type"
, show (typeOf v) ++ "."
, "This is a bug in git-annex!"
]
Nothing -> Nothing
{- Gets all fields that remoteConfigRestPassthrough matched. -}
getRemoteConfigPassedThrough :: ParsedRemoteConfig -> M.Map RemoteConfigField String
getRemoteConfigPassedThrough (ParsedRemoteConfig m _) =
flip M.mapMaybe m $ \(RemoteConfigValue v) ->
case cast v of
Just (PassedThrough s) -> Just s
Nothing -> Nothing
newtype PassedThrough = PassedThrough String
parsedRemoteConfig :: RemoteType -> RemoteConfig -> Annex ParsedRemoteConfig
parsedRemoteConfig t c = either (const emptycfg) id . parseRemoteConfig c
<$> configParser t c
where
emptycfg = ParsedRemoteConfig mempty c
parseRemoteConfig :: RemoteConfig -> RemoteConfigParser -> Either String ParsedRemoteConfig
parseRemoteConfig c rpc =
go [] c (remoteConfigFieldParsers rpc ++ commonFieldParsers)
where
go l c' [] =
let (passover, leftovers) = partition
(maybe (const False) fst (remoteConfigRestPassthrough rpc) . fst)
(M.toList c')
leftovers' = filter (notaccepted . fst) leftovers
in if not (null leftovers')
then Left $ "Unexpected parameters: " ++
unwords (map (fromProposedAccepted . fst) leftovers')
else
let m = M.fromList $
l ++ map (uncurry passthrough) passover
in Right (ParsedRemoteConfig m c)
go l c' (p:rest) = do
let f = parserForField p
(valueParser p) (M.lookup f c) c >>= \case
Just v -> go ((f,v):l) (M.delete f c') rest
Nothing -> go l (M.delete f c') rest
passthrough f v = (f, RemoteConfigValue (PassedThrough (fromProposedAccepted v)))
notaccepted (Proposed _) = True
notaccepted (Accepted _) = False
optionalStringParser :: RemoteConfigField -> FieldDesc -> RemoteConfigFieldParser
optionalStringParser f fielddesc = RemoteConfigFieldParser
{ parserForField = f
, valueParser = p
, fieldDesc = fielddesc
, valueDesc = Nothing
}
where
p (Just v) _c = Right (Just (RemoteConfigValue (fromProposedAccepted v)))
p Nothing _c = Right Nothing
yesNoParser :: RemoteConfigField -> Maybe Bool -> FieldDesc -> RemoteConfigFieldParser
yesNoParser f mdef fd = genParser yesno f mdef fd
(Just (ValueDesc "yes or no"))
where
yesno "yes" = Just True
yesno "no" = Just False
yesno _ = Nothing
trueFalseParser :: RemoteConfigField -> Maybe Bool -> FieldDesc -> RemoteConfigFieldParser
trueFalseParser f mdef fd = genParser trueFalseParser' f mdef fd
(Just (ValueDesc "true or false"))
-- Not using Git.Config.isTrueFalse because git supports
-- a lot of other values for true and false in its configs,
-- and this is not a git config and we want to avoid that mess.
trueFalseParser' :: String -> Maybe Bool
trueFalseParser' "true" = Just True
trueFalseParser' "false" = Just False
trueFalseParser' _ = Nothing
costParser :: RemoteConfigField -> FieldDesc -> RemoteConfigFieldParser
costParser f fd = genParser readcost f Nothing fd
(Just (ValueDesc "a number"))
where
readcost :: String -> Maybe Cost
readcost = readMaybe
genParser
:: Typeable t
=> (String -> Maybe t)
-> RemoteConfigField
-> Maybe t -- ^ default if not configured
-> FieldDesc
-> Maybe ValueDesc
-> RemoteConfigFieldParser
genParser parse f mdef fielddesc valuedesc = RemoteConfigFieldParser
{ parserForField = f
, valueParser = p
, fieldDesc = fielddesc
, valueDesc = valuedesc
}
where
p Nothing _c = Right (fmap RemoteConfigValue mdef)
p (Just v) _c = case parse (fromProposedAccepted v) of
Just b -> Right (Just (RemoteConfigValue b))
Nothing -> case v of
Accepted _ -> Right (fmap RemoteConfigValue mdef)
Proposed _ -> Left $
"Bad value for " ++ fromProposedAccepted f ++
case valuedesc of
Just (ValueDesc vd) ->
" (expected " ++ vd ++ ")"
Nothing -> ""

480
Annex/Ssh.hs Normal file
View file

@ -0,0 +1,480 @@
{- git-annex ssh interface, with connection caching
-
- Copyright 2012-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.Ssh (
ConsumeStdin(..),
SshCommand,
sshCommand,
sshOptions,
sshCacheDir,
sshReadPort,
forceSshCleanup,
sshOptionsEnv,
sshOptionsTo,
inRepoWithSshOptionsTo,
runSshOptions,
sshAskPassEnv,
runSshAskPass
) where
import Annex.Common
import Annex.LockFile
import qualified BuildInfo
import qualified Annex
import qualified Git
import qualified Git.Url
import Config
import Annex.Path
import Utility.Env
import Utility.Hash
import Types.CleanupActions
import Annex.Concurrent.Utility
import Types.Concurrency
import Git.Env
import Git.Ssh
import qualified Utility.RawFilePath as R
import Annex.Perms
#ifndef mingw32_HOST_OS
import Annex.LockPool
#endif
import Control.Concurrent.STM
import qualified Data.ByteString as S
import qualified System.FilePath.ByteString as P
{- Some ssh commands are fed stdin on a pipe and so should be allowed to
- consume it. But ssh commands that are not piped stdin should generally
- not be allowed to consume the process's stdin. -}
data ConsumeStdin = ConsumeStdin | NoConsumeStdin
{- Generates a command to ssh to a given host (or user@host) on a given
- port. This includes connection caching parameters, and any ssh-options.
- If GIT_SSH or GIT_SSH_COMMAND is enabled, they are used instead. -}
sshCommand :: ConsumeStdin -> (SshHost, Maybe SshPort) -> RemoteGitConfig -> SshCommand -> Annex (FilePath, [CommandParam])
sshCommand cs (host, port) gc remotecmd = ifM (liftIO safe_GIT_SSH)
( maybe go return
=<< liftIO (gitSsh' host port remotecmd (consumeStdinParams cs))
, go
)
where
go = do
ps <- sshOptions cs (host, port) gc []
return ("ssh", Param (fromSshHost host):ps++[Param remotecmd])
{- Generates parameters to ssh to a given host (or user@host) on a given
- port. This includes connection caching parameters, and any
- ssh-options. Note that the host to ssh to and the command to run
- are not included in the returned options. -}
sshOptions :: ConsumeStdin -> (SshHost, Maybe Integer) -> RemoteGitConfig -> [CommandParam] -> Annex [CommandParam]
sshOptions cs (host, port) gc opts = go =<< sshCachingInfo (host, port)
where
go (Nothing, params) = return $ mkparams cs params
go (Just socketfile, params) = do
prepSocket socketfile host (mkparams NoConsumeStdin params)
return $ mkparams cs params
mkparams cs' ps = concat
[ ps
, map Param (remoteAnnexSshOptions gc)
, opts
, portParams port
, consumeStdinParams cs'
, [Param "-T"]
]
{- Due to passing -n to GIT_SSH and GIT_SSH_COMMAND, some settings
- of those that expect exactly git's parameters will break. So only
- use those if the user set GIT_ANNEX_USE_GIT_SSH to say it's ok. -}
safe_GIT_SSH :: IO Bool
safe_GIT_SSH = (== Just "1") <$> getEnv "GIT_ANNEX_USE_GIT_SSH"
consumeStdinParams :: ConsumeStdin -> [CommandParam]
consumeStdinParams ConsumeStdin = []
consumeStdinParams NoConsumeStdin = [Param "-n"]
{- Returns a filename to use for a ssh connection caching socket, and
- parameters to enable ssh connection caching. -}
sshCachingInfo :: (SshHost, Maybe Integer) -> Annex (Maybe FilePath, [CommandParam])
sshCachingInfo (host, port) = go =<< sshCacheDir'
where
go (Right dir) =
liftIO (bestSocketPath $ dir P.</> hostport2socket host port) >>= return . \case
Nothing -> (Nothing, [])
Just socketfile ->
let socketfile' = fromRawFilePath socketfile
in (Just socketfile', sshConnectionCachingParams socketfile')
-- No connection caching with concurrency is not a good
-- combination, so warn the user.
go (Left whynocaching) = do
getConcurrency >>= \case
NonConcurrent -> return ()
Concurrent {} -> warnnocaching whynocaching
ConcurrentPerCpu -> warnnocaching whynocaching
return (Nothing, [])
warnnocaching whynocaching =
whenM (annexAdviceNoSshCaching <$> Annex.getGitConfig) $ do
warning $ UnquotedString nocachingwarning
warning $ UnquotedString whynocaching
nocachingwarning = unwords
[ "You have enabled concurrency, but git-annex is not able"
, "to use ssh connection caching. This may result in"
, "multiple ssh processes prompting for passwords at the"
, "same time."
]
{- Given an absolute path to use for a socket file,
- returns whichever is shorter of that or the relative path to the same
- file.
-
- If no path can be constructed that is a valid socket, returns Nothing. -}
bestSocketPath :: RawFilePath -> IO (Maybe RawFilePath)
bestSocketPath abssocketfile = do
relsocketfile <- liftIO $ relPathCwdToFile abssocketfile
let socketfile = if S.length abssocketfile <= S.length relsocketfile
then abssocketfile
else relsocketfile
return $ if valid_unix_socket_path socketfile sshgarbagelen
then Just socketfile
else Nothing
where
-- ssh appends a 16 char extension to the socket when setting it
-- up, which needs to be taken into account when checking
-- that a valid socket was constructed.
sshgarbagelen = 1+16
sshConnectionCachingParams :: FilePath -> [CommandParam]
sshConnectionCachingParams socketfile =
[ Param "-S", Param socketfile
, Param "-o", Param "ControlMaster=auto"
, Param "-o", Param "ControlPersist=yes"
]
sshSocketDirEnv :: String
sshSocketDirEnv = "GIT_ANNEX_SSH_SOCKET_DIR"
{- Returns the directory where ssh connection caching sockets can be
- stored.
-
- The directory will be created if it does not exist.
-}
sshCacheDir :: Annex (Maybe RawFilePath)
sshCacheDir = eitherToMaybe <$> sshCacheDir'
sshCacheDir' :: Annex (Either String RawFilePath)
sshCacheDir' =
ifM (fromMaybe BuildInfo.sshconnectioncaching . annexSshCaching <$> Annex.getGitConfig)
( ifM crippledFileSystem
( gettmpdir >>= \case
Nothing ->
return (Left crippledfswarning)
Just tmpdir ->
liftIO $ catchMsgIO $
usetmpdir tmpdir
, do
d <- fromRepo gitAnnexSshDir
createAnnexDirectory d
return (Right d)
)
, return (Left "annex.sshcaching is not set to true")
)
where
gettmpdir = liftIO $ getEnv sshSocketDirEnv
usetmpdir tmpdir = do
let socktmp = tmpdir </> "ssh"
createDirectoryIfMissing True socktmp
return (toRawFilePath socktmp)
crippledfswarning = unwords
[ "This repository is on a crippled filesystem, so unix named"
, "pipes probably don't work, and ssh connection caching"
, "relies on those. One workaround is to set"
, sshSocketDirEnv
, "to point to a directory on a non-crippled filesystem."
]
portParams :: Maybe Integer -> [CommandParam]
portParams Nothing = []
portParams (Just port) = [Param "-p", Param $ show port]
{- Prepare to use a socket file for ssh connection caching.
-
- When concurrency is enabled, this blocks until a ssh connection
- has been made to the host. So, any password prompting by ssh will
- happen in this call, and only one ssh process will prompt at a time.
-
- Locks the socket lock file to prevent other git-annex processes from
- stopping the ssh multiplexer on this socket.
-}
prepSocket :: FilePath -> SshHost -> [CommandParam] -> Annex ()
prepSocket socketfile sshhost sshparams = do
-- There could be stale ssh connections hanging around
-- from a previous git-annex run that was interrupted.
-- This must run only once, before we have made any ssh connection,
-- and any other prepSocket calls must block while it's run.
tv <- Annex.getRead Annex.sshstalecleaned
join $ liftIO $ atomically $ do
cleaned <- takeTMVar tv
if cleaned
then do
putTMVar tv cleaned
return noop
else return $ do
sshCleanup
liftIO $ atomically $ putTMVar tv True
-- Cleanup at shutdown.
Annex.addCleanupAction SshCachingCleanup sshCleanup
let socketlock = socket2lock socketfile
getConcurrency >>= \case
NonConcurrent -> return ()
Concurrent {} -> makeconnection socketlock
ConcurrentPerCpu -> makeconnection socketlock
lockFileCached socketlock
where
-- When the LockCache already has the socketlock in it,
-- the connection has already been started. Otherwise,
-- get the connection started now.
makeconnection socketlock = debugLocks $
whenM (isNothing <$> fromLockCache socketlock) $
-- See if ssh can connect in batch mode,
-- if so there's no need to block for a password
-- prompt.
unlessM (tryssh ["-o", "BatchMode=true"]) $
-- ssh needs to prompt (probably)
-- If the user enters the wrong password,
-- ssh will tell them, so we can ignore
-- failure.
void $ prompt $ tryssh []
-- Try to ssh to the host quietly. Returns True if ssh apparently
-- connected to the host successfully. If ssh failed to connect,
-- returns False.
-- Even if ssh is forced to run some specific command, this will
-- return True.
-- (Except there's an unlikely false positive where a forced
-- ssh command exits 255.)
tryssh extraps = liftIO $ withNullHandle $ \nullh -> do
let p = (proc "ssh" $ concat
[ extraps
, toCommand sshparams
, [fromSshHost sshhost, "true"]
])
{ std_out = UseHandle nullh
, std_err = UseHandle nullh
}
withCreateProcess p $ \_ _ _ pid -> do
exitcode <- waitForProcess pid
return $ case exitcode of
ExitFailure 255 -> False
_ -> True
{- Find ssh socket files.
-
- The check that the lock file exists makes only socket files
- that were set up by prepSocket be found. On some NFS systems,
- a deleted socket file may linger for a while under another filename;
- and this check makes such files be skipped since the corresponding lock
- file won't exist.
-}
enumSocketFiles :: Annex [FilePath]
enumSocketFiles = liftIO . go =<< sshCacheDir
where
go Nothing = return []
go (Just dir) = filterM (R.doesPathExist . socket2lock)
=<< filter (not . isLock)
<$> catchDefaultIO [] (dirContents (fromRawFilePath dir))
{- Stop any unused ssh connection caching processes. -}
sshCleanup :: Annex ()
sshCleanup = mapM_ cleanup =<< enumSocketFiles
where
cleanup socketfile = do
#ifndef mingw32_HOST_OS
-- Drop any shared lock we have, and take an
-- exclusive lock, without blocking. If the lock
-- succeeds, nothing is using this ssh, and it can
-- be stopped.
--
-- After ssh is stopped cannot remove the lock file;
-- other processes may be waiting on our exclusive
-- lock to use it.
let lockfile = socket2lock socketfile
unlockFile lockfile
mode <- annexFileMode
tryLockExclusive (Just mode) lockfile >>= \case
Nothing -> noop
Just lck -> do
forceStopSsh socketfile
liftIO $ dropLock lck
#else
forceStopSsh socketfile
#endif
{- Stop all ssh connection caching processes, even when they're in use. -}
forceSshCleanup :: Annex ()
forceSshCleanup = mapM_ forceStopSsh =<< enumSocketFiles
forceStopSsh :: FilePath -> Annex ()
forceStopSsh socketfile = withNullHandle $ \nullh -> do
let (dir, base) = splitFileName socketfile
let p = (proc "ssh" $ toCommand $
[ Param "-O", Param "stop" ] ++
sshConnectionCachingParams base ++
[Param "localhost"])
{ cwd = Just dir
-- "ssh -O stop" is noisy on stderr even with -q
, std_out = UseHandle nullh
, std_err = UseHandle nullh
}
void $ liftIO $ catchMaybeIO $ withCreateProcess p $ \_ _ _ pid ->
forceSuccessProcess p pid
liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath socketfile)
{- This needs to be as short as possible, due to limitations on the length
- of the path to a socket file. At the same time, it needs to be unique
- for each host.
-}
hostport2socket :: SshHost -> Maybe Integer -> RawFilePath
hostport2socket host Nothing = hostport2socket' $ fromSshHost host
hostport2socket host (Just port) = hostport2socket' $
fromSshHost host ++ "!" ++ show port
hostport2socket' :: String -> RawFilePath
hostport2socket' s
| length s > lengthofmd5s = toRawFilePath $ show $ md5 $ encodeBL s
| otherwise = toRawFilePath s
where
lengthofmd5s = 32
socket2lock :: FilePath -> RawFilePath
socket2lock socket = toRawFilePath (socket ++ lockExt)
isLock :: FilePath -> Bool
isLock f = lockExt `isSuffixOf` f
lockExt :: String
lockExt = ".lock"
{- This is the size of the sun_path component of sockaddr_un, which
- is the limit to the total length of the filename of a unix socket.
-
- On Linux, this is 108. On OSX, 104. TODO: Probe
-}
sizeof_sockaddr_un_sun_path :: Int
sizeof_sockaddr_un_sun_path = 100
{- Note that this looks at the true length of the path in bytes, as it will
- appear on disk. -}
valid_unix_socket_path :: RawFilePath -> Int -> Bool
valid_unix_socket_path f n = S.length f + n < sizeof_sockaddr_un_sun_path
{- Parses the SSH port, and returns the other OpenSSH options. If
- several ports are found, the last one takes precedence. -}
sshReadPort :: [String] -> (Maybe Integer, [String])
sshReadPort params = (port, reverse args)
where
(port,args) = aux (Nothing, []) params
aux (p,ps) [] = (p,ps)
aux (_,ps) ("-p":p:rest) = aux (readPort p, ps) rest
aux (p,ps) (q:rest) | "-p" `isPrefixOf` q = aux (readPort $ drop 2 q, ps) rest
| otherwise = aux (p,q:ps) rest
readPort p = fmap fst $ listToMaybe $ reads p
{- When this env var is set, git-annex runs ssh with the specified
- options. (The options are separated by newlines.)
-
- This is a workaround for GIT_SSH not being able to contain
- additional parameters to pass to ssh. (GIT_SSH_COMMAND can,
- but is not supported by older versions of git.) -}
sshOptionsEnv :: String
sshOptionsEnv = "GIT_ANNEX_SSHOPTION"
toSshOptionsEnv :: [CommandParam] -> String
toSshOptionsEnv = unlines . toCommand
fromSshOptionsEnv :: String -> [CommandParam]
fromSshOptionsEnv = map Param . lines
{- Enables ssh caching for git push/pull to a particular
- remote git repo. (Can safely be used on non-ssh remotes.)
-
- Also propagates any configured ssh-options.
-
- Like inRepo, the action is run with the local git repo.
- But here it's a modified version, with gitEnv to set GIT_SSH=git-annex,
- and sshOptionsEnv set so that git-annex will know what socket
- file to use. -}
inRepoWithSshOptionsTo :: Git.Repo -> RemoteGitConfig -> (Git.Repo -> IO a) -> Annex a
inRepoWithSshOptionsTo remote gc a =
liftIO . a =<< sshOptionsTo remote gc =<< gitRepo
{- To make any git commands be run with ssh caching enabled,
- and configured ssh-options alters the local Git.Repo's gitEnv
- to set GIT_SSH=git-annex, and set sshOptionsEnv when running git
- commands.
-
- If GIT_SSH or GIT_SSH_COMMAND are enabled, this has no effect. -}
sshOptionsTo :: Git.Repo -> RemoteGitConfig -> Git.Repo -> Annex Git.Repo
sshOptionsTo remote gc localr
| not (Git.repoIsUrl remote) || Git.repoIsHttp remote = unchanged
| otherwise = case Git.Url.hostuser remote of
Nothing -> unchanged
Just host -> ifM (liftIO $ safe_GIT_SSH <&&> gitSshEnvSet)
( unchanged
, do
let port = Git.Url.port remote
let sshhost = either giveup id (mkSshHost host)
(msockfile, cacheparams) <- sshCachingInfo (sshhost, port)
case msockfile of
Nothing -> use []
Just sockfile -> do
prepSocket sockfile sshhost $ concat
[ cacheparams
, map Param (remoteAnnexSshOptions gc)
, portParams port
, consumeStdinParams NoConsumeStdin
, [Param "-T"]
]
use cacheparams
)
where
unchanged = return localr
use opts = do
let sshopts = concat
[ opts
, map Param (remoteAnnexSshOptions gc)
]
if null sshopts
then unchanged
else do
command <- liftIO programPath
liftIO $ do
localr' <- addGitEnv localr sshOptionsEnv
(toSshOptionsEnv sshopts)
addGitEnv localr' gitSshEnv command
runSshOptions :: [String] -> String -> IO ()
runSshOptions args s = do
let args' = toCommand (fromSshOptionsEnv s) ++ args
let p = proc "ssh" args'
exitcode <- withCreateProcess p $ \_ _ _ pid -> waitForProcess pid
exitWith exitcode
{- When this env var is set, git-annex is being used as a ssh-askpass
- program, and should read the password from the specified location,
- and output it for ssh to read. -}
sshAskPassEnv :: String
sshAskPassEnv = "GIT_ANNEX_SSHASKPASS"
runSshAskPass :: FilePath -> IO ()
runSshAskPass passfile = putStrLn =<< readFile passfile

154
Annex/StallDetection.hs Normal file
View file

@ -0,0 +1,154 @@
{- Stall detection for transfers.
-
- Copyright 2020-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.StallDetection (
getStallDetection,
detectStalls,
StallDetection,
) where
import Annex.Common
import Types.StallDetection
import Types.Direction
import Types.Remote (gitconfig)
import Utility.Metered
import Utility.HumanTime
import Utility.DataUnits
import Utility.ThreadScheduler
import Control.Concurrent.STM
import Control.Monad.IO.Class (MonadIO)
import Data.Time.Clock
getStallDetection :: Direction -> Remote -> Maybe StallDetection
getStallDetection Download r =
remoteAnnexStallDetectionDownload (gitconfig r)
<|> remoteAnnexStallDetection (gitconfig r)
getStallDetection Upload r =
remoteAnnexStallDetectionUpload (gitconfig r)
<|> remoteAnnexStallDetection (gitconfig r)
{- This may be safely canceled (with eg uninterruptibleCancel),
- as long as the passed action can be safely canceled. -}
detectStalls :: (Monad m, MonadIO m) => Maybe StallDetection -> TVar (Maybe BytesProcessed) -> m () -> m ()
detectStalls Nothing _ _ = noop
detectStalls (Just StallDetectionDisabled) _ _ = noop
detectStalls (Just (StallDetection bwrate@(BwRate _minsz duration))) metervar onstall = do
-- If the progress is being updated, but less frequently than
-- the specified duration, a stall would be incorrectly detected.
--
-- For example, consider the case of a remote that does
-- not support progress updates, but is chunked with a large chunk
-- size. In that case, progress is only updated after each chunk.
--
-- So, wait for the first update, and see how long it takes.
-- When it's longer than the duration (or close to it),
-- upscale the duration and minsz accordingly.
starttime <- liftIO getCurrentTime
v <- waitforfirstupdate =<< readMeterVar metervar
endtime <- liftIO getCurrentTime
let timepassed = floor (endtime `diffUTCTime` starttime)
let BwRate scaledminsz scaledduration = upscale bwrate timepassed
detectStalls' scaledminsz scaledduration metervar onstall v
where
minwaitsecs = Seconds $
min 60 (fromIntegral (durationSeconds duration))
waitforfirstupdate startval = do
liftIO $ threadDelaySeconds minwaitsecs
v <- readMeterVar metervar
if v > startval
then return v
else waitforfirstupdate startval
detectStalls (Just ProbeStallDetection) metervar onstall = do
-- Only do stall detection once the progress is confirmed to be
-- consistently updating. After the first update, it needs to
-- advance twice within 30 seconds. With that established,
-- if no data at all is sent for a 60 second period, it's
-- assumed to be a stall.
v <- readMeterVar metervar >>= waitforfirstupdate
ontimelyadvance v $ \v' -> ontimelyadvance v' $
detectStalls' 1 duration metervar onstall
where
duration = Duration 60
delay = Seconds (fromIntegral (durationSeconds duration) `div` 2)
waitforfirstupdate startval = do
liftIO $ threadDelaySeconds delay
v <- readMeterVar metervar
if v > startval
then return v
else waitforfirstupdate startval
ontimelyadvance v cont = do
liftIO $ threadDelaySeconds delay
v' <- readMeterVar metervar
when (v' > v) $
cont v'
detectStalls'
:: (Monad m, MonadIO m)
=> ByteSize
-> Duration
-> TVar (Maybe BytesProcessed)
-> m ()
-> Maybe ByteSize
-> m ()
detectStalls' minsz duration metervar onstall st = do
liftIO $ threadDelaySeconds delay
-- Get whatever progress value was reported most recently, if any.
v <- readMeterVar metervar
let cont = detectStalls' minsz duration metervar onstall v
case (st, v) of
(Nothing, _) -> cont
(_, Nothing) -> cont
(Just prev, Just sofar)
-- Just in case a progress meter somehow runs
-- backwards, or a second progress meter was
-- started and is at a smaller value than
-- the previous one.
| prev > sofar -> cont
| sofar - prev < minsz -> onstall
| otherwise -> cont
where
delay = Seconds (fromIntegral (durationSeconds duration))
readMeterVar
:: MonadIO m
=> TVar (Maybe BytesProcessed)
-> m (Maybe ByteSize)
readMeterVar metervar = liftIO $ atomically $
fmap fromBytesProcessed <$> readTVar metervar
-- Scale up the minsz and duration to match the observed time that passed
-- between progress updates. This allows for some variation in the transfer
-- rate causing later progress updates to happen less frequently.
upscale :: BwRate -> Integer -> BwRate
upscale input@(BwRate minsz duration) timepassedsecs
| timepassedsecs > dsecs `div` allowedvariation = BwRate
(ceiling (fromIntegral minsz * scale))
(Duration (ceiling (fromIntegral dsecs * scale)))
| otherwise = input
where
scale = max (1 :: Double) $
(fromIntegral timepassedsecs / fromIntegral (max dsecs 1))
* fromIntegral allowedvariation
dsecs = durationSeconds duration
-- Setting this too low will make normal bandwidth variations be
-- considered to be stalls, while setting it too high will make
-- stalls not be detected for much longer than the expected
-- duration.
--
-- For example, a BwRate of 20MB/1m, when the first progress
-- update takes 10m to arrive, is scaled to 600MB/30m. That 30m
-- is a reasonable since only 3 chunks get sent in that amount of
-- time at that rate. If allowedvariation = 10, that would
-- be 2000MB/100m, which seems much too long to wait to detect a
-- stall.
allowedvariation = 3

67
Annex/Startup.hs Normal file
View file

@ -0,0 +1,67 @@
{- git-annex startup
-
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.Startup where
import Annex.Common
import qualified Annex
import Logs.Cluster
#ifndef mingw32_HOST_OS
import Control.Concurrent.STM
import System.Posix.Signals
#endif
{- Run when starting up the main git-annex program. -}
startup :: Annex ()
startup = do
startupSignals
gc <- Annex.getGitConfig
when (isinitialized gc)
startupAnnex
where
isinitialized gc = annexUUID gc /= NoUUID
&& isJust (annexVersion gc)
{- Run when starting up the main git-annex program when
- git-annex has already been initialized.
- Alternatively, run after initialization.
-}
startupAnnex :: Annex ()
startupAnnex = doQuietAction $
-- Logs.Location needs this before it is used, in order for a
-- cluster to be treated as the location of keys
-- that are located in any of its nodes.
preLoadClusters
startupSignals :: Annex ()
startupSignals = do
#ifndef mingw32_HOST_OS
av <- Annex.getRead Annex.signalactions
let propagate sig = liftIO $ installhandleronce sig av
propagate sigINT
propagate sigQUIT
propagate sigTERM
propagate sigTSTP
propagate sigCONT
propagate sigHUP
-- sigWINCH is not propagated; it should not be needed,
-- and the concurrent-output library installs its own signal
-- handler for it.
-- sigSTOP and sigKILL cannot be caught, so will not be propagated.
where
installhandleronce sig av = void $
installHandler sig (CatchOnce (gotsignal sig av)) Nothing
gotsignal sig av = do
mapM_ (\a -> a (fromIntegral sig)) =<< atomically (readTVar av)
raiseSignal sig
installhandleronce sig av
#else
return ()
#endif

68
Annex/TaggedPush.hs Normal file
View file

@ -0,0 +1,68 @@
{- git-annex tagged pushes
-
- Copyright 2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.TaggedPush where
import Annex.Common
import qualified Remote
import qualified Annex.Branch
import qualified Git
import qualified Git.Ref
import qualified Git.Command
import qualified Git.Branch
import Utility.Base64
import qualified Data.ByteString as S
{- Converts a git branch into a branch that is tagged with a UUID, typically
- the UUID of the repo that will be pushing it, and possibly with other
- information.
-
- Pushing to branches on the remote that have our uuid in them is ugly,
- but it reserves those branches for pushing by us, and so our pushes will
- never conflict with other pushes.
-
- To avoid cluttering up the branch display, the branch is put under
- refs/synced/, rather than the usual refs/remotes/
-
- Both UUIDs and Base64 encoded data are always legal to be used in git
- refs, per git-check-ref-format.
-}
toTaggedBranch :: UUID -> Maybe String -> Git.Branch -> Git.Ref
toTaggedBranch u info b = Git.Ref $ S.intercalate "/" $ catMaybes
[ Just "refs/synced"
, Just $ fromUUID u
, toB64 . encodeBS <$> info
, Just $ Git.fromRef' $ Git.Ref.base b
]
fromTaggedBranch :: Git.Ref -> Maybe (UUID, Maybe S.ByteString)
fromTaggedBranch b = case splitc '/' $ Git.fromRef b of
("refs":"synced":u:info:_base) ->
Just (toUUID u, fromB64Maybe (encodeBS info))
("refs":"synced":u:_base) ->
Just (toUUID u, Nothing)
_ -> Nothing
listTaggedBranches :: Annex [(Git.Sha, Git.Ref)]
listTaggedBranches = filter (isJust . fromTaggedBranch . snd)
<$> inRepo Git.Ref.list
taggedPush :: UUID -> Maybe String -> Git.Ref -> Remote -> Git.Repo -> IO Bool
taggedPush u info branch remote = Git.Command.runBool
[ Param "push"
, Param $ Remote.name remote
{- Using forcePush here is safe because we "own" the tagged branch
- we're pushing; it has no other writers. Ensures it is pushed
- even if it has been rewritten by a transition. -}
, Param $ Git.Branch.forcePush $ refspec Annex.Branch.name
, Param $ refspec branch
]
where
refspec b = Git.fromRef b ++ ":" ++ Git.fromRef (toTaggedBranch u info b)

74
Annex/Tmp.hs Normal file
View file

@ -0,0 +1,74 @@
{- git-annex tmp files
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Tmp where
import Annex.Common
import qualified Annex
import Annex.LockFile
import Annex.Perms
import Types.CleanupActions
import qualified Utility.RawFilePath as R
import Data.Time.Clock.POSIX
import System.PosixCompat.Files (modificationTime)
-- | For creation of tmp files, other than for key's contents.
--
-- The action should normally clean up whatever files it writes to the temp
-- directory that is passed to it. However, once the action is done,
-- any files left in that directory may be cleaned up by another process at
-- any time.
withOtherTmp :: (RawFilePath -> Annex a) -> Annex a
withOtherTmp a = do
Annex.addCleanupAction OtherTmpCleanup cleanupOtherTmp
tmpdir <- fromRepo gitAnnexTmpOtherDir
tmplck <- fromRepo gitAnnexTmpOtherLock
withSharedLock tmplck $ do
void $ createAnnexDirectory tmpdir
a tmpdir
-- | This uses an alternate temp directory. The action should normally
-- clean up whatever files it writes there, but if it leaves files
-- there (perhaps due to being interrupted), the files will be eventually
-- cleaned up by another git-annex process (after they're a week old).
--
-- Unlike withOtherTmp, this does not rely on locking working.
-- Its main use is in situations where the state of lockfile is not
-- determined yet, eg during initialization.
withEventuallyCleanedOtherTmp :: (RawFilePath -> Annex a) -> Annex a
withEventuallyCleanedOtherTmp = bracket setup cleanup
where
setup = do
tmpdir <- fromRepo gitAnnexTmpOtherDirOld
void $ createAnnexDirectory tmpdir
return tmpdir
cleanup = liftIO . void . tryIO . removeDirectory . fromRawFilePath
-- | Cleans up any tmp files that were left by a previous
-- git-annex process that got interrupted or failed to clean up after
-- itself for some other reason.
--
-- Does not do anything if withOtherTmp is running.
cleanupOtherTmp :: Annex ()
cleanupOtherTmp = do
tmplck <- fromRepo gitAnnexTmpOtherLock
void $ tryIO $ tryExclusiveLock tmplck $ do
tmpdir <- fromRawFilePath <$> fromRepo gitAnnexTmpOtherDir
void $ liftIO $ tryIO $ removeDirectoryRecursive tmpdir
oldtmp <- fromRawFilePath <$> fromRepo gitAnnexTmpOtherDirOld
liftIO $ mapM_ cleanold
=<< emptyWhenDoesNotExist (dirContentsRecursive oldtmp)
liftIO $ void $ tryIO $ removeDirectory oldtmp -- when empty
where
cleanold f = do
now <- liftIO getPOSIXTime
let oldenough = now - (60 * 60 * 24 * 7)
catchMaybeIO (modificationTime <$> R.getSymbolicLinkStatus (toRawFilePath f)) >>= \case
Just mtime | realToFrac mtime <= oldenough ->
void $ tryIO $ removeWhenExistsWith R.removeLink (toRawFilePath f)
_ -> return ()

443
Annex/Transfer.hs Normal file
View file

@ -0,0 +1,443 @@
{- git-annex transfers
-
- Copyright 2012-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP, BangPatterns, OverloadedStrings #-}
module Annex.Transfer (
module X,
upload,
upload',
alwaysUpload,
download,
download',
runTransfer,
alwaysRunTransfer,
noRetry,
stdRetry,
pickRemote,
) where
import Annex.Common
import qualified Annex
import Logs.Transfer as X
import Types.Transfer as X
import Annex.Notification as X
import Annex.Content
import Annex.Perms
import Annex.Action
import Utility.Metered
import Utility.ThreadScheduler
import Utility.FileMode
import Annex.LockPool
import Types.Key
import qualified Types.Remote as Remote
import qualified Types.Backend
import Types.Concurrency
import Annex.Concurrent
import Types.WorkerPool
import Annex.WorkerPool
import Annex.TransferrerPool
import Annex.StallDetection
import Backend (isCryptographicallySecureKey)
import Types.StallDetection
import qualified Utility.RawFilePath as R
import Control.Concurrent
import Control.Concurrent.Async
import Control.Concurrent.STM hiding (retry)
import qualified Data.Map.Strict as M
import qualified System.FilePath.ByteString as P
import Data.Ord
-- Upload, supporting canceling detected stalls.
upload :: Remote -> Key -> AssociatedFile -> RetryDecider -> NotifyWitness -> Annex Bool
upload r key af d witness =
case getStallDetection Upload r of
Nothing -> go (Just ProbeStallDetection)
Just StallDetectionDisabled -> go Nothing
Just sd -> runTransferrer sd r key af d Upload witness
where
go sd = upload' (Remote.uuid r) key af sd d (action . Remote.storeKey r key af Nothing) witness
-- Upload, not supporting canceling detected stalls
upload' :: Observable v => UUID -> Key -> AssociatedFile -> Maybe StallDetection -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
upload' u key f sd d a _witness = guardHaveUUID u $
runTransfer (Transfer Upload u (fromKey id key)) Nothing f sd d a
alwaysUpload :: Observable v => UUID -> Key -> AssociatedFile -> Maybe StallDetection -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
alwaysUpload u key f sd d a _witness = guardHaveUUID u $
alwaysRunTransfer (Transfer Upload u (fromKey id key)) Nothing f sd d a
-- Download, supporting canceling detected stalls.
download :: Remote -> Key -> AssociatedFile -> RetryDecider -> NotifyWitness -> Annex Bool
download r key f d witness =
case getStallDetection Download r of
Nothing -> go (Just ProbeStallDetection)
Just StallDetectionDisabled -> go Nothing
Just sd -> runTransferrer sd r key f d Download witness
where
go sd = getViaTmp (Remote.retrievalSecurityPolicy r) vc key f Nothing $ \dest ->
download' (Remote.uuid r) key f sd d (go' dest) witness
go' dest p = verifiedAction $
Remote.retrieveKeyFile r key f (fromRawFilePath dest) p vc
vc = Remote.RemoteVerify r
-- Download, not supporting canceling detected stalls.
download' :: Observable v => UUID -> Key -> AssociatedFile -> Maybe StallDetection -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
download' u key f sd d a _witness = guardHaveUUID u $
runTransfer (Transfer Download u (fromKey id key)) Nothing f sd d a
guardHaveUUID :: Observable v => UUID -> Annex v -> Annex v
guardHaveUUID u a
| u == NoUUID = return observeFailure
| otherwise = a
{- Runs a transfer action. Creates and locks the lock file while the
- action is running, and stores info in the transfer information
- file.
-
- If the transfer action returns False, the transfer info is
- left in the failedTransferDir.
-
- If the transfer is already in progress, returns False.
-
- An upload can be run from a read-only filesystem, and in this case
- no transfer information or lock file is used.
-
- Cannot cancel stalls, but when a likely stall is detected,
- suggests to the user that they enable stall detection handling.
-}
runTransfer :: Observable v => Transfer -> Maybe Backend -> AssociatedFile -> Maybe StallDetection -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v
runTransfer = runTransfer' False
{- Like runTransfer, but ignores any existing transfer lock file for the
- transfer, allowing re-running a transfer that is already in progress.
-}
alwaysRunTransfer :: Observable v => Transfer -> Maybe Backend -> AssociatedFile -> Maybe StallDetection -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v
alwaysRunTransfer = runTransfer' True
runTransfer' :: Observable v => Bool -> Transfer -> Maybe Backend -> AssociatedFile -> Maybe StallDetection -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v
runTransfer' ignorelock t eventualbackend afile stalldetection retrydecider transferaction =
enteringStage (TransferStage (transferDirection t)) $
debugLocks $
preCheckSecureHashes (transferKey t) eventualbackend go
where
go = do
info <- liftIO $ startTransferInfo afile
(tfile, lckfile, moldlckfile) <- fromRepo $ transferFileAndLockFile t
(meter, createtfile, metervar) <- mkProgressUpdater t info tfile
mode <- annexFileMode
(lck, inprogress) <- prep lckfile moldlckfile createtfile mode
if inprogress && not ignorelock
then do
warning "transfer already in progress, or unable to take transfer lock"
return observeFailure
else do
v <- retry 0 info metervar $
detectStallsAndSuggestConfig stalldetection metervar $
transferaction meter
liftIO $ cleanup tfile lckfile moldlckfile lck
if observeBool v
then removeFailedTransfer t
else recordFailedTransfer t info
return v
prep :: RawFilePath -> Maybe RawFilePath -> Annex () -> ModeSetter -> Annex (Maybe (LockHandle, Maybe LockHandle), Bool)
#ifndef mingw32_HOST_OS
prep lckfile moldlckfile createtfile mode = catchPermissionDenied (const prepfailed) $ do
createAnnexDirectory $ P.takeDirectory lckfile
tryLockExclusive (Just mode) lckfile >>= \case
Nothing -> return (Nothing, True)
-- Since the lock file is removed in cleanup,
-- there's a race where different processes
-- may have a deleted and a new version of the same
-- lock file open. checkSaneLock guards against
-- that.
Just lockhandle -> ifM (checkSaneLock lckfile lockhandle)
( case moldlckfile of
Nothing -> do
createtfile
return (Just (lockhandle, Nothing), False)
Just oldlckfile -> do
createAnnexDirectory $ P.takeDirectory oldlckfile
tryLockExclusive (Just mode) oldlckfile >>= \case
Nothing -> do
liftIO $ dropLock lockhandle
return (Nothing, True)
Just oldlockhandle -> ifM (checkSaneLock oldlckfile oldlockhandle)
( do
createtfile
return (Just (lockhandle, Just oldlockhandle), False)
, do
liftIO $ dropLock oldlockhandle
liftIO $ dropLock lockhandle
return (Nothing, True)
)
, do
liftIO $ dropLock lockhandle
return (Nothing, True)
)
#else
prep lckfile moldlckfile createtfile _mode = catchPermissionDenied (const prepfailed) $ do
createAnnexDirectory $ P.takeDirectory lckfile
catchMaybeIO (liftIO $ lockExclusive lckfile) >>= \case
Just (Just lockhandle) -> case moldlckfile of
Nothing -> do
createtfile
return (Just (lockhandle, Nothing), False)
Just oldlckfile -> do
createAnnexDirectory $ P.takeDirectory oldlckfile
catchMaybeIO (liftIO $ lockExclusive oldlckfile) >>= \case
Just (Just oldlockhandle) -> do
createtfile
return (Just (lockhandle, Just oldlockhandle), False)
_ -> do
liftIO $ dropLock lockhandle
return (Nothing, False)
_ -> return (Nothing, False)
#endif
prepfailed = return (Nothing, False)
cleanup _ _ _ Nothing = noop
cleanup tfile lckfile moldlckfile (Just (lockhandle, moldlockhandle)) = do
void $ tryIO $ R.removeLink tfile
#ifndef mingw32_HOST_OS
void $ tryIO $ R.removeLink lckfile
maybe noop (void . tryIO . R.removeLink) moldlckfile
maybe noop dropLock moldlockhandle
dropLock lockhandle
#else
{- Windows cannot delete the lockfile until the lock
- is closed. So it's possible to race with another
- process that takes the lock before it's removed,
- so ignore failure to remove.
-}
maybe noop dropLock moldlockhandle
dropLock lockhandle
void $ tryIO $ R.removeLink lckfile
maybe noop (void . tryIO . R.removeLink) moldlckfile
#endif
retry numretries oldinfo metervar run =
tryNonAsync run >>= \case
Right v
| observeBool v -> return v
| otherwise -> checkretry
Left e -> do
warning (UnquotedString (show e))
checkretry
where
checkretry = do
b <- getbytescomplete metervar
let newinfo = oldinfo { bytesComplete = Just b }
let !numretries' = succ numretries
ifM (retrydecider numretries' oldinfo newinfo)
( retry numretries' newinfo metervar run
, return observeFailure
)
getbytescomplete metervar = liftIO $
maybe 0 fromBytesProcessed <$> readTVarIO metervar
detectStallsAndSuggestConfig :: Maybe StallDetection -> TVar (Maybe BytesProcessed) -> Annex a -> Annex a
detectStallsAndSuggestConfig Nothing _ a = a
detectStallsAndSuggestConfig sd@(Just _) metervar a =
bracket setup cleanup (const a)
where
setup = do
v <- liftIO newEmptyTMVarIO
sdt <- liftIO $ async $ detectStalls sd metervar $
void $ atomically $ tryPutTMVar v True
wt <- liftIO . async =<< forkState (warnonstall v)
return (v, sdt, wt)
cleanup (v, sdt, wt) = do
liftIO $ uninterruptibleCancel sdt
void $ liftIO $ atomically $ tryPutTMVar v False
join (liftIO (wait wt))
warnonstall v = whenM (liftIO (atomically (takeTMVar v))) $
warning "Transfer seems to have stalled. To restart stalled transfers, configure annex.stalldetection"
{- Runs a transfer using a separate process, which lets detected stalls be
- canceled. -}
runTransferrer
:: StallDetection
-> Remote
-> Key
-> AssociatedFile
-> RetryDecider
-> Direction
-> NotifyWitness
-> Annex Bool
runTransferrer sd r k afile retrydecider direction _witness =
enteringStage (TransferStage direction) $ preCheckSecureHashes k Nothing $ do
info <- liftIO $ startTransferInfo afile
go 0 info
where
go numretries info =
withTransferrer (performTransfer (Just sd) AnnexLevel id (Just r) t info) >>= \case
Right () -> return True
Left newinfo -> do
let !numretries' = succ numretries
ifM (retrydecider numretries' info newinfo)
( go numretries' newinfo
, return False
)
t = Transfer direction (Remote.uuid r) (fromKey id k)
{- Avoid download and upload of keys with insecure content when
- annex.securehashesonly is configured.
-
- This is not a security check. Even if this let the content be
- downloaded, the actual security checks would prevent the content from
- being added to the repository. The only reason this is done here is to
- avoid transferring content that's going to be rejected anyway.
-
- We assume that, if annex.securehashesonly is set and the local repo
- still contains content using an insecure hash, remotes will likewise
- tend to be configured to reject it, so Upload is also prevented.
-}
preCheckSecureHashes :: Observable v => Key -> Maybe Backend -> Annex v -> Annex v
preCheckSecureHashes k meventualbackend a = case meventualbackend of
Just eventualbackend -> go
(pure (Types.Backend.isCryptographicallySecure eventualbackend))
(Types.Backend.backendVariety eventualbackend)
Nothing -> go
(isCryptographicallySecureKey k)
(fromKey keyVariety k)
where
go checksecure variety = ifM checksecure
( a
, ifM (annexSecureHashesOnly <$> Annex.getGitConfig)
( blocked variety
, a
)
)
blocked variety = do
warning $ UnquotedString $ "annex.securehashesonly blocked transfer of " ++ decodeBS (formatKeyVariety variety) ++ " key"
return observeFailure
type NumRetries = Integer
type RetryDecider = NumRetries -> TransferInfo -> TransferInfo -> Annex Bool
{- Both retry deciders are checked together, so if one chooses to delay,
- it will always take effect. -}
combineRetryDeciders :: RetryDecider -> RetryDecider -> RetryDecider
combineRetryDeciders a b = \n old new -> do
ar <- a n old new
br <- b n old new
return (ar || br)
noRetry :: RetryDecider
noRetry _ _ _ = pure False
stdRetry :: RetryDecider
stdRetry = combineRetryDeciders forwardRetry configuredRetry
{- Keep retrying failed transfers, as long as forward progress is being
- made.
-
- Up to a point -- while some remotes can resume where the previous
- transfer left off, and so it would make sense to keep retrying forever,
- other remotes restart each transfer from the beginning, and so even if
- forward progress is being made, it's not real progress. So, retry a
- maximum of 5 times by default.
-}
forwardRetry :: RetryDecider
forwardRetry numretries old new
| fromMaybe 0 (bytesComplete old) < fromMaybe 0 (bytesComplete new) =
(numretries <=) <$> maybe globalretrycfg pure remoteretrycfg
| otherwise = return False
where
globalretrycfg = fromMaybe 5 . annexForwardRetry
<$> Annex.getGitConfig
remoteretrycfg = remoteAnnexRetry =<<
(Remote.gitconfig <$> transferRemote new)
{- Retries a number of times with growing delays in between when enabled
- by git configuration. -}
configuredRetry :: RetryDecider
configuredRetry numretries _old new = do
(maxretries, Seconds initretrydelay) <- getcfg $
Remote.gitconfig <$> transferRemote new
if numretries < maxretries
then do
let retrydelay = Seconds (initretrydelay * 2^(numretries-1))
showSideAction $ UnquotedString $ "Delaying " ++ show (fromSeconds retrydelay) ++ "s before retrying."
liftIO $ threadDelaySeconds retrydelay
return True
else return False
where
globalretrycfg = fromMaybe 0 . annexRetry
<$> Annex.getGitConfig
globalretrydelaycfg = fromMaybe (Seconds 1) . annexRetryDelay
<$> Annex.getGitConfig
getcfg Nothing = (,) <$> globalretrycfg <*> globalretrydelaycfg
getcfg (Just gc) = (,)
<$> maybe globalretrycfg return (remoteAnnexRetry gc)
<*> maybe globalretrydelaycfg return (remoteAnnexRetryDelay gc)
{- Picks a remote from the list and tries a transfer to it. If the transfer
- does not succeed, goes on to try other remotes from the list.
-
- The list should already be ordered by remote cost, and is normally
- tried in order. However, when concurrent jobs are running, they will
- be assigned different remotes of the same cost when possible. This can
- increase total transfer speed.
-}
pickRemote :: Observable v => [Remote] -> (Remote -> Annex v) -> Annex v
pickRemote l a = debugLocks $ go l =<< getConcurrency
where
go [] _ = return observeFailure
go (r:[]) _ = a r
go rs NonConcurrent = gononconcurrent rs
go rs (Concurrent n)
| n <= 1 = gononconcurrent rs
| otherwise = goconcurrent rs
go rs ConcurrentPerCpu = goconcurrent rs
gononconcurrent [] = return observeFailure
gononconcurrent (r:rs) = do
ok <- a r
if observeBool ok
then return ok
else gononconcurrent rs
goconcurrent rs = do
mv <- Annex.getRead Annex.activeremotes
active <- liftIO $ takeMVar mv
let rs' = sortBy (lessActiveFirst active) rs
goconcurrent' mv active rs'
goconcurrent' mv active [] = do
liftIO $ putMVar mv active
return observeFailure
goconcurrent' mv active (r:rs) = do
let !active' = M.insertWith (+) r 1 active
liftIO $ putMVar mv active'
let getnewactive = do
active'' <- liftIO $ takeMVar mv
let !active''' = M.update (\n -> if n > 1 then Just (n-1) else Nothing) r active''
return active'''
let removeactive = liftIO . putMVar mv =<< getnewactive
ok <- a r `onException` removeactive
if observeBool ok
then do
removeactive
return ok
else do
active'' <- getnewactive
-- Re-sort the remaining rs
-- because other threads could have
-- been assigned them in the meantime.
let rs' = sortBy (lessActiveFirst active'') rs
goconcurrent' mv active'' rs'
lessActiveFirst :: M.Map Remote Integer -> Remote -> Remote -> Ordering
lessActiveFirst active a b
| Remote.cost a == Remote.cost b = comparing (`M.lookup` active) a b
| otherwise = comparing Remote.cost a b

300
Annex/TransferrerPool.hs Normal file
View file

@ -0,0 +1,300 @@
{- A pool of "git-annex transferrer" processes
-
- Copyright 2013-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE CPP #-}
module Annex.TransferrerPool where
import Annex.Common
import qualified Annex
import Types.TransferrerPool
import Types.Transferrer
import Types.Transfer
import qualified Types.Remote as Remote
import Types.Messages
import Types.CleanupActions
import Messages.Serialized
import Annex.Path
import Annex.StallDetection
import Annex.Link
import Utility.Batch
import Utility.Metered
import qualified Utility.SimpleProtocol as Proto
import Control.Concurrent
import Control.Concurrent.Async
import Control.Concurrent.STM hiding (check)
import Control.Monad.IO.Class (MonadIO)
import qualified Data.Map as M
#ifndef mingw32_HOST_OS
import System.Posix.Signals
import System.Posix.Process (getProcessGroupIDOf)
#endif
type SignalActionsVar = TVar (M.Map SignalAction (Int -> IO ()))
data RunTransferrer = RunTransferrer String [CommandParam] BatchCommandMaker
mkRunTransferrer :: BatchCommandMaker -> Annex RunTransferrer
mkRunTransferrer batchmaker = RunTransferrer
<$> liftIO programPath
<*> gitAnnexChildProcessParams "transferrer" []
<*> pure batchmaker
{- Runs an action with a Transferrer from the pool. -}
withTransferrer :: (Transferrer -> Annex a) -> Annex a
withTransferrer a = do
rt <- mkRunTransferrer nonBatchCommandMaker
pool <- Annex.getRead Annex.transferrerpool
let nocheck = pure (pure True)
signalactonsvar <- Annex.getRead Annex.signalactions
withTransferrer' False signalactonsvar nocheck rt pool a
withTransferrer'
:: (MonadIO m, MonadMask m)
=> Bool
-- ^ When minimizeprocesses is True, only one Transferrer is left
-- running in the pool at a time. So if this needed to start a
-- new Transferrer, it's stopped when done. Otherwise, idle
-- processes are left in the pool for use later.
-> SignalActionsVar
-> MkCheckTransferrer
-> RunTransferrer
-> TransferrerPool
-> (Transferrer -> m a)
-> m a
withTransferrer' minimizeprocesses signalactonsvar mkcheck rt pool a = do
(mi, leftinpool) <- liftIO $ atomically (popTransferrerPool pool)
(i@(TransferrerPoolItem _ check), t) <- liftIO $ case mi of
Nothing -> do
t <- mkTransferrer signalactonsvar rt
i <- mkTransferrerPoolItem mkcheck t
return (i, t)
Just i -> checkTransferrerPoolItem signalactonsvar rt i
a t `finally` returntopool leftinpool check t i
where
returntopool leftinpool check t i
| not minimizeprocesses || leftinpool == 0 =
-- If the transferrer got killed, the handles will
-- be closed, so it should not be returned to the
-- pool.
liftIO $ whenM (hIsOpen (transferrerWrite t)) $
liftIO $ atomically $ pushTransferrerPool pool i
| otherwise = liftIO $ do
void $ forkIO $ transferrerShutdown t
atomically $ pushTransferrerPool pool $ TransferrerPoolItem Nothing check
{- Check if a Transferrer from the pool is still ok to be used.
- If not, stop it and start a new one. -}
checkTransferrerPoolItem :: SignalActionsVar -> RunTransferrer -> TransferrerPoolItem -> IO (TransferrerPoolItem, Transferrer)
checkTransferrerPoolItem signalactonsvar rt i = case i of
TransferrerPoolItem (Just t) check -> ifM check
( return (i, t)
, do
transferrerShutdown t
new check
)
TransferrerPoolItem Nothing check -> new check
where
new check = do
t <- mkTransferrer signalactonsvar rt
return (TransferrerPoolItem (Just t) check, t)
data TransferRequestLevel = AnnexLevel | AssistantLevel
deriving (Show)
{- Requests that a Transferrer perform a Transfer, and waits for it to
- finish.
-
- When a stall is detected, kills the Transferrer.
-
- If the transfer failed or stalled, returns TransferInfo with an
- updated bytesComplete reflecting how much data has been transferred.
-}
performTransfer
:: (Monad m, MonadIO m, MonadMask m)
=> Maybe StallDetection
-> TransferRequestLevel
-> (forall a. Annex a -> m a)
-- ^ Run an annex action in the monad. Will not be used with
-- actions that block for a long time.
-> Maybe Remote
-> Transfer
-> TransferInfo
-> Transferrer
-> m (Either TransferInfo ())
performTransfer stalldetection level runannex r t info transferrer = do
bpv <- liftIO $ newTVarIO zeroBytesProcessed
ifM (catchBoolIO $ bracket setup cleanup (go bpv))
( return (Right ())
, do
n <- liftIO $ atomically $
fromBytesProcessed <$> readTVar bpv
return $ Left $ info { bytesComplete = Just n }
)
where
setup = do
liftIO $ sendRequest level t r
(associatedFile info)
(transferrerWrite transferrer)
metervar <- liftIO $ newTVarIO Nothing
stalledvar <- liftIO $ newTVarIO False
tid <- liftIO $ async $
detectStalls stalldetection metervar $ do
atomically $ writeTVar stalledvar True
killTransferrer transferrer
return (metervar, tid, stalledvar)
cleanup (_, tid, stalledvar) = do
liftIO $ uninterruptibleCancel tid
whenM (liftIO $ atomically $ readTVar stalledvar) $ do
runannex $ showLongNote "Transfer stalled"
-- Close handles, to prevent the transferrer being
-- reused since the process was killed.
liftIO $ hClose $ transferrerRead transferrer
liftIO $ hClose $ transferrerWrite transferrer
go bpv (metervar, _, _) = relaySerializedOutput
(liftIO $ readResponse (transferrerRead transferrer))
(liftIO . sendSerializedOutputResponse (transferrerWrite transferrer))
(updatemeter bpv metervar)
runannex
updatemeter bpv metervar (Just n) = liftIO $ do
atomically $ writeTVar metervar (Just n)
atomically $ writeTVar bpv n
updatemeter _bpv metervar Nothing = liftIO $
atomically $ writeTVar metervar Nothing
{- Starts a new git-annex transfer process, setting up handles
- that will be used to communicate with it. -}
mkTransferrer :: SignalActionsVar -> RunTransferrer -> IO Transferrer
#ifndef mingw32_HOST_OS
mkTransferrer signalactonsvar (RunTransferrer program params batchmaker) = do
#else
mkTransferrer _ (RunTransferrer program params batchmaker) = do
#endif
{- It runs as a batch job. -}
let (program', params') = batchmaker (program, params)
{- It's put into its own group so that the whole group can be
- killed to stop a transfer. -}
(Just writeh, Just readh, _, ph) <- createProcess
(proc program' $ toCommand params')
{ create_group = True
, std_in = CreatePipe
, std_out = CreatePipe
}
{- Set up signal propagation, so eg ctrl-c will also interrupt
- the processes in the transferrer's process group.
-
- There is a race between the process being created and this point.
- If a signal is received before this can run, it is not sent to
- the transferrer. This leaves the transferrer waiting for the
- first message on stdin to tell what to do. If the signal kills
- this parent process, the transferrer will then get a sigpipe
- and die too. If the signal suspends this parent process,
- it's ok to leave the transferrer running, as it's waiting on
- the pipe until this process wakes back up.
-}
#ifndef mingw32_HOST_OS
pid <- getPid ph
unregistersignalprop <- case pid of
Just p -> getProcessGroupIDOf p >>= \pgrp -> do
atomically $ modifyTVar' signalactonsvar $
M.insert (PropagateSignalProcessGroup p) $ \sig ->
signalProcessGroup (fromIntegral sig) pgrp
return $ atomically $ modifyTVar' signalactonsvar $
M.delete (PropagateSignalProcessGroup p)
Nothing -> return noop
#else
let unregistersignalprop = noop
#endif
return $ Transferrer
{ transferrerRead = readh
, transferrerWrite = writeh
, transferrerHandle = ph
, transferrerShutdown = do
-- The transferrer may write to stdout
-- as it's shutting down, so don't close
-- the readh right away. Instead, drain
-- anything sent to it.
drainer <- async $ void $ hGetContents readh
hClose writeh
void $ waitForProcess ph
wait drainer
hClose readh
unregistersignalprop
}
-- | Send a request to perform a transfer.
sendRequest :: TransferRequestLevel -> Transfer -> Maybe Remote -> AssociatedFile -> Handle -> IO ()
sendRequest level t mremote afile h = do
let tr = maybe
(TransferRemoteUUID (transferUUID t))
(TransferRemoteName . Remote.name)
mremote
let f = case (level, transferDirection t) of
(AnnexLevel, Upload) -> UploadRequest
(AnnexLevel, Download) -> DownloadRequest
(AssistantLevel, Upload) -> AssistantUploadRequest
(AssistantLevel, Download) -> AssistantDownloadRequest
let r = f tr (transferKey t) (TransferAssociatedFile afile)
let l = unwords $ Proto.formatMessage r
debug "Annex.TransferrerPool" ("> " ++ l)
hPutStrLn h l
hFlush h
sendSerializedOutputResponse :: Handle -> SerializedOutputResponse -> IO ()
sendSerializedOutputResponse h sor = do
let l = unwords $ Proto.formatMessage $
TransferSerializedOutputResponse sor
debug "Annex.TransferrerPool" ("> " ++ show l)
hPutStrLn h l
hFlush h
-- | Read a response to a transfer request.
--
-- Before the final response, this will return whatever SerializedOutput
-- should be displayed as the transfer is performed.
readResponse :: Handle -> IO (Either SerializedOutput Bool)
readResponse h = do
l <- liftIO $ hGetLine h
debug "Annex.TransferrerPool" ("< " ++ l)
case Proto.parseMessage l of
Just (TransferOutput so) -> return (Left so)
Just (TransferResult r) -> return (Right r)
Nothing -> transferrerProtocolError l
transferrerProtocolError :: String -> a
transferrerProtocolError l = giveup $ "transferrer protocol error: " ++ show l
{- Kill the transferrer, and all its child processes. -}
killTransferrer :: Transferrer -> IO ()
killTransferrer t = do
interruptProcessGroupOf $ transferrerHandle t
threadDelay 50000 -- 0.05 second grace period
terminateProcess $ transferrerHandle t
{- Stop all transferrers in the pool. -}
emptyTransferrerPool :: Annex ()
emptyTransferrerPool = do
poolvar <- Annex.getRead Annex.transferrerpool
pool <- liftIO $ atomically $ swapTVar poolvar []
liftIO $ forM_ pool $ \case
TransferrerPoolItem (Just t) _ -> transferrerShutdown t
TransferrerPoolItem Nothing _ -> noop
-- Transferrers usually restage pointer files themselves,
-- but when killTransferrer is used, a transferrer may have
-- pointer files it has not gotten around to restaging yet.
-- So, restage pointer files here in clean up from such killed
-- transferrers.
unless (null pool) $
restagePointerFiles =<< Annex.gitRepo

127
Annex/UUID.hs Normal file
View file

@ -0,0 +1,127 @@
{- git-annex uuids
-
- Each git repository used by git-annex has an annex.uuid setting that
- uniquely identifies that repository.
-
- UUIDs of remotes are cached in git config, using keys named
- remote.<name>.annex-uuid
-
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.UUID (
configkeyUUID,
configRepoUUID,
getUUID,
getRepoUUID,
getUncachedUUID,
isUUIDConfigured,
prepUUID,
genUUID,
genUUIDInNameSpace,
gCryptNameSpace,
removeRepoUUID,
storeUUID,
storeUUIDIn,
setUUID,
webUUID,
bitTorrentUUID,
) where
import Annex.Common
import qualified Annex
import qualified Git
import qualified Git.Config
import Git.Types
import Config
import qualified Data.UUID as U
import qualified Data.UUID.V4 as U4
import qualified Data.UUID.V5 as U5
import qualified Data.ByteString as S
import Data.String
configkeyUUID :: ConfigKey
configkeyUUID = annexConfig "uuid"
configRepoUUID :: Git.Repo -> ConfigKey
configRepoUUID r = remoteAnnexConfig r "uuid"
{- Generates a random UUID, that does not include the MAC address. -}
genUUID :: IO UUID
genUUID = toUUID <$> U4.nextRandom
{- Generates a UUID from a given string, using a namespace.
- Given the same namespace, the same string will always result
- in the same UUID. -}
genUUIDInNameSpace :: U.UUID -> S.ByteString -> UUID
genUUIDInNameSpace namespace = toUUID . U5.generateNamed namespace . S.unpack
{- Namespace used for UUIDs derived from git-remote-gcrypt ids. -}
gCryptNameSpace :: U.UUID
gCryptNameSpace = U5.generateNamed U5.namespaceURL $
S.unpack "http://git-annex.branchable.com/design/gcrypt/"
{- Get current repository's UUID. -}
getUUID :: Annex UUID
getUUID = annexUUID <$> Annex.getGitConfig
{- Looks up a remote repo's UUID, caching it in .git/config if
- it's not already. -}
getRepoUUID :: Git.Repo -> Annex UUID
getRepoUUID r = do
c <- toUUID <$> getConfig cachekey ""
let u = getUncachedUUID r
if c /= u && u /= NoUUID
then do
updatecache u
return u
else return c
where
updatecache u = do
g <- gitRepo
when (g /= r) $ storeUUIDIn cachekey u
cachekey = configRepoUUID r
removeRepoUUID :: Annex ()
removeRepoUUID = do
unsetConfig configkeyUUID
storeUUID NoUUID
getUncachedUUID :: Git.Repo -> UUID
getUncachedUUID = toUUID . Git.Config.get configkeyUUID ""
-- Does the repo's config have a key for the UUID?
-- True even when the key has no value.
isUUIDConfigured :: Git.Repo -> Bool
isUUIDConfigured = isJust . Git.Config.getMaybe configkeyUUID
{- Make sure that the repo has an annex.uuid setting. -}
prepUUID :: Annex ()
prepUUID = whenM ((==) NoUUID <$> getUUID) $
storeUUID =<< liftIO genUUID
storeUUID :: UUID -> Annex ()
storeUUID = storeUUIDIn configkeyUUID
storeUUIDIn :: ConfigKey -> UUID -> Annex ()
storeUUIDIn configfield = setConfig configfield . fromUUID
{- Only sets the configkey in the Repo; does not change .git/config -}
setUUID :: Git.Repo -> UUID -> IO Git.Repo
setUUID r u = do
let s = encodeBS $ show configkeyUUID ++ "=" ++ fromUUID u
Git.Config.store s Git.Config.ConfigList r
-- Dummy uuid for the whole web. Do not alter.
webUUID :: UUID
webUUID = UUID (fromString "00000000-0000-0000-0000-000000000001")
-- Dummy uuid for bittorrent. Do not alter.
bitTorrentUUID :: UUID
bitTorrentUUID = UUID (fromString "00000000-0000-0000-0000-000000000002")

View file

@ -0,0 +1,77 @@
{- handling untrusted filepaths
-
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.UntrustedFilePath where
import Data.Char
import System.FilePath
import Utility.SafeOutput
{- Given a string that we'd like to use as the basis for FilePath, but that
- was provided by a third party and is not to be trusted, returns the closest
- sane FilePath.
-
- All spaces and punctuation and other wacky stuff are replaced
- with '_', except for '.' and '-'
-
- "../" becomes ".._", which is safe.
- "/foo" becomes "_foo", which is safe.
- "c:foo" becomes "c_foo", which is safe even on windows.
-
- Leading '.' and '-' are also replaced with '_', so
- so no dotfiles that might control a program are inadvertently created,
- and to avoid filenames being treated as options to commands the user
- might run.
-
- Also there's an off chance the string might be empty, so to avoid
- needing to handle such an invalid filepath, return a dummy "file" in
- that case.
-}
sanitizeFilePath :: String -> FilePath
sanitizeFilePath = sanitizeLeadingFilePathCharacter . sanitizeFilePathComponent
{- For when the filepath is being built up out of components that should be
- individually sanitized, this can be used for each component, followed by
- sanitizeLeadingFilePathCharacter for the whole thing.
-}
sanitizeFilePathComponent :: String -> String
sanitizeFilePathComponent = map sanitize
where
sanitize c
| c == '.' || c == '-' = c
| isSpace c || isPunctuation c || isSymbol c || isControl c || c == '/' = '_'
| otherwise = c
sanitizeLeadingFilePathCharacter :: String -> FilePath
sanitizeLeadingFilePathCharacter [] = "file"
sanitizeLeadingFilePathCharacter ('.':s) = '_':s
sanitizeLeadingFilePathCharacter ('-':s) = '_':s
sanitizeLeadingFilePathCharacter ('/':s) = '_':s
sanitizeLeadingFilePathCharacter s = s
controlCharacterInFilePath :: FilePath -> Bool
controlCharacterInFilePath = any (not . safechar)
where
safechar c = safeOutputChar c && c /= '\n'
{- ../ is a path traversal, no matter where it appears.
-
- An absolute path is, of course.
-}
pathTraversalInFilePath :: FilePath -> Bool
pathTraversalInFilePath f
| isAbsolute f = True
| any (== "..") (splitPath f) = True
-- On windows, C:foo with no directory is not considered absolute
| hasDrive f = True
| otherwise = False
gitDirectoryInFilePath :: FilePath -> Bool
gitDirectoryInFilePath = any (== ".git")
. map dropTrailingPathSeparator
. splitPath

23
Annex/UpdateInstead.hs Normal file
View file

@ -0,0 +1,23 @@
{- git-annex UpdateIntead emulation
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.UpdateInstead where
import qualified Annex
import Annex.Common
import Annex.AdjustedBranch
import Git.Branch
import Git.ConfigTypes
{- receive.denyCurrentBranch=updateInstead does not work
- when an adjusted branch is checked out, so must be emulated. -}
needUpdateInsteadEmulation :: Annex Bool
needUpdateInsteadEmulation = updateinsteadset <&&> isadjusted
where
updateinsteadset = (== UpdateInstead) . receiveDenyCurrentBranch
<$> Annex.getGitConfig
isadjusted = (maybe False (isJust . getAdjustment) <$> inRepo Git.Branch.current)

190
Annex/Url.hs Normal file
View file

@ -0,0 +1,190 @@
{- Url downloading, with git-annex user agent and configured http
- headers, security restrictions, etc.
-
- Copyright 2013-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Url (
withUrlOptions,
withUrlOptionsPromptingCreds,
getUrlOptions,
getUserAgent,
ipAddressesUnlimited,
checkBoth,
download,
download',
exists,
getUrlInfo,
U.URLString,
U.UrlOptions(..),
U.UrlInfo(..),
U.sinkResponseFile,
U.matchStatusCodeException,
U.downloadConduit,
U.downloadPartial,
U.parseURIRelaxed,
U.allowedScheme,
U.assumeUrlExists,
) where
import Annex.Common
import qualified Annex
import qualified Utility.Url as U
import qualified Utility.Url.Parse as U
import Utility.Hash (IncrementalVerifier)
import Utility.IPAddress
import Network.HTTP.Client.Restricted
import Utility.Metered
import Git.Credential
import qualified BuildInfo
import Network.Socket
import Network.HTTP.Client
import Network.HTTP.Client.TLS
import Text.Read
import qualified Data.Set as S
defaultUserAgent :: U.UserAgent
defaultUserAgent = "git-annex/" ++ BuildInfo.packageversion
getUserAgent :: Annex U.UserAgent
getUserAgent = Annex.getRead $
fromMaybe defaultUserAgent . Annex.useragent
getUrlOptions :: Annex U.UrlOptions
getUrlOptions = Annex.getState Annex.urloptions >>= \case
Just uo -> return uo
Nothing -> do
uo <- mk
Annex.changeState $ \s -> s
{ Annex.urloptions = Just uo }
return uo
where
mk = do
(urldownloader, manager) <- checkallowedaddr
U.mkUrlOptions
<$> (Just <$> getUserAgent)
<*> headers
<*> pure urldownloader
<*> pure manager
<*> (annexAllowedUrlSchemes <$> Annex.getGitConfig)
<*> pure (Just (\u -> "Configuration of annex.security.allowed-url-schemes does not allow accessing " ++ show u))
<*> pure U.noBasicAuth
headers = annexHttpHeadersCommand <$> Annex.getGitConfig >>= \case
Just cmd -> lines <$> liftIO (readProcess "sh" ["-c", cmd])
Nothing -> annexHttpHeaders <$> Annex.getGitConfig
checkallowedaddr = words . annexAllowedIPAddresses <$> Annex.getGitConfig >>= \case
["all"] -> do
curlopts <- map Param . annexWebOptions <$> Annex.getGitConfig
allowedurlschemes <- annexAllowedUrlSchemes <$> Annex.getGitConfig
let urldownloader = if null curlopts && not (any (`S.notMember` U.conduitUrlSchemes) allowedurlschemes)
then U.DownloadWithConduit $
U.DownloadWithCurlRestricted mempty
else U.DownloadWithCurl curlopts
manager <- liftIO $ U.newManager $
avoidtimeout $ tlsManagerSettings
return (urldownloader, manager)
allowedaddrsports -> do
addrmatcher <- liftIO $
(\l v -> any (\f -> f v) l) . catMaybes
<$> mapM (uncurry makeAddressMatcher)
(mapMaybe splitAddrPort allowedaddrsports)
-- Default to not allowing access to loopback
-- and private IP addresses to avoid data
-- leakage.
let isallowed addr
| addrmatcher addr = True
| isLoopbackAddress addr = False
| isPrivateAddress addr = False
| otherwise = True
let connectionrestricted = connectionRestricted
("Configuration of annex.security.allowed-ip-addresses does not allow accessing address " ++)
let r = addressRestriction $ \addr ->
if isallowed (addrAddress addr)
then Nothing
else Just (connectionrestricted addr)
(settings, pr) <- liftIO $
mkRestrictedManagerSettings r Nothing Nothing
case pr of
Nothing -> return ()
Just ProxyRestricted -> toplevelWarning True
"http proxy settings not used due to annex.security.allowed-ip-addresses configuration"
manager <- liftIO $ U.newManager $
avoidtimeout settings
-- Curl is not used, as its interface does not allow
-- preventing it from accessing specific IP addresses.
let urldownloader = U.DownloadWithConduit $
U.DownloadWithCurlRestricted r
return (urldownloader, manager)
-- http-client defailts to timing out a request after 30 seconds
-- or so, but some web servers are slower and git-annex has its own
-- separate timeout controls, so disable that.
avoidtimeout s = s { managerResponseTimeout = responseTimeoutNone }
splitAddrPort :: String -> Maybe (String, Maybe PortNumber)
splitAddrPort s
-- "[addr]:port" (also allow "[addr]")
| "[" `isPrefixOf` s = case splitc ']' (drop 1 s) of
[a,cp] -> case splitc ':' cp of
["",p] -> do
pn <- readMaybe p
return (a, Just pn)
[""] -> Just (a, Nothing)
_ -> Nothing
_ -> Nothing
| otherwise = Just (s, Nothing)
ipAddressesUnlimited :: Annex Bool
ipAddressesUnlimited =
("all" == ) . annexAllowedIPAddresses <$> Annex.getGitConfig
withUrlOptions :: (U.UrlOptions -> Annex a) -> Annex a
withUrlOptions a = a =<< getUrlOptions
-- When downloading an url, if authentication is needed, uses
-- git-credential to prompt for username and password.
--
-- Note that, when the downloader is curl, it will not use git-credential.
-- If the user wants to, they can configure curl to use a netrc file that
-- handles authentication.
withUrlOptionsPromptingCreds :: (U.UrlOptions -> Annex a) -> Annex a
withUrlOptionsPromptingCreds a = do
g <- Annex.gitRepo
uo <- getUrlOptions
prompter <- mkPrompter
cc <- Annex.getRead Annex.gitcredentialcache
a $ uo
{ U.getBasicAuth = \u -> prompter $
getBasicAuthFromCredential g cc u
}
checkBoth :: U.URLString -> Maybe Integer -> U.UrlOptions -> Annex Bool
checkBoth url expected_size uo =
liftIO (U.checkBoth url expected_size uo) >>= \case
Right r -> return r
Left err -> warning (UnquotedString err) >> return False
download :: MeterUpdate -> Maybe IncrementalVerifier -> U.URLString -> FilePath -> U.UrlOptions -> Annex Bool
download meterupdate iv url file uo =
liftIO (U.download meterupdate iv url file uo) >>= \case
Right () -> return True
Left err -> warning (UnquotedString err) >> return False
download' :: MeterUpdate -> Maybe IncrementalVerifier -> U.URLString -> FilePath -> U.UrlOptions -> Annex (Either String ())
download' meterupdate iv url file uo =
liftIO (U.download meterupdate iv url file uo)
exists :: U.URLString -> U.UrlOptions -> Annex Bool
exists url uo = liftIO (U.exists url uo) >>= \case
Right b -> return b
Left err -> warning (UnquotedString err) >> return False
getUrlInfo :: U.URLString -> U.UrlOptions -> Annex (Either String U.UrlInfo)
getUrlInfo url uo = liftIO (U.getUrlInfo url uo)

45
Annex/VariantFile.hs Normal file
View file

@ -0,0 +1,45 @@
{- git-annex .variant files for automatic merge conflict resolution
-
- Copyright 2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.VariantFile where
import Annex.Common
import Utility.Hash
import qualified Data.ByteString as S
variantMarker :: String
variantMarker = ".variant-"
mkVariant :: FilePath -> String -> FilePath
mkVariant file variant = takeDirectory file
</> dropExtension (takeFileName file)
++ variantMarker ++ variant
++ takeExtension file
{- The filename to use when resolving a conflicted merge of a file,
- that points to a key.
-
- Something derived from the key needs to be included in the filename,
- but rather than exposing the whole key to the user, a very weak hash
- is used. There is a very real, although still unlikely, chance of
- conflicts using this hash.
-
- In the event that there is a conflict with the filename generated
- for some other key, that conflict will itself be handled by the
- conflicted merge resolution code. That case is detected, and the full
- key is used in the filename.
-}
variantFile :: FilePath -> Key -> FilePath
variantFile file key
| doubleconflict = mkVariant file (fromRawFilePath (keyFile key))
| otherwise = mkVariant file (shortHash $ serializeKey' key)
where
doubleconflict = variantMarker `isInfixOf` file
shortHash :: S.ByteString -> String
shortHash = take 4 . show . md5s

83
Annex/VectorClock.hs Normal file
View file

@ -0,0 +1,83 @@
{- git-annex vector clocks
-
- These are basically a timestamp. However, when logging a new
- value, if the old value has a vector clock that is the same or greater
- than the current vector clock, the old vector clock is incremented.
- This way, clock skew does not cause confusion.
-
- Copyright 2017-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.VectorClock (
module Annex.VectorClock,
module Types.VectorClock,
) where
import Types.VectorClock
import Annex.Common
import qualified Annex
import Utility.TimeStamp
import Data.ByteString.Builder
import qualified Data.Attoparsec.ByteString.Lazy as A
currentVectorClock :: Annex CandidateVectorClock
currentVectorClock = liftIO =<< Annex.getState Annex.getvectorclock
-- Runs the action and uses the same vector clock throughout,
-- except when it's necessary to use a newer one due to a past value having
-- a newer vector clock.
--
-- When the action modifies several files in the git-annex branch,
-- this can cause less space to be used, since the same vector clock
-- value is used, which can compress better.
--
-- However, this should not be used when running a long-duration action,
-- because the vector clock is based on the start of the action, and not on
-- the later points where it writes changes. For example, if this were
-- used across downloads of several files, the location log information
-- would have an earlier vector clock than necessary, which might cause it
-- to be disregarded in favor of other information that was collected
-- at an earlier point in time than when the transfers completted and the
-- log was written.
reuseVectorClockWhile :: Annex a -> Annex a
reuseVectorClockWhile = bracket setup cleanup . const
where
setup = do
origget <- Annex.getState Annex.getvectorclock
vc <- liftIO origget
use (pure vc)
return origget
cleanup origget = use origget
use vc = Annex.changeState $ \s ->
s { Annex.getvectorclock = vc }
-- Convert a candidate vector clock in to the final one to use,
-- advancing it if necessary when necessary to get ahead of a previously
-- used vector clock.
advanceVectorClock :: CandidateVectorClock -> [VectorClock] -> VectorClock
advanceVectorClock (CandidateVectorClock c) [] = VectorClock c
advanceVectorClock (CandidateVectorClock c) prevs
| prev >= VectorClock c = case prev of
VectorClock v -> VectorClock (v + 1)
Unknown -> VectorClock c
| otherwise = VectorClock c
where
prev = maximum prevs
formatVectorClock :: VectorClock -> String
formatVectorClock Unknown = "0"
formatVectorClock (VectorClock t) = show t
buildVectorClock :: VectorClock -> Builder
buildVectorClock = string7 . formatVectorClock
parseVectorClock :: String -> Maybe VectorClock
parseVectorClock t = VectorClock <$> parsePOSIXTime t
vectorClockParser :: A.Parser VectorClock
vectorClockParser = VectorClock <$> parserPOSIXTime

View file

@ -0,0 +1,33 @@
{- git-annex vector clock utilities
-
- Copyright 2017-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.VectorClock.Utility where
import Data.Time.Clock.POSIX
import Types.VectorClock
import Utility.Env
import Utility.TimeStamp
startVectorClock :: IO (IO CandidateVectorClock)
startVectorClock = go =<< getEnv "GIT_ANNEX_VECTOR_CLOCK"
where
go Nothing = timebased
go (Just s) = case parsePOSIXTime s of
Just t -> return (pure (CandidateVectorClock t))
Nothing -> timebased
-- Avoid using fractional seconds in the CandidateVectorClock.
-- This reduces the size of the packed git-annex branch by up
-- to 8%.
--
-- Due to the use of vector clocks, high resolution timestamps are
-- not necessary to make clear which information is most recent when
-- eg, a value is changed repeatedly in the same second. In such a
-- case, the vector clock will be advanced to the next second after
-- the last modification.
timebased = return $
CandidateVectorClock . truncateResolution 0 <$> getPOSIXTime

398
Annex/Verify.hs Normal file
View file

@ -0,0 +1,398 @@
{- verification
-
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Verify (
shouldVerify,
verifyKeyContentPostRetrieval,
verifyKeyContent,
verifyKeyContent',
Verification(..),
unVerified,
warnUnverifiableInsecure,
isVerifiable,
startVerifyKeyContentIncrementally,
finishVerifyKeyContentIncrementally,
finishVerifyKeyContentIncrementally',
verifyKeyContentIncrementally,
IncrementalVerifier(..),
writeVerifyChunk,
resumeVerifyFromOffset,
tailVerify,
) where
import Annex.Common
import qualified Annex
import qualified Types.Remote
import Types.Remote (VerifyConfigA(..))
import qualified Types.Backend
import qualified Backend
import Types.Remote (unVerified, Verification(..), RetrievalSecurityPolicy(..))
import Utility.Hash (IncrementalVerifier(..))
import Utility.Metered
import Annex.WorkerPool
import Types.WorkerPool
import Types.Key
import Control.Concurrent.STM
import Control.Concurrent.Async
import qualified Data.ByteString as S
#if WITH_INOTIFY
import qualified System.INotify as INotify
import qualified System.FilePath.ByteString as P
#endif
shouldVerify :: VerifyConfig -> Annex Bool
shouldVerify AlwaysVerify = return True
shouldVerify NoVerify = return False
shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig
shouldVerify (RemoteVerify r) =
(shouldVerify DefaultVerify
<&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r)))
-- Export remotes are not key/value stores, so always verify
-- content from them even when verification is disabled.
<||> Types.Remote.isExportSupported r
{- Verifies that a file is the expected content of a key.
-
- Configuration can prevent verification, for either a
- particular remote or always, unless the RetrievalSecurityPolicy
- requires verification.
-
- Most keys have a known size, and if so, the file size is checked.
-
- When the key's backend allows verifying the content (via checksum),
- it is checked.
-
- If the RetrievalSecurityPolicy requires verification and the key's
- backend doesn't support it, the verification will fail.
-}
verifyKeyContentPostRetrieval :: RetrievalSecurityPolicy -> VerifyConfig -> Verification -> Key -> RawFilePath -> Annex Bool
verifyKeyContentPostRetrieval rsp v verification k f = case (rsp, verification) of
(_, Verified) -> return True
(RetrievalVerifiableKeysSecure, _) -> ifM (isVerifiable k)
( verify
, ifM (annexAllowUnverifiedDownloads <$> Annex.getGitConfig)
( verify
, warnUnverifiableInsecure k >> return False
)
)
(_, UnVerified) -> ifM (shouldVerify v)
( verify
, return True
)
(_, IncompleteVerify _) -> ifM (shouldVerify v)
( verify
, return True
)
(_, MustVerify) -> verify
(_, MustFinishIncompleteVerify _) -> verify
where
verify = enteringStage VerifyStage $
case verification of
IncompleteVerify iv ->
resumeVerifyKeyContent k f iv
MustFinishIncompleteVerify iv ->
resumeVerifyKeyContent k f iv
_ -> verifyKeyContent k f
-- When possible, does an incremental verification, because that can be
-- faster. Eg, the VURL backend can need to try multiple checksums and only
-- with an incremental verification does it avoid reading files twice.
verifyKeyContent :: Key -> RawFilePath -> Annex Bool
verifyKeyContent k f = verifyKeySize k f <&&> verifyKeyContent' k f
-- Does not verify size.
verifyKeyContent' :: Key -> RawFilePath -> Annex Bool
verifyKeyContent' k f =
Backend.maybeLookupBackendVariety (fromKey keyVariety k) >>= \case
Nothing -> return True
Just b -> case (Types.Backend.verifyKeyContentIncrementally b, Types.Backend.verifyKeyContent b) of
(Nothing, Nothing) -> return True
(Just mkiv, mverifier) -> do
iv <- mkiv k
showAction (UnquotedString (descIncrementalVerifier iv))
res <- liftIO $ catchDefaultIO Nothing $
withBinaryFile (fromRawFilePath f) ReadMode $ \h -> do
feedIncrementalVerifier h iv
finalizeIncrementalVerifier iv
case res of
Just res' -> return res'
Nothing -> case mverifier of
Nothing -> return True
Just verifier -> verifier k f
(Nothing, Just verifier) -> verifier k f
resumeVerifyKeyContent :: Key -> RawFilePath -> IncrementalVerifier -> Annex Bool
resumeVerifyKeyContent k f iv = liftIO (positionIncrementalVerifier iv) >>= \case
Nothing -> fallback
Just endpos -> do
fsz <- liftIO $ catchDefaultIO 0 $ getFileSize f
if fsz < endpos
then fallback
else case fromKey keySize k of
Just size | fsz /= size -> return False
_ -> go fsz endpos >>= \case
Just v -> return v
Nothing -> fallback
where
fallback = verifyKeyContent k f
go fsz endpos
| fsz == endpos =
liftIO $ catchDefaultIO (Just False) $
finalizeIncrementalVerifier iv
| otherwise = do
showAction (UnquotedString (descIncrementalVerifier iv))
liftIO $ catchDefaultIO (Just False) $
withBinaryFile (fromRawFilePath f) ReadMode $ \h -> do
hSeek h AbsoluteSeek endpos
feedIncrementalVerifier h iv
finalizeIncrementalVerifier iv
feedIncrementalVerifier :: Handle -> IncrementalVerifier -> IO ()
feedIncrementalVerifier h iv = do
b <- S.hGetSome h chunk
if S.null b
then return ()
else do
updateIncrementalVerifier iv b
feedIncrementalVerifier h iv
where
chunk = 65536
verifyKeySize :: Key -> RawFilePath -> Annex Bool
verifyKeySize k f = case fromKey keySize k of
Just size -> do
size' <- liftIO $ catchDefaultIO 0 $ getFileSize f
return (size' == size)
Nothing -> return True
warnUnverifiableInsecure :: Key -> Annex ()
warnUnverifiableInsecure k = warning $ UnquotedString $ unwords
[ "Getting " ++ kv ++ " keys with this remote is not secure;"
, "the content cannot be verified to be correct."
, "(Use annex.security.allow-unverified-downloads to bypass"
, "this safety check.)"
]
where
kv = decodeBS (formatKeyVariety (fromKey keyVariety k))
isVerifiable :: Key -> Annex Bool
isVerifiable k = maybe False (isJust . Types.Backend.verifyKeyContent)
<$> Backend.maybeLookupBackendVariety (fromKey keyVariety k)
startVerifyKeyContentIncrementally :: VerifyConfig -> Key -> Annex (Maybe IncrementalVerifier)
startVerifyKeyContentIncrementally verifyconfig k =
ifM (shouldVerify verifyconfig)
( Backend.maybeLookupBackendVariety (fromKey keyVariety k) >>= \case
Just b -> case Types.Backend.verifyKeyContentIncrementally b of
Just v -> Just <$> v k
Nothing -> return Nothing
Nothing -> return Nothing
, return Nothing
)
finishVerifyKeyContentIncrementally :: Maybe IncrementalVerifier -> Annex (Bool, Verification)
finishVerifyKeyContentIncrementally = finishVerifyKeyContentIncrementally' False
finishVerifyKeyContentIncrementally' :: Bool -> Maybe IncrementalVerifier -> Annex (Bool, Verification)
finishVerifyKeyContentIncrementally' _ Nothing =
return (True, UnVerified)
finishVerifyKeyContentIncrementally' quiet (Just iv) =
liftIO (finalizeIncrementalVerifier iv) >>= \case
Just True -> return (True, Verified)
Just False -> do
unless quiet $
warning "verification of content failed"
return (False, UnVerified)
-- Incremental verification was not able to be done.
Nothing -> return (True, UnVerified)
verifyKeyContentIncrementally :: VerifyConfig -> Key -> (Maybe IncrementalVerifier -> Annex ()) -> Annex Verification
verifyKeyContentIncrementally verifyconfig k a = do
miv <- startVerifyKeyContentIncrementally verifyconfig k
a miv
snd <$> finishVerifyKeyContentIncrementally miv
writeVerifyChunk :: Maybe IncrementalVerifier -> Handle -> S.ByteString -> IO ()
writeVerifyChunk (Just iv) h c = do
S.hPut h c
updateIncrementalVerifier iv c
writeVerifyChunk Nothing h c = S.hPut h c
{- Given a file handle that is open for reading (and likely also for writing),
- and an offset, feeds the current content of the file up to the offset to
- the IncrementalVerifier. Leaves the file seeked to the offset.
- Returns the meter with the offset applied. -}
resumeVerifyFromOffset
:: Integer
-> Maybe IncrementalVerifier
-> MeterUpdate
-> Handle
-> IO MeterUpdate
resumeVerifyFromOffset o incrementalverifier meterupdate h
| o /= 0 = do
maybe noop (`go` o) incrementalverifier
-- Make sure the handle is seeked to the offset.
-- (Reading the file probably left it there
-- when that was done, but let's be sure.)
hSeek h AbsoluteSeek o
return offsetmeterupdate
| otherwise = return meterupdate
where
offsetmeterupdate = offsetMeterUpdate meterupdate (toBytesProcessed o)
go iv n
| n == 0 = return ()
| otherwise = do
let c = if n > fromIntegral defaultChunkSize
then defaultChunkSize
else fromIntegral n
b <- S.hGet h c
updateIncrementalVerifier iv b
unless (b == S.empty) $
go iv (n - fromIntegral (S.length b))
-- | Runs a writer action that retrieves to a file. In another thread,
-- reads the file as it grows, and feeds it to the incremental verifier.
--
-- Once the writer finishes, this returns quickly. It may not feed
-- the entire content of the file to the incremental verifier.
--
-- The file does not need to exist yet when this is called. It will wait
-- for the file to appear before opening it and starting verification.
--
-- This is not supported for all OSs, and on OS's where it is not
-- supported, verification will not happen.
--
-- The writer probably needs to be another process. If the file is being
-- written directly by git-annex, the haskell RTS will prevent opening it
-- for read at the same time, and verification will not happen.
--
-- Note that there are situations where the file may fail to verify despite
-- having the correct content. For example, when the file is written out
-- of order, or gets replaced part way through. To deal with such cases,
-- when verification fails, it should not be treated as if the file's
-- content is known to be incorrect, but instead as an indication that the
-- file should be verified again, once it's done being written to.
--
-- (It is also possible, in theory, for a file to verify despite having
-- incorrect content. For that to happen, the file would need to have
-- the right content when this checks it, but then the content gets
-- changed later by whatever is writing to the file.)
--
-- This should be fairly efficient, reading from the disk cache,
-- as long as the writer does not get very far ahead of it. However,
-- there are situations where it would be much less expensive to verify
-- chunks as they are being written. For example, when resuming with
-- a lot of content in the file, all that content needs to be read,
-- and if the disk is slow, the reader may never catch up to the writer,
-- and the disk cache may never speed up reads. So this should only be
-- used when there's not a better way to incrementally verify.
tailVerify :: Maybe IncrementalVerifier -> RawFilePath -> Annex a -> Annex a
tailVerify (Just iv) f writer = do
finished <- liftIO newEmptyTMVarIO
t <- liftIO $ async $ tailVerify' iv f finished
let finishtail = do
liftIO $ atomically $ putTMVar finished ()
liftIO (wait t)
writer `finally` finishtail
tailVerify Nothing _ writer = writer
tailVerify' :: IncrementalVerifier -> RawFilePath -> TMVar () -> IO ()
#if WITH_INOTIFY
tailVerify' iv f finished =
tryNonAsync go >>= \case
Right r -> return r
Left _ -> unableIncrementalVerifier iv
where
-- Watch the directory containing the file, and wait for
-- the file to be modified. It's possible that the file already
-- exists before the downloader starts, but it replaces it instead
-- of resuming, and waiting for modification deals with such
-- situations.
inotifydirchange i cont =
INotify.addWatch i [INotify.Modify] dir $ \case
-- Ignore changes to other files in the directory.
INotify.Modified { INotify.maybeFilePath = fn }
| fn == Just basef -> cont
_ -> noop
where
(dir, basef) = P.splitFileName f
inotifyfilechange i = INotify.addWatch i [INotify.Modify] f . const
go = INotify.withINotify $ \i -> do
modified <- newEmptyTMVarIO
let signalmodified = atomically $ void $ tryPutTMVar modified ()
wd <- inotifydirchange i signalmodified
let cleanup = void . tryNonAsync . INotify.removeWatch
let stop w = do
cleanup w
unableIncrementalVerifier iv
waitopen modified >>= \case
Nothing -> stop wd
Just h -> do
cleanup wd
wf <- inotifyfilechange i signalmodified
tryNonAsync (follow h modified) >>= \case
Left _ -> stop wf
Right () -> cleanup wf
hClose h
waitopen modified = do
v <- atomically $
(Just <$> takeTMVar modified)
`orElse`
((const Nothing) <$> takeTMVar finished)
case v of
Just () -> do
r <- tryNonAsync $
tryWhenExists (openBinaryFile (fromRawFilePath f) ReadMode) >>= \case
Just h -> return (Just h)
-- File does not exist, must have been
-- deleted. Wait for next modification
-- and try again.
Nothing -> waitopen modified
case r of
Right r' -> return r'
-- Permission error prevents
-- reading, or this same process
-- is writing to the file,
-- and it cannot be read at the
-- same time.
Left _ -> return Nothing
-- finished without the file being modified
Nothing -> return Nothing
follow h modified = do
b <- S.hGetNonBlocking h chunk
if S.null b
then do
-- We've caught up to the writer.
-- Wait for the file to get modified again,
-- or until we're told it is done being
-- written.
cont <- atomically $
(const (follow h modified)
<$> takeTMVar modified)
`orElse`
(const (return ())
<$> takeTMVar finished)
cont
else do
updateIncrementalVerifier iv b
atomically (tryTakeTMVar finished) >>= \case
Nothing -> follow h modified
Just () -> return ()
chunk = 65536
#else
tailVerify' iv _ _ = unableIncrementalVerifier iv
#endif

68
Annex/Version.hs Normal file
View file

@ -0,0 +1,68 @@
{- git-annex repository versioning
-
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Version where
import Annex.Common
import Config
import Git.Types
import Types.RepoVersion
import qualified Annex
import qualified Data.Map as M
defaultVersion :: RepoVersion
defaultVersion = RepoVersion 10
latestVersion :: RepoVersion
latestVersion = RepoVersion 10
supportedVersions :: [RepoVersion]
supportedVersions = map RepoVersion [8, 9, 10]
upgradeableVersions :: [RepoVersion]
#ifndef mingw32_HOST_OS
upgradeableVersions = map RepoVersion [0..10]
#else
upgradeableVersions = map RepoVersion [2..10]
#endif
autoUpgradeableVersions :: M.Map RepoVersion RepoVersion
autoUpgradeableVersions = M.fromList
[ (RepoVersion 3, defaultVersion)
, (RepoVersion 4, defaultVersion)
, (RepoVersion 5, defaultVersion)
, (RepoVersion 6, defaultVersion)
, (RepoVersion 7, defaultVersion)
, (RepoVersion 8, defaultVersion)
, (RepoVersion 9, defaultVersion)
]
versionField :: ConfigKey
versionField = annexConfig "version"
getVersion :: Annex (Maybe RepoVersion)
getVersion = annexVersion <$> Annex.getGitConfig
setVersion :: RepoVersion -> Annex ()
setVersion (RepoVersion v) = setConfig versionField (show v)
removeVersion :: Annex ()
removeVersion = unsetConfig versionField
versionSupportsFilterProcess :: Maybe RepoVersion -> Bool
versionSupportsFilterProcess (Just v)
| v >= RepoVersion 9 = True
versionSupportsFilterProcess _ = False
versionNeedsWritableContentFiles :: Maybe RepoVersion -> Bool
versionNeedsWritableContentFiles (Just v)
| v >= RepoVersion 10 = False
versionNeedsWritableContentFiles _ = True

636
Annex/View.hs Normal file
View file

@ -0,0 +1,636 @@
{- metadata based branch views
-
- Copyright 2014-2023 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings, PackageImports #-}
module Annex.View where
import Annex.Common
import Annex.View.ViewedFile
import Types.View
import Types.AdjustedBranch
import Types.MetaData
import Annex.MetaData
import qualified Annex
import qualified Annex.Branch
import qualified Git
import qualified Git.DiffTree as DiffTree
import qualified Git.Branch
import qualified Git.LsFiles
import qualified Git.LsTree
import qualified Git.Ref
import Git.CatFile
import Git.UpdateIndex
import Git.Sha
import Git.Types
import Git.FilePath
import Annex.WorkTree
import Annex.GitOverlay
import Annex.Link
import Annex.CatFile
import Annex.Concurrent
import Annex.Content.Presence
import Logs
import Logs.MetaData
import Logs.View
import Utility.Glob
import Types.Command
import CmdLine.Action
import qualified Utility.RawFilePath as R
import qualified Data.Text as T
import qualified Data.ByteString as B
import qualified Data.Set as S
import qualified Data.Map as M
import qualified System.FilePath.ByteString as P
import Control.Concurrent.Async
import "mtl" Control.Monad.Writer
{- Each visible ViewFilter in a view results in another level of
- subdirectory nesting. When a file matches multiple ways, it will appear
- in multiple subdirectories. This means there is a bit of an exponential
- blowup with a single file appearing in a crazy number of places!
-
- Capping the view size to 5 is reasonable; why wants to dig
- through 5+ levels of subdirectories to find anything?
-}
viewTooLarge :: View -> Bool
viewTooLarge view = visibleViewSize view > 5
visibleViewSize :: View -> Int
visibleViewSize = length . filter viewVisible . viewComponents
{- Parses field=value, field!=value, field?=value, tag, !tag, and ?tag
-
- Note that the field may not be a legal metadata field name,
- but it's let through anyway.
- This is useful when matching on directory names with spaces,
- which are not legal MetaFields.
-}
parseViewParam :: ViewUnset -> String -> (MetaField, ViewFilter)
parseViewParam vu s = case separate (== '=') s of
('!':tag, []) | not (null tag) ->
( tagMetaField
, mkExcludeValues tag
)
('?':tag, []) | not (null tag) ->
( tagMetaField
, mkFilterOrUnsetValues tag
)
(tag, []) ->
( tagMetaField
, mkFilterValues tag
)
(field, wanted)
| end field == "!" ->
( mkMetaFieldUnchecked (T.pack (beginning field))
, mkExcludeValues wanted
)
| end field == "?" ->
( mkMetaFieldUnchecked (T.pack (beginning field))
, mkFilterOrUnsetValues wanted
)
| otherwise ->
( mkMetaFieldUnchecked (T.pack field)
, mkFilterValues wanted
)
where
mkExcludeValues = ExcludeValues . S.singleton . toMetaValue . encodeBS
mkFilterValues v
| any (`elem` v) ['*', '?'] = FilterGlob v
| otherwise = FilterValues $ S.singleton $ toMetaValue $ encodeBS v
mkFilterOrUnsetValues v
| any (`elem` v) ['*', '?'] = FilterGlobOrUnset v vu
| otherwise = FilterValuesOrUnset (S.singleton $ toMetaValue $ encodeBS v) vu
data ViewChange = Unchanged | Narrowing | Widening
deriving (Ord, Eq, Show)
{- Updates a view, adding new fields to filter on (Narrowing),
- or allowing new values in an existing field (Widening). -}
refineView :: View -> [(MetaField, ViewFilter)] -> (View, ViewChange)
refineView origview = checksize . calc Unchanged origview
where
calc c v [] = (v, c)
calc c v ((f, vf):rest) =
let (v', c') = refine v f vf
in calc (max c c') v' rest
refine view field vf
| field `elem` map viewField (viewComponents view) =
let (components', viewchanges) = runWriter $
mapM (\c -> updateViewComponent c field vf) (viewComponents view)
viewchange = if field `elem` map viewField (viewComponents origview)
then maximum viewchanges
else Narrowing
in (view { viewComponents = components' }, viewchange)
| otherwise =
let component = mkViewComponent field vf
view' = view { viewComponents = component : viewComponents view }
in (view', Narrowing)
checksize r@(v, _)
| viewTooLarge v = giveup $ "View is too large (" ++ show (visibleViewSize v) ++ " levels of subdirectories)"
| otherwise = r
updateViewComponent :: ViewComponent -> MetaField -> ViewFilter -> Writer [ViewChange] ViewComponent
updateViewComponent c field vf
| viewField c == field = do
let (newvf, viewchange) = combineViewFilter (viewFilter c) vf
tell [viewchange]
return $ mkViewComponent field newvf
| otherwise = return c
{- Adds an additional filter to a view. This can only result in narrowing
- the view. Multivalued filters are added in non-visible form. -}
filterView :: View -> [(MetaField, ViewFilter)] -> View
filterView v vs = v { viewComponents = viewComponents f' ++ viewComponents v}
where
f = fst $ refineView (v {viewComponents = []}) vs
f' = f { viewComponents = map toinvisible (viewComponents f) }
toinvisible c = c { viewVisible = False }
{- Combine old and new ViewFilters, yielding a result that matches
- either old+new, or only new. Which depends on the types of things
- being combined.
-}
combineViewFilter :: ViewFilter -> ViewFilter -> (ViewFilter, ViewChange)
combineViewFilter old@(FilterValues olds) (FilterValues news)
| combined == old = (combined, Unchanged)
| otherwise = (combined, Widening)
where
combined = FilterValues (S.union olds news)
combineViewFilter old@(ExcludeValues olds) (ExcludeValues news)
| combined == old = (combined, Unchanged)
| otherwise = (combined, Narrowing)
where
combined = ExcludeValues (S.union olds news)
{- If we have FilterValues and change to a FilterGlob,
- it's always a widening change, because the glob could match other
- values. OTOH, going the other way, it's a Narrowing change if the old
- glob matches all the new FilterValues. -}
combineViewFilter (FilterValues _) newglob@(FilterGlob _) =
(newglob, Widening)
combineViewFilter (FilterGlob oldglob) new@(FilterValues s)
| all (matchGlob (compileGlob oldglob CaseInsensitive (GlobFilePath False)) . decodeBS . fromMetaValue) (S.toList s) = (new, Narrowing)
| otherwise = (new, Widening)
{- With two globs, the old one is discarded, and the new one is used.
- We can tell if that's a narrowing change by checking if the old
- glob matches the new glob. For example, "*" matches "foo*",
- so that's narrowing. While "f?o" does not match "f??", so that's
- widening. -}
combineViewFilter (FilterGlob old) newglob@(FilterGlob new)
| old == new = (newglob, Unchanged)
| matchGlob (compileGlob old CaseInsensitive (GlobFilePath False)) new = (newglob, Narrowing)
| otherwise = (newglob, Widening)
{- Combining FilterValuesOrUnset and FilterGlobOrUnset with FilterValues
- and FilterGlob maintains the OrUnset if the second parameter has it,
- and is otherwise the same as combining without OrUnset, except that
- eliminating the OrUnset can be narrowing, and adding it can be widening. -}
combineViewFilter old@(FilterValuesOrUnset olds _) (FilterValuesOrUnset news newvu)
| combined == old = (combined, Unchanged)
| otherwise = (combined, Widening)
where
combined = FilterValuesOrUnset (S.union olds news) newvu
combineViewFilter (FilterValues olds) (FilterValuesOrUnset news vu) =
(combined, Widening)
where
combined = FilterValuesOrUnset (S.union olds news) vu
combineViewFilter old@(FilterValuesOrUnset olds _) (FilterValues news)
| combined == old = (combined, Narrowing)
| otherwise = (combined, Widening)
where
combined = FilterValues (S.union olds news)
combineViewFilter (FilterValuesOrUnset _ _) newglob@(FilterGlob _) =
(newglob, Widening)
combineViewFilter (FilterGlob _) new@(FilterValuesOrUnset _ _) =
(new, Widening)
combineViewFilter (FilterValues _) newglob@(FilterGlobOrUnset _ _) =
(newglob, Widening)
combineViewFilter (FilterValuesOrUnset _ _) newglob@(FilterGlobOrUnset _ _) =
(newglob, Widening)
combineViewFilter (FilterGlobOrUnset oldglob _) new@(FilterValues _) =
combineViewFilter (FilterGlob oldglob) new
combineViewFilter (FilterGlobOrUnset oldglob _) new@(FilterValuesOrUnset _ _) =
let (_, viewchange) = combineViewFilter (FilterGlob oldglob) new
in (new, viewchange)
combineViewFilter (FilterGlobOrUnset old _) newglob@(FilterGlobOrUnset new _)
| old == new = (newglob, Unchanged)
| matchGlob (compileGlob old CaseInsensitive (GlobFilePath False)) new = (newglob, Narrowing)
| otherwise = (newglob, Widening)
combineViewFilter (FilterGlob _) newglob@(FilterGlobOrUnset _ _) =
(newglob, Widening)
combineViewFilter (FilterGlobOrUnset _ _) newglob@(FilterGlob _) =
(newglob, Narrowing)
{- There is not a way to filter a value and also apply an exclude. So:
- When adding an exclude to a filter, use only the exclude.
- When adding a filter to an exclude, use only the filter. -}
combineViewFilter (FilterGlob _) new@(ExcludeValues _) = (new, Narrowing)
combineViewFilter (ExcludeValues _) new@(FilterGlob _) = (new, Widening)
combineViewFilter (FilterValues _) new@(ExcludeValues _) = (new, Narrowing)
combineViewFilter (ExcludeValues _) new@(FilterValues _) = (new, Widening)
combineViewFilter (FilterValuesOrUnset _ _) new@(ExcludeValues _) = (new, Narrowing)
combineViewFilter (ExcludeValues _) new@(FilterValuesOrUnset _ _) = (new, Widening)
combineViewFilter (FilterGlobOrUnset _ _) new@(ExcludeValues _) = (new, Narrowing)
combineViewFilter (ExcludeValues _) new@(FilterGlobOrUnset _ _) = (new, Widening)
{- Generates views for a file from a branch, based on its metadata
- and the filename used in the branch.
-
- Note that a file may appear multiple times in a view, when it
- has multiple matching values for a MetaField used in the View.
-
- Of course if its MetaData does not match the View, it won't appear at
- all.
-
- Note that for efficiency, it's useful to partially
- evaluate this function with the view parameter and reuse
- the result. The globs in the view will then be compiled and memoized.
-}
viewedFiles :: View -> MkViewedFile -> FilePath -> MetaData -> [ViewedFile]
viewedFiles view =
let matchers = map viewComponentMatcher (viewComponents view)
in \mkviewedfile file metadata ->
let matches = map (\m -> m metadata) matchers
in if any isNothing matches
then []
else
let paths = pathProduct $
map (map toviewpath) (visible matches)
in if null paths
then [mkviewedfile file]
else map (</> mkviewedfile file) paths
where
visible = map (fromJust . snd) .
filter (viewVisible . fst) .
zip (viewComponents view)
toviewpath (MatchingMetaValue v) = toViewPath v
toviewpath (MatchingUnset v) = toViewPath (toMetaValue (encodeBS v))
data MatchingValue = MatchingMetaValue MetaValue | MatchingUnset String
{- Checks if metadata matches a ViewComponent filter, and if so
- returns the value, or values that match. Self-memoizing on ViewComponent. -}
viewComponentMatcher :: ViewComponent -> (MetaData -> Maybe [MatchingValue])
viewComponentMatcher viewcomponent = \metadata ->
matcher Nothing (viewFilter viewcomponent)
(currentMetaDataValues metafield metadata)
where
metafield = viewField viewcomponent
matcher matchunset (FilterValues s) =
\values -> setmatches matchunset $ S.intersection s values
matcher matchunset (FilterGlob glob) =
let cglob = compileGlob glob CaseInsensitive (GlobFilePath False)
in \values -> setmatches matchunset $
S.filter (matchGlob cglob . decodeBS . fromMetaValue) values
matcher _ (ExcludeValues excludes) =
\values ->
if S.null (S.intersection values excludes)
then Just []
else Nothing
matcher _ (FilterValuesOrUnset s (ViewUnset u)) =
matcher (Just [MatchingUnset u]) (FilterValues s)
matcher _ (FilterGlobOrUnset glob (ViewUnset u)) =
matcher (Just [MatchingUnset u]) (FilterGlob glob)
setmatches matchunset s
| S.null s = matchunset
| otherwise = Just $
map MatchingMetaValue (S.toList s)
-- This is '', a unicode character that displays the same as '/' but is
-- not it. It is encoded using the filesystem encoding, which allows it
-- to be used even when not in a unicode capable locale.
pseudoSlash :: String
pseudoSlash = "\56546\56456\56469"
-- And this is '╲' similarly.
pseudoBackslash :: String
pseudoBackslash = "\56546\56469\56498"
-- And this is '﹕' similarly.
pseudoColon :: String
pseudoColon = "\56559\56505\56469"
toViewPath :: MetaValue -> FilePath
toViewPath = escapepseudo [] . decodeBS . fromMetaValue
where
escapepseudo s ('/':cs) = escapepseudo (pseudoSlash:s) cs
escapepseudo s ('\\':cs) = escapepseudo (pseudoBackslash:s) cs
escapepseudo s (':':cs) = escapepseudo (pseudoColon:s) cs
escapepseudo s ('%':cs) = escapepseudo ("%%":s) cs
escapepseudo s (c1:c2:c3:cs)
| [c1,c2,c3] == pseudoSlash = escapepseudo ("%":pseudoSlash:s) cs
| [c1,c2,c3] == pseudoBackslash = escapepseudo ("%":pseudoBackslash:s) cs
| [c1,c2,c3] == pseudoColon = escapepseudo ("%":pseudoColon:s) cs
| otherwise = escapepseudo ([c1]:s) (c2:c3:cs)
escapepseudo s (c:cs) = escapepseudo ([c]:s) cs
escapepseudo s [] = concat (reverse s)
fromViewPath :: FilePath -> MetaValue
fromViewPath = toMetaValue . encodeBS . deescapepseudo []
where
deescapepseudo s ('%':escapedc:cs) = deescapepseudo ([escapedc]:s) cs
deescapepseudo s (c1:c2:c3:cs)
| [c1,c2,c3] == pseudoSlash = deescapepseudo ("/":s) cs
| [c1,c2,c3] == pseudoBackslash = deescapepseudo ("\\":s) cs
| [c1,c2,c3] == pseudoColon = deescapepseudo (":":s) cs
| otherwise = deescapepseudo ([c1]:s) (c2:c3:cs)
deescapepseudo s cs = concat (reverse (cs:s))
prop_viewPath_roundtrips :: MetaValue -> Bool
prop_viewPath_roundtrips v = fromViewPath (toViewPath v) == v
pathProduct :: [[FilePath]] -> [FilePath]
pathProduct [] = []
pathProduct (l:ls) = foldl combinel l ls
where
combinel xs ys = [combine x y | x <- xs, y <- ys]
{- Extracts the metadata from a ViewedFile, based on the view that was used
- to construct it.
-
- Derived metadata is excluded.
-}
fromView :: View -> ViewedFile -> MetaData
fromView view f = MetaData $ m `M.difference` derived
where
m = M.fromList $ map convfield $
filter (not . isviewunset) (zip visible values)
visible = filter viewVisible (viewComponents view)
paths = splitDirectories (dropFileName f)
values = map (S.singleton . fromViewPath) paths
MetaData derived = getViewedFileMetaData f
convfield (vc, v) = (viewField vc, v)
-- When a directory is the one used to hold files that don't
-- have the metadata set, don't include it in the MetaData.
isviewunset (vc, v) = case viewFilter vc of
FilterValues {} -> False
FilterGlob {} -> False
ExcludeValues {} -> False
FilterValuesOrUnset _ (ViewUnset vu) -> isviewunset' vu v
FilterGlobOrUnset _ (ViewUnset vu) -> isviewunset' vu v
isviewunset' vu v = S.member (fromViewPath vu) v
{- Constructing a view that will match arbitrary metadata, and applying
- it to a file yields a set of ViewedFile which all contain the same
- MetaFields that were present in the input metadata
- (excluding fields that are not visible). -}
prop_view_roundtrips :: AssociatedFile -> MetaData -> Bool -> Bool
prop_view_roundtrips (AssociatedFile Nothing) _ _ = True
prop_view_roundtrips (AssociatedFile (Just f)) metadata visible = or
[ B.null (P.takeFileName f) && B.null (P.takeDirectory f)
, viewTooLarge view
, all hasfields (viewedFiles view (viewedFileFromReference' Nothing Nothing) (fromRawFilePath f) metadata)
]
where
view = View (Git.Ref "foo") $
map (\(mf, mv) -> ViewComponent mf (FilterValues $ S.filter (not . B.null . fromMetaValue) mv) visible)
(fromMetaData metadata)
visiblefields = sort (map viewField $ filter viewVisible (viewComponents view))
hasfields fv = sort (map fst (fromMetaData (fromView view fv))) == visiblefields
{- A directory foo/bar/baz/ is turned into metadata fields
- /=foo, foo/=bar, foo/bar/=baz.
-
- Note that this may generate MetaFields that legalField rejects.
- This is necessary to have a 1:1 mapping between directory names and
- fields. So this MetaData cannot safely be serialized. -}
getDirMetaData :: FilePath -> MetaData
getDirMetaData d = MetaData $ M.fromList $ zip fields values
where
dirs = splitDirectories d
fields = map (mkMetaFieldUnchecked . T.pack . addTrailingPathSeparator . joinPath)
(inits dirs)
values = map (S.singleton . toMetaValue . encodeBS . fromMaybe "" . headMaybe)
(tails dirs)
getWorkTreeMetaData :: FilePath -> MetaData
getWorkTreeMetaData = getDirMetaData . dropFileName
getViewedFileMetaData :: FilePath -> MetaData
getViewedFileMetaData = getDirMetaData . dirFromViewedFile . takeFileName
{- Applies a view to the currently checked out branch, generating a new
- branch for the view.
-}
applyView :: View -> Maybe Adjustment -> Annex Git.Branch
applyView v ma = do
gc <- Annex.getGitConfig
applyView' (viewedFileFromReference gc) getWorkTreeMetaData v ma
{- Generates a new branch for a View, which must be a more narrow
- version of the View originally used to generate the currently
- checked out branch. That is, it must match a subset of the files
- in view, not any others.
-}
narrowView :: View -> Maybe Adjustment -> Annex Git.Branch
narrowView = applyView' viewedFileReuse getViewedFileMetaData
{- Go through each staged file.
- If the file is not annexed, skip it, unless it's a dotfile in the top,
- or a file in a dotdir in the top.
- Look up the metadata of annexed files, and generate any ViewedFiles,
- and stage them.
-}
applyView' :: MkViewedFile -> (FilePath -> MetaData) -> View -> Maybe Adjustment -> Annex Git.Branch
applyView' mkviewedfile getfilemetadata view madj = do
top <- fromRepo Git.repoPath
(l, clean) <- inRepo $ Git.LsFiles.inRepoDetails [] [top]
applyView'' mkviewedfile getfilemetadata view madj l clean $
\(f, sha, mode) -> do
topf <- inRepo (toTopFilePath f)
k <- lookupKey f
return (topf, sha, toTreeItemType mode, k)
genViewBranch view madj
applyView''
:: MkViewedFile
-> (FilePath -> MetaData)
-> View
-> Maybe Adjustment
-> [t]
-> IO Bool
-> (t -> Annex (TopFilePath, Sha, Maybe TreeItemType, Maybe Key))
-> Annex ()
applyView'' mkviewedfile getfilemetadata view madj l clean conv = do
viewg <- withNewViewIndex gitRepo
withUpdateIndex viewg $ \uh -> do
g <- Annex.gitRepo
gc <- Annex.getGitConfig
-- Streaming the metadata like this is an optimisation.
catObjectStream g $ \mdfeeder mdcloser mdreader -> do
tid <- liftIO . async =<< forkState
(getmetadata gc mdfeeder mdcloser l)
process uh mdreader
join (liftIO (wait tid))
liftIO $ void clean
where
genviewedfiles = viewedFiles view mkviewedfile -- enables memoization
getmetadata _ _ mdcloser [] = liftIO mdcloser
getmetadata gc mdfeeder mdcloser (t:ts) = do
v@(topf, _sha, _treeitemtype, mkey) <- conv t
let feed mdlogf = liftIO $ mdfeeder
(v, Git.Ref.branchFileRef Annex.Branch.fullname mdlogf)
case mkey of
Just key -> feed (metaDataLogFile gc key)
Nothing
-- Handle toplevel dotfiles that are not
-- annexed files by feeding through a query
-- for dummy metadata. Calling
-- Git.UpdateIndex.streamUpdateIndex'
-- here would race with process's calls
-- to it.
| "." `B.isPrefixOf` getTopFilePath topf ->
feed "dummy"
| otherwise -> noop
getmetadata gc mdfeeder mdcloser ts
process uh mdreader = liftIO mdreader >>= \case
Just ((topf, _, mtreeitemtype, Just k), mdlog) -> do
let metadata = maybe emptyMetaData parseCurrentMetaData mdlog
let f = fromRawFilePath $ getTopFilePath topf
let metadata' = getfilemetadata f `unionMetaData` metadata
forM_ (genviewedfiles f metadata') $ \fv -> do
f' <- fromRepo (fromTopFilePath $ asTopFilePath $ toRawFilePath fv)
stagefile uh f' k mtreeitemtype
process uh mdreader
Just ((topf, sha, Just treeitemtype, Nothing), _) -> do
liftIO $ Git.UpdateIndex.streamUpdateIndex' uh $
pureStreamer $ updateIndexLine sha treeitemtype topf
process uh mdreader
Just _ -> process uh mdreader
Nothing -> return ()
stagefile uh f k mtreeitemtype = case madj of
Nothing -> stagesymlink uh f k
Just (LinkAdjustment UnlockAdjustment) ->
stagepointerfile uh f k mtreeitemtype
Just (LinkPresentAdjustment UnlockPresentAdjustment) ->
ifM (inAnnex k)
( stagepointerfile uh f k mtreeitemtype
, stagesymlink uh f k
)
Just (PresenceAdjustment HideMissingAdjustment (Just UnlockAdjustment)) ->
whenM (inAnnex k) $
stagepointerfile uh f k mtreeitemtype
Just (PresenceAdjustment HideMissingAdjustment _) ->
whenM (inAnnex k) $
stagesymlink uh f k
_ -> stagesymlink uh f k
stagesymlink uh f k = do
linktarget <- calcRepo (gitAnnexLink f k)
sha <- hashSymlink linktarget
liftIO . Git.UpdateIndex.streamUpdateIndex' uh
=<< inRepo (Git.UpdateIndex.stageSymlink f sha)
stagepointerfile uh f k mtreeitemtype = do
let treeitemtype = if mtreeitemtype == Just TreeExecutable
then TreeExecutable
else TreeFile
sha <- hashPointerFile k
liftIO . Git.UpdateIndex.streamUpdateIndex' uh
=<< inRepo (Git.UpdateIndex.stageFile sha treeitemtype f)
{- Updates the current view with any changes that have been made to its
- parent branch or the metadata since the view was created or last updated.
-
- When there were changes, returns a ref to a commit for the updated view.
- Does not update the view branch with it.
-
- This is not very optimised. An incremental update would be possible to
- implement and would be faster, but more complicated.
-}
updateView :: View -> Maybe Adjustment -> Annex (Maybe Git.Ref)
updateView view madj = do
(l, clean) <- inRepo $ Git.LsTree.lsTree
Git.LsTree.LsTreeRecursive
(Git.LsTree.LsTreeLong True)
(viewParentBranch view)
gc <- Annex.getGitConfig
applyView'' (viewedFileFromReference gc) getWorkTreeMetaData view madj l clean $
\ti -> do
let ref = Git.Ref.branchFileRef (viewParentBranch view)
(getTopFilePath (Git.LsTree.file ti))
k <- case Git.LsTree.size ti of
Nothing -> catKey ref
Just sz -> catKey' ref sz
return
( (Git.LsTree.file ti)
, (Git.LsTree.sha ti)
, (toTreeItemType (Git.LsTree.mode ti))
, k
)
oldcommit <- inRepo $ Git.Ref.sha (branchView view madj)
oldtree <- maybe (pure Nothing) (inRepo . Git.Ref.tree) oldcommit
newtree <- withViewIndex $ inRepo Git.Branch.writeTree
if oldtree /= Just newtree
then Just <$> do
cmode <- annexCommitMode <$> Annex.getGitConfig
let msg = "updated " ++ fromRef (branchView view madj)
let parent = catMaybes [oldcommit]
inRepo (Git.Branch.commitTree cmode [msg] parent newtree)
else return Nothing
{- Diff between currently checked out branch and staged changes, and
- update metadata to reflect the changes that are being committed to the
- view.
-
- Adding a file to a directory adds the metadata represented by
- that directory to the file, and removing a file from a directory
- removes the metadata.
-
- Note that removes must be handled before adds. This is so
- that moving a file from x/foo/ to x/bar/ adds back the metadata for x.
-}
withViewChanges :: (ViewedFile -> Key -> CommandStart) -> (ViewedFile -> Key -> CommandStart) -> Annex ()
withViewChanges addmeta removemeta = do
(diffs, cleanup) <- inRepo $ DiffTree.diffIndex Git.Ref.headRef
forM_ diffs handleremovals
forM_ diffs handleadds
void $ liftIO cleanup
where
handleremovals item
| DiffTree.srcsha item `notElem` nullShas =
handlechange item removemeta
=<< catKey (DiffTree.srcsha item)
| otherwise = noop
handleadds item
| DiffTree.dstsha item `notElem` nullShas =
handlechange item addmeta
=<< catKey (DiffTree.dstsha item)
| otherwise = noop
handlechange item a = maybe noop
(void . commandAction . a (fromRawFilePath $ getTopFilePath $ DiffTree.file item))
{- Runs an action using the view index file.
- Note that the file does not necessarily exist, or can contain
- info staged for an old view. -}
withViewIndex :: Annex a -> Annex a
withViewIndex = withIndexFile ViewIndexFile . const
withNewViewIndex :: Annex a -> Annex a
withNewViewIndex a = do
liftIO . removeWhenExistsWith R.removeLink =<< fromRepo gitAnnexViewIndex
withViewIndex a
{- Generates a branch for a view, using the view index file
- to make a commit to the view branch. The view branch is not
- checked out, but entering it will display the view. -}
genViewBranch :: View -> Maybe Adjustment -> Annex Git.Branch
genViewBranch view madj = withViewIndex $ do
let branch = branchView view madj
cmode <- annexCommitMode <$> Annex.getGitConfig
void $ inRepo $ Git.Branch.commit cmode True (fromRef branch) branch []
return branch
withCurrentView :: (View -> Maybe Adjustment -> Annex a) -> Annex a
withCurrentView a = maybe (giveup "Not in a view.") (uncurry a) =<< currentView

112
Annex/View/ViewedFile.hs Normal file
View file

@ -0,0 +1,112 @@
{- filenames (not paths) used in views
-
- Copyright 2014-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.View.ViewedFile (
ViewedFile,
MkViewedFile,
viewedFileFromReference,
viewedFileFromReference',
viewedFileReuse,
dirFromViewedFile,
prop_viewedFile_roundtrips,
) where
import Annex.Common
import Utility.QuickCheck
import Backend.Utilities (maxExtensions)
import qualified Data.ByteString as S
type FileName = String
type ViewedFile = FileName
type MkViewedFile = FilePath -> ViewedFile
{- Converts a filepath used in a reference branch to the
- filename that will be used in the view.
-
- No two filepaths from the same branch should yield the same result,
- so all directory structure needs to be included in the output filename
- in some way.
-
- So, from dir/subdir/file.foo, generate file_%dir%subdir%.foo
-}
viewedFileFromReference :: GitConfig -> MkViewedFile
viewedFileFromReference g = viewedFileFromReference'
(annexMaxExtensionLength g)
(annexMaxExtensions g)
viewedFileFromReference' :: Maybe Int -> Maybe Int -> MkViewedFile
viewedFileFromReference' maxextlen maxextensions f = concat $
[ escape (fromRawFilePath base')
, if null dirs then "" else "_%" ++ intercalate "%" (map escape dirs) ++ "%"
, escape $ fromRawFilePath $ S.concat extensions'
]
where
(path, basefile) = splitFileName f
dirs = filter (/= ".") $ map dropTrailingPathSeparator (splitPath path)
(base, extensions) = case maxextlen of
Nothing -> splitShortExtensions (toRawFilePath basefile')
Just n -> splitShortExtensions' (n+1) (toRawFilePath basefile')
{- Limit number of extensions. -}
maxextensions' = fromMaybe maxExtensions maxextensions
(base', extensions')
| length extensions <= maxextensions' = (base, extensions)
| otherwise =
let (es,more) = splitAt maxextensions' (reverse extensions)
in (base <> mconcat (reverse more), reverse es)
{- On Windows, if the filename looked like "dir/c:foo" then
- basefile would look like it contains a drive letter, which will
- not work. There cannot really be a filename like that, probably,
- but it prevents the test suite failing. -}
(_basedrive, basefile') = splitDrive basefile
{- To avoid collisions with filenames or directories that contain
- '%', and to allow the original directories to be extracted
- from the ViewedFile, '%' is escaped. )
-}
escape :: String -> String
escape = replace "%" (escchar:'%':[]) . replace [escchar] [escchar, escchar]
escchar :: Char
#ifndef mingw32_HOST_OS
escchar = '\\'
#else
-- \ is path separator on Windows, so instead use !
escchar = '!'
#endif
{- For use when operating already within a view, so whatever filepath
- is present in the work tree is already a ViewedFile. -}
viewedFileReuse :: MkViewedFile
viewedFileReuse = takeFileName
{- Extracts from a ViewedFile the directory where the file is located on
- in the reference branch. -}
dirFromViewedFile :: ViewedFile -> FilePath
dirFromViewedFile = joinPath . drop 1 . sep [] ""
where
sep l _ [] = reverse l
sep l curr (c:cs)
| c == '%' = sep (reverse curr:l) "" cs
| c == escchar = case cs of
(c':cs') -> sep l (c':curr) cs'
[] -> sep l curr cs
| otherwise = sep l (c:curr) cs
prop_viewedFile_roundtrips :: TestableFilePath -> Bool
prop_viewedFile_roundtrips tf
-- Relative filenames wanted, not directories.
| any (isPathSeparator) (end f ++ beginning f) = True
| isAbsolute f || isDrive f = True
| otherwise = dir == dirFromViewedFile
(viewedFileFromReference' Nothing Nothing f)
where
f = fromTestableFilePath tf
dir = joinPath $ beginning $ splitDirectories f

75
Annex/Wanted.hs Normal file
View file

@ -0,0 +1,75 @@
{- git-annex checking whether content is wanted
-
- Copyright 2012-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Wanted where
import Annex.Common
import Logs.PreferredContent
import Annex.UUID
import Annex.CatFile
import Git.FilePath
import qualified Database.Keys
import Types.FileMatcher
import qualified Data.Set as S
{- Check if a file is preferred content for the local repository. -}
wantGet :: Bool -> Maybe Key -> AssociatedFile -> Annex Bool
wantGet d key file = isPreferredContent Nothing S.empty key file d
{- Check if a file is preferred content for a repository. -}
wantGetBy :: Bool -> Maybe Key -> AssociatedFile -> UUID -> Annex Bool
wantGetBy d key file to = isPreferredContent (Just to) S.empty key file d
{- Check if a file is not preferred or required content, and can be
- dropped. When a UUID is provided, checks for that repository.
-
- The AssociatedFile is the one that the user requested to drop.
- There may be other files that use the same key, and preferred content
- may match some of those and not others. If any are preferred content,
- that will prevent dropping. When the other associated files are known,
- they can be provided, otherwise this looks them up.
-}
wantDrop :: Bool -> Maybe UUID -> Maybe Key -> AssociatedFile -> (Maybe [AssociatedFile]) -> Annex Bool
wantDrop d from key file others =
isNothing <$> checkDrop isPreferredContent d from key file others
{- Generalization of wantDrop that can also be used with isRequiredContent.
-
- When the content should not be dropped, returns Just the file that
- the checker matches.
-}
checkDrop :: (Maybe UUID -> AssumeNotPresent -> Maybe Key -> AssociatedFile -> Bool -> Annex Bool) -> Bool -> Maybe UUID -> Maybe Key -> AssociatedFile -> (Maybe [AssociatedFile]) -> Annex (Maybe AssociatedFile)
checkDrop checker d from key file others = do
u <- maybe getUUID (pure . id) from
let s = S.singleton u
let checker' f = checker (Just u) s key f d
ifM (checker' file)
( return (Just file)
, do
others' <- case others of
Just afs -> pure (filter (/= file) afs)
Nothing -> case key of
Just k ->
mapM (\f -> AssociatedFile . Just <$> fromRepo (fromTopFilePath f))
=<< Database.Keys.getAssociatedFiles k
Nothing -> pure []
l <- filterM checker' others'
if null l
then return Nothing
else checkassociated l
)
where
-- Some associated files that are in the keys database may no
-- longer correspond to files in the repository, and should
-- not prevent dropping.
checkassociated [] = return Nothing
checkassociated (af@(AssociatedFile (Just f)):fs) =
catKeyFile f >>= \case
Just k | Just k == key -> return (Just af)
_ -> checkassociated fs
checkassociated (AssociatedFile Nothing:fs) = checkassociated fs

60
Annex/WorkTree.hs Normal file
View file

@ -0,0 +1,60 @@
{- git-annex worktree files
-
- Copyright 2013-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.WorkTree where
import Annex.Common
import Annex.Link
import Annex.CatFile
import Annex.CurrentBranch
import qualified Database.Keys
{- Looks up the key corresponding to an annexed file in the work tree,
- by examining what the symlink points to.
-
- An unlocked file will not have a link on disk, so fall back to
- looking for a pointer to a key in git.
-
- When in an adjusted branch that may have hidden the file, looks for a
- pointer to a key in the original branch.
-}
lookupKey :: RawFilePath -> Annex (Maybe Key)
lookupKey = lookupKey' catkeyfile
where
catkeyfile file =
ifM (liftIO $ doesFileExist $ fromRawFilePath file)
( catKeyFile file
, catKeyFileHidden file =<< getCurrentBranch
)
{- Like lookupKey, but only looks at files staged in git, not at unstaged
- changes in the work tree. This means it's slower, but it also has
- consistently the same behavior for locked files as for unlocked files.
-}
lookupKeyStaged :: RawFilePath -> Annex (Maybe Key)
lookupKeyStaged file = catKeyFile file >>= \case
Just k -> return (Just k)
Nothing -> catKeyFileHidden file =<< getCurrentBranch
{- Like lookupKey, but does not find keys for hidden files. -}
lookupKeyNotHidden :: RawFilePath -> Annex (Maybe Key)
lookupKeyNotHidden = lookupKey' catkeyfile
where
catkeyfile file =
ifM (liftIO $ doesFileExist $ fromRawFilePath file)
( catKeyFile file
, return Nothing
)
lookupKey' :: (RawFilePath -> Annex (Maybe Key)) -> RawFilePath -> Annex (Maybe Key)
lookupKey' catkeyfile file = isAnnexLink file >>= \case
Just key -> return (Just key)
Nothing -> catkeyfile file
{- Find all annexed files and update the keys database for them. -}
scanAnnexedFiles :: Annex ()
scanAnnexedFiles = Database.Keys.updateDatabase

124
Annex/WorkerPool.hs Normal file
View file

@ -0,0 +1,124 @@
{- git-annex worker thread pool
-
- Copyright 2015-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.WorkerPool where
import Annex
import Annex.Common
import Types.WorkerPool
import Control.Concurrent
import Control.Concurrent.STM
{- Runs an action and makes the current thread have the specified stage
- while doing so. If too many other threads are running in the specified
- stage, waits for one of them to become idle.
-
- Noop if the current thread already has the requested stage, or if the
- current thread is not in the worker pool, or if concurrency is not
- enabled.
-
- Also a noop if the stage is not one of the stages that the worker pool
- uses.
-}
enteringStage :: WorkerStage -> Annex a -> Annex a
enteringStage newstage a = Annex.getState Annex.workers >>= \case
Nothing -> a
Just tv -> do
mytid <- liftIO myThreadId
let set = changeStageTo mytid tv (const newstage)
let restore = maybe noop (void . changeStageTo mytid tv . const)
bracket set restore (const a)
{- Transition the current thread to the initial stage.
- This is done once the thread is ready to begin work.
-}
enteringInitialStage :: Annex ()
enteringInitialStage = Annex.getState Annex.workers >>= \case
Nothing -> noop
Just tv -> do
mytid <- liftIO myThreadId
void $ changeStageTo mytid tv initialStage
{- This needs to leave the WorkerPool with the same number of
- idle and active threads, and with the same number of threads for each
- WorkerStage. So, all it can do is swap the WorkerStage of our thread's
- ActiveWorker with an IdleWorker.
-
- Must avoid a deadlock if all worker threads end up here at the same
- time, or if there are no suitable IdleWorkers left. So if necessary
- we first replace our ActiveWorker with an IdleWorker in the pool, to allow
- some other thread to use it, before waiting for a suitable IdleWorker
- for us to use.
-
- Note that the spareVals in the WorkerPool does not get anything added to
- it when adding the IdleWorker, so there will for a while be more IdleWorkers
- in the pool than spareVals. That does not prevent other threads that call
- this from using them though, so it's fine.
-}
changeStageTo :: ThreadId -> TMVar (WorkerPool t) -> (UsedStages -> WorkerStage) -> Annex (Maybe WorkerStage)
changeStageTo mytid tv getnewstage = liftIO $
replaceidle >>= maybe
(return Nothing)
(either waitidle (return . Just))
where
replaceidle = atomically $ do
pool <- takeTMVar tv
let newstage = getnewstage (usedStages pool)
let notchanging = do
putTMVar tv pool
return Nothing
if memberStage newstage (usedStages pool)
then case removeThreadIdWorkerPool mytid pool of
Just ((myaid, oldstage), pool')
| oldstage /= newstage -> case getIdleWorkerSlot newstage pool' of
Nothing -> do
putTMVar tv $
addWorkerPool (IdleWorker oldstage) pool'
return $ Just $ Left (myaid, newstage, oldstage)
Just pool'' -> do
-- optimisation
putTMVar tv $
addWorkerPool (IdleWorker oldstage) $
addWorkerPool (ActiveWorker myaid newstage) pool''
return $ Just $ Right oldstage
| otherwise -> notchanging
_ -> notchanging
else notchanging
waitidle (myaid, newstage, oldstage) = atomically $ do
pool <- waitIdleWorkerSlot newstage =<< takeTMVar tv
putTMVar tv $ addWorkerPool (ActiveWorker myaid newstage) pool
return (Just oldstage)
-- | Waits until there's an idle StartStage worker in the worker pool,
-- removes it from the pool, and returns its state.
waitStartWorkerSlot :: TMVar (WorkerPool t) -> STM (t, WorkerStage)
waitStartWorkerSlot tv = do
pool <- takeTMVar tv
v <- go pool
return (v, StartStage)
where
go pool = case spareVals pool of
[] -> retry
(v:vs) -> do
let pool' = pool { spareVals = vs }
putTMVar tv =<< waitIdleWorkerSlot StartStage pool'
return v
waitIdleWorkerSlot :: WorkerStage -> WorkerPool t -> STM (WorkerPool t)
waitIdleWorkerSlot wantstage = maybe retry return . getIdleWorkerSlot wantstage
getIdleWorkerSlot :: WorkerStage -> WorkerPool t -> Maybe (WorkerPool t)
getIdleWorkerSlot wantstage pool = do
l <- findidle [] (workerList pool)
return $ pool { workerList = l }
where
findidle _ [] = Nothing
findidle c ((IdleWorker stage):rest)
| stage == wantstage = Just (c ++ rest)
findidle c (w:rest) = findidle (w:c) rest

410
Annex/YoutubeDl.hs Normal file
View file

@ -0,0 +1,410 @@
{- yt-dlp (and deprecated youtube-dl) integration for git-annex
-
- Copyright 2017-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE DeriveGeneric #-}
module Annex.YoutubeDl (
youtubeDl,
youtubeDlTo,
youtubeDlSupported,
youtubeDlCheck,
youtubeDlFileName,
youtubeDlFileNameHtmlOnly,
youtubeDlCommand,
youtubePlaylist,
YoutubePlaylistItem(..),
) where
import Annex.Common
import qualified Annex
import Annex.Content
import Annex.Url
import Utility.DiskFree
import Utility.HtmlDetect
import Utility.Process.Transcript
import Utility.Metered
import Utility.Tmp
import Messages.Progress
import Logs.Transfer
import Network.URI
import Control.Concurrent.Async
import Text.Read
import Data.Either
import qualified Data.Aeson as Aeson
import GHC.Generics
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
-- youtube-dl can follow redirects to anywhere, including potentially
-- localhost or a private address. So, it's only allowed to download
-- content if the user has allowed access to all addresses.
youtubeDlAllowed :: Annex Bool
youtubeDlAllowed = ipAddressesUnlimited
youtubeDlNotAllowedMessage :: String
youtubeDlNotAllowedMessage = unwords
[ "This url is supported by yt-dlp, but"
, "yt-dlp could potentially access any address, and the"
, "configuration of annex.security.allowed-ip-addresses"
, "does not allow that. Not using yt-dlp (or youtube-dl)."
]
-- Runs youtube-dl in a work directory, to download a single media file
-- from the url. Returns the path to the media file in the work directory.
--
-- Displays a progress meter as youtube-dl downloads.
--
-- If no file is downloaded, or the program is not installed,
-- returns Right Nothing.
--
-- youtube-dl can write to multiple files, either temporary files, or
-- multiple videos found at the url, and git-annex needs only one file.
-- So we need to find the destination file, and make sure there is not
-- more than one. With yt-dlp use --print-to-file to make it record the
-- file(s) it downloads. With youtube-dl, the best that can be done is
-- to require that the work directory end up with only 1 file in it.
-- (This can fail, but youtube-dl is deprecated, and they closed my
-- issue requesting something like --print-to-file;
-- <https://github.com/rg3/youtube-dl/issues/14864>)
youtubeDl :: URLString -> FilePath -> MeterUpdate -> Annex (Either String (Maybe FilePath))
youtubeDl url workdir p = ifM ipAddressesUnlimited
( withUrlOptions $ youtubeDl' url workdir p
, return $ Left youtubeDlNotAllowedMessage
)
youtubeDl' :: URLString -> FilePath -> MeterUpdate -> UrlOptions -> Annex (Either String (Maybe FilePath))
youtubeDl' url workdir p uo
| supportedScheme uo url = do
cmd <- youtubeDlCommand
ifM (liftIO $ inSearchPath cmd)
( runcmd cmd >>= \case
Right True -> downloadedfiles cmd >>= \case
(f:[]) -> return (Right (Just f))
[] -> return (nofiles cmd)
fs -> return (toomanyfiles cmd fs)
Right False -> workdirfiles >>= \case
[] -> return (Right Nothing)
_ -> return (Left $ cmd ++ " download is incomplete. Run the command again to resume.")
Left msg -> return (Left msg)
, return (Right Nothing)
)
| otherwise = return (Right Nothing)
where
nofiles cmd = Left $ cmd ++ " did not put any media in its work directory, perhaps it's been configured to store files somewhere else?"
toomanyfiles cmd fs = Left $ cmd ++ " downloaded multiple media files; git-annex is only able to deal with one per url: " ++ show fs
downloadedfiles cmd
| isytdlp cmd = liftIO $
(nub . lines <$> readFile filelistfile)
`catchIO` (pure . const [])
| otherwise = workdirfiles
workdirfiles = liftIO $ filter (/= filelistfile)
<$> (filterM (doesFileExist) =<< dirContents workdir)
filelistfile = workdir </> filelistfilebase
filelistfilebase = "git-annex-file-list-file"
isytdlp cmd = cmd == "yt-dlp"
runcmd cmd = youtubeDlMaxSize workdir >>= \case
Left msg -> return (Left msg)
Right maxsize -> do
opts <- youtubeDlOpts (dlopts cmd ++ maxsize)
oh <- mkOutputHandlerQuiet
-- The size is unknown to start. Once youtube-dl
-- outputs some progress, the meter will be updated
-- with the size, which is why it's important the
-- meter is passed into commandMeter'
let unknownsize = Nothing :: Maybe FileSize
ok <- metered (Just p) unknownsize Nothing $ \meter meterupdate ->
liftIO $ commandMeter'
(if isytdlp cmd then parseYtdlpProgress else parseYoutubeDlProgress)
oh (Just meter) meterupdate cmd opts
(\pr -> pr { cwd = Just workdir })
return (Right ok)
dlopts cmd =
[ Param url
-- To make it only download one file when given a
-- page with a video and a playlist, download only the video.
, Param "--no-playlist"
-- And when given a page with only a playlist, download only
-- the first video on the playlist. (Assumes the video is
-- somewhat stable, but this is the only way to prevent
-- it from downloading the whole playlist.)
, Param "--playlist-items", Param "0"
] ++
if isytdlp cmd
then
-- Avoid warnings, which go to
-- stderr and may mess up
-- git-annex's display.
[ Param "--no-warnings"
, Param "--progress-template"
, Param progressTemplate
, Param "--print-to-file"
, Param "after_move:filepath"
, Param filelistfilebase
]
else []
-- To honor annex.diskreserve, ask youtube-dl to not download too
-- large a media file. Factors in other downloads that are in progress,
-- and any files in the workdir that it may have partially downloaded
-- before.
youtubeDlMaxSize :: FilePath -> Annex (Either String [CommandParam])
youtubeDlMaxSize workdir = ifM (Annex.getRead Annex.force)
( return $ Right []
, liftIO (getDiskFree workdir) >>= \case
Just have -> do
inprogress <- sizeOfDownloadsInProgress (const True)
partial <- liftIO $ sum
<$> (mapM (getFileSize . toRawFilePath) =<< dirContents workdir)
reserve <- annexDiskReserve <$> Annex.getGitConfig
let maxsize = have - reserve - inprogress + partial
if maxsize > 0
then return $ Right
[ Param "--max-filesize"
, Param (show maxsize)
]
else return $ Left $
needMoreDiskSpace $
negate maxsize + 1024
Nothing -> return $ Right []
)
-- Download a media file to a destination,
youtubeDlTo :: Key -> URLString -> FilePath -> MeterUpdate -> Annex Bool
youtubeDlTo key url dest p = do
res <- withTmpWorkDir key $ \workdir ->
youtubeDl url (fromRawFilePath workdir) p >>= \case
Right (Just mediafile) -> do
liftIO $ moveFile (toRawFilePath mediafile) (toRawFilePath dest)
return (Just True)
Right Nothing -> return (Just False)
Left msg -> do
warning (UnquotedString msg)
return Nothing
return (fromMaybe False res)
-- youtube-dl supports downloading urls that are not html pages,
-- but we don't want to use it for such urls, since they can be downloaded
-- without it. So, this first downloads part of the content and checks
-- if it's a html page; only then is youtube-dl used.
htmlOnly :: URLString -> a -> Annex a -> Annex a
htmlOnly url fallback a = withUrlOptions $ \uo ->
liftIO (downloadPartial url uo htmlPrefixLength) >>= \case
Just bs | isHtmlBs bs -> a
_ -> return fallback
-- Check if youtube-dl supports downloading content from an url.
youtubeDlSupported :: URLString -> Annex Bool
youtubeDlSupported url = either (const False) id
<$> withUrlOptions (youtubeDlCheck' url)
-- Check if youtube-dl can find media in an url.
--
-- While this does not download anything, it checks youtubeDlAllowed
-- for symmetry with youtubeDl; the check should not succeed if the
-- download won't succeed.
youtubeDlCheck :: URLString -> Annex (Either String Bool)
youtubeDlCheck url = ifM youtubeDlAllowed
( withUrlOptions $ youtubeDlCheck' url
, return $ Left youtubeDlNotAllowedMessage
)
youtubeDlCheck' :: URLString -> UrlOptions -> Annex (Either String Bool)
youtubeDlCheck' url uo
| supportedScheme uo url = catchMsgIO $ htmlOnly url False $ do
opts <- youtubeDlOpts [ Param url, Param "--simulate" ]
cmd <- youtubeDlCommand
liftIO $ snd <$> processTranscript cmd (toCommand opts) Nothing
| otherwise = return (Right False)
-- Ask youtube-dl for the filename of media in an url.
--
-- (This is not always identical to the filename it uses when downloading.)
youtubeDlFileName :: URLString -> Annex (Either String FilePath)
youtubeDlFileName url = withUrlOptions go
where
go uo
| supportedScheme uo url = flip catchIO (pure . Left . show) $
htmlOnly url nomedia (youtubeDlFileNameHtmlOnly' url uo)
| otherwise = return nomedia
nomedia = Left "no media in url"
-- Does not check if the url contains htmlOnly; use when that's already
-- been verified.
youtubeDlFileNameHtmlOnly :: URLString -> Annex (Either String FilePath)
youtubeDlFileNameHtmlOnly = withUrlOptions . youtubeDlFileNameHtmlOnly'
youtubeDlFileNameHtmlOnly' :: URLString -> UrlOptions -> Annex (Either String FilePath)
youtubeDlFileNameHtmlOnly' url uo
| supportedScheme uo url = flip catchIO (pure . Left . show) go
| otherwise = return nomedia
where
go = do
-- Sometimes youtube-dl will fail with an ugly backtrace
-- (eg, http://bugs.debian.org/874321)
-- so catch stderr as well as stdout to avoid the user
-- seeing it. --no-warnings avoids warning messages that
-- are output to stdout.
opts <- youtubeDlOpts
[ Param url
, Param "--get-filename"
, Param "--no-warnings"
, Param "--no-playlist"
]
cmd <- youtubeDlCommand
let p = (proc cmd (toCommand opts))
{ std_out = CreatePipe
, std_err = CreatePipe
}
liftIO $ withCreateProcess p waitproc
waitproc Nothing (Just o) (Just e) pid = do
errt <- async $ discardstderr pid e
output <- hGetContentsStrict o
ok <- liftIO $ checkSuccessProcess pid
wait errt
return $ case (ok, lines output) of
(True, (f:_)) | not (null f) -> Right f
_ -> nomedia
waitproc _ _ _ _ = error "internal"
discardstderr pid e = hGetLineUntilExitOrEOF pid e >>= \case
Nothing -> return ()
Just _ -> discardstderr pid e
nomedia = Left "no media in url"
youtubeDlOpts :: [CommandParam] -> Annex [CommandParam]
youtubeDlOpts addopts = do
opts <- map Param . annexYoutubeDlOptions <$> Annex.getGitConfig
return (opts ++ addopts)
youtubeDlCommand :: Annex String
youtubeDlCommand = annexYoutubeDlCommand <$> Annex.getGitConfig >>= \case
Just c -> pure c
Nothing -> ifM (liftIO $ inSearchPath "yt-dlp")
( return "yt-dlp"
, return "youtube-dl"
)
supportedScheme :: UrlOptions -> URLString -> Bool
supportedScheme uo url = case parseURIRelaxed url of
Nothing -> False
Just u -> case uriScheme u of
-- avoid ugly message from youtube-dl about not supporting file:
"file:" -> False
-- ftp indexes may look like html pages, and there's no point
-- involving youtube-dl in a ftp download
"ftp:" -> False
_ -> allowedScheme uo u
progressTemplate :: String
progressTemplate = "ANNEX %(progress.downloaded_bytes)i %(progress.total_bytes_estimate)i %(progress.total_bytes)i ANNEX"
{- The progressTemplate makes output look like "ANNEX 10 100 NA ANNEX" or
- "ANNEX 10 NA 100 ANNEX" depending on whether the total bytes are estimated
- or known. That makes parsing much easier (and less fragile) than parsing
- the usual progress output.
-}
parseYtdlpProgress :: ProgressParser
parseYtdlpProgress = go [] . reverse . progresschunks
where
delim = '\r'
progresschunks = splitc delim
go remainder [] = (Nothing, Nothing, remainder)
go remainder (x:xs) = case splitc ' ' x of
("ANNEX":downloaded_bytes_s:total_bytes_estimate_s:total_bytes_s:"ANNEX":[]) ->
case (readMaybe downloaded_bytes_s, readMaybe total_bytes_estimate_s, readMaybe total_bytes_s) of
(Just downloaded_bytes, Nothing, Just total_bytes) ->
( Just (BytesProcessed downloaded_bytes)
, Just (TotalSize total_bytes)
, remainder
)
(Just downloaded_bytes, Just total_bytes_estimate, _) ->
( Just (BytesProcessed downloaded_bytes)
, Just (TotalSize total_bytes_estimate)
, remainder
)
_ -> go (remainder++x) xs
_ -> go (remainder++x) xs
{- youtube-dl is deprecated, parsing its progress was attempted before but
- was buggy and is no longer done. -}
parseYoutubeDlProgress :: ProgressParser
parseYoutubeDlProgress _ = (Nothing, Nothing, "")
{- List the items that yt-dlp can download from an url.
-
- Note that this does not check youtubeDlAllowed because it does not
- download content.
-}
youtubePlaylist :: URLString -> Annex (Either String [YoutubePlaylistItem])
youtubePlaylist url = do
cmd <- youtubeDlCommand
if cmd == "yt-dlp"
then liftIO $ youtubePlaylist' url cmd
else return $ Left $ "Scraping needs yt-dlp, but git-annex has been configured to use " ++ cmd
youtubePlaylist' :: URLString -> String -> IO (Either String [YoutubePlaylistItem])
youtubePlaylist' url cmd = withTmpFile "yt-dlp" $ \tmpfile h -> do
hClose h
(outerr, ok) <- processTranscript cmd
[ "--simulate"
, "--flat-playlist"
-- Skip live videos in progress
, "--match-filter", "!is_live"
, "--print-to-file"
-- Write json with selected fields.
, "%(.{" ++ intercalate "," youtubePlaylistItemFields ++ "})j"
, tmpfile
, url
]
Nothing
if ok
then flip catchIO (pure . Left . show) $ do
v <- map Aeson.eitherDecodeStrict . B8.lines
<$> B.readFile tmpfile
return $ case partitionEithers v of
((parserr:_), _) ->
Left $ "yt-dlp json parse error: " ++ parserr
([], r) -> Right r
else return $ Left $ if null outerr
then "yt-dlp failed"
else "yt-dlp failed: " ++ outerr
-- There are other fields that yt-dlp can extract, but these are similar to
-- the information from an RSS feed.
youtubePlaylistItemFields :: [String]
youtubePlaylistItemFields =
[ "playlist_title"
, "playlist_uploader"
, "title"
, "description"
, "license"
, "url"
, "timestamp"
]
-- Parse JSON generated by yt-dlp for playlist. Note that any field
-- may be omitted when that information is not supported for a given website.
data YoutubePlaylistItem = YoutubePlaylistItem
{ youtube_playlist_title :: Maybe String
, youtube_playlist_uploader :: Maybe String
, youtube_title :: Maybe String
, youtube_description :: Maybe String
, youtube_license :: Maybe String
, youtube_url :: Maybe String
, youtube_timestamp :: Maybe Integer -- ^ unix timestamp
} deriving (Generic, Show)
instance Aeson.FromJSON YoutubePlaylistItem
where
parseJSON = Aeson.genericParseJSON Aeson.defaultOptions
{ Aeson.fieldLabelModifier = drop (length "youtube_") }

194
Assistant.hs Normal file
View file

@ -0,0 +1,194 @@
{- git-annex assistant daemon
-
- Copyright 2012-2013 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Assistant where
import qualified Annex
import Assistant.Common
import Assistant.DaemonStatus
import Assistant.NamedThread
import Assistant.Types.ThreadedMonad
import Assistant.Threads.DaemonStatus
import Assistant.Threads.Watcher
import Assistant.Threads.Committer
import Assistant.Threads.Pusher
import Assistant.Threads.Exporter
import Assistant.Threads.Merger
import Assistant.Threads.TransferWatcher
import Assistant.Threads.Transferrer
import Assistant.Threads.RemoteControl
import Assistant.Threads.SanityChecker
import Assistant.Threads.Cronner
import Assistant.Threads.ProblemFixer
#ifndef mingw32_HOST_OS
import Assistant.Threads.MountWatcher
#endif
import Assistant.Threads.NetWatcher
import Assistant.Threads.Upgrader
import Assistant.Threads.UpgradeWatcher
import Assistant.Threads.TransferScanner
import Assistant.Threads.TransferPoller
import Assistant.Threads.ConfigMonitor
import Assistant.Threads.Glacier
#ifdef WITH_WEBAPP
import Assistant.WebApp
import Assistant.Threads.WebApp
#ifdef WITH_PAIRING
import Assistant.Threads.PairListener
#endif
#else
import Assistant.Types.UrlRenderer
#endif
import qualified Utility.Daemon
import Utility.ThreadScheduler
import Utility.HumanTime
import Annex.Perms
import Annex.BranchState
import Utility.LogFile
import Annex.Path
#ifdef mingw32_HOST_OS
import Utility.Env
import System.Environment (getArgs)
#endif
import qualified Utility.Debug as Debug
import Network.Socket (HostName, PortNumber)
stopDaemon :: Annex ()
stopDaemon = liftIO . Utility.Daemon.stopDaemon . fromRawFilePath
=<< fromRepo gitAnnexPidFile
{- Starts the daemon. If the daemon is run in the foreground, once it's
- running, can start the browser.
-
- startbrowser is passed the url and html shim file, as well as the original
- stdout and stderr descriptors. -}
startDaemon :: Bool -> Bool -> Maybe Duration -> Maybe String -> Maybe HostName -> Maybe PortNumber -> Maybe (Maybe Handle -> Maybe Handle -> String -> FilePath -> IO ()) -> Annex ()
startDaemon assistant foreground startdelay cannotrun listenhost listenport startbrowser = do
Annex.changeState $ \s -> s { Annex.daemon = True }
enableInteractiveBranchAccess
pidfile <- fromRepo gitAnnexPidFile
logfile <- fromRepo gitAnnexDaemonLogFile
liftIO $ Debug.debug "Assistant" $ "logging to " ++ fromRawFilePath logfile
createAnnexDirectory (parentDir pidfile)
#ifndef mingw32_HOST_OS
createAnnexDirectory (parentDir logfile)
let logfd = handleToFd =<< openLog (fromRawFilePath logfile)
if foreground
then do
origout <- liftIO $ catchMaybeIO $
fdToHandle =<< dup stdOutput
origerr <- liftIO $ catchMaybeIO $
fdToHandle =<< dup stdError
let undaemonize = Utility.Daemon.foreground logfd (Just (fromRawFilePath pidfile))
start undaemonize $
case startbrowser of
Nothing -> Nothing
Just a -> Just $ a origout origerr
else do
git_annex <- liftIO programPath
ps <- gitAnnexDaemonizeParams
start (Utility.Daemon.daemonize git_annex ps logfd (Just (fromRawFilePath pidfile)) False) Nothing
#else
-- Windows doesn't daemonize, but does redirect output to the
-- log file. The only way to do so is to restart the program.
when (foreground || not foreground) $ do
let flag = "GIT_ANNEX_OUTPUT_REDIR"
createAnnexDirectory (parentDir logfile)
ifM (liftIO $ isNothing <$> getEnv flag)
( liftIO $ withNullHandle $ \nullh -> do
loghandle <- openLog (fromRawFilePath logfile)
e <- getEnvironment
cmd <- programPath
ps <- getArgs
let p = (proc cmd ps)
{ env = Just (addEntry flag "1" e)
, std_in = UseHandle nullh
, std_out = UseHandle loghandle
, std_err = UseHandle loghandle
}
exitcode <- withCreateProcess p $ \_ _ _ pid ->
waitForProcess pid
exitWith exitcode
, start (Utility.Daemon.foreground (Just (fromRawFilePath pidfile))) $
case startbrowser of
Nothing -> Nothing
Just a -> Just $ a Nothing Nothing
)
#endif
where
start daemonize webappwaiter = withThreadState $ \st -> do
checkCanWatch
dstatus <- startDaemonStatus
logfile <- fromRepo gitAnnexDaemonLogFile
liftIO $ Debug.debug "Assistant" $ "logging to " ++ fromRawFilePath logfile
liftIO $ daemonize $
flip runAssistant (go webappwaiter)
=<< newAssistantData st dstatus
#ifdef WITH_WEBAPP
go webappwaiter = do
d <- getAssistant id
#else
go _webappwaiter = do
#endif
urlrenderer <- liftIO newUrlRenderer
#ifdef WITH_WEBAPP
let webappthread = [ assist $ webAppThread d urlrenderer False cannotrun Nothing listenhost listenport webappwaiter ]
#else
let webappthread = []
#endif
let threads = if isJust cannotrun
then webappthread
else webappthread ++
[ watch commitThread
#ifdef WITH_WEBAPP
#ifdef WITH_PAIRING
, assist $ pairListenerThread urlrenderer
#endif
#endif
, assist pushThread
, assist pushRetryThread
, assist exportThread
, assist exportRetryThread
, assist mergeThread
, assist transferWatcherThread
, assist transferPollerThread
, assist transfererThread
, assist remoteControlThread
, assist daemonStatusThread
, assist $ sanityCheckerDailyThread urlrenderer
, assist sanityCheckerHourlyThread
, assist $ problemFixerThread urlrenderer
#ifndef mingw32_HOST_OS
, assist $ mountWatcherThread urlrenderer
#endif
, assist netWatcherThread
, assist $ upgraderThread urlrenderer
, assist $ upgradeWatcherThread urlrenderer
, assist netWatcherFallbackThread
, assist $ transferScannerThread urlrenderer
, assist $ cronnerThread urlrenderer
, assist configMonitorThread
, assist glacierThread
, watch watchThread
-- must come last so that all threads that wait
-- on it have already started waiting
, watch $ sanityCheckerStartupThread startdelay
]
mapM_ (startthread urlrenderer) threads
liftIO waitForTermination
watch a = (True, a)
assist a = (False, a)
startthread urlrenderer (watcher, t)
| watcher || assistant = startNamedThread urlrenderer t
| otherwise = noop

460
Assistant/Alert.hs Normal file
View file

@ -0,0 +1,460 @@
{- git-annex assistant alerts
-
- Copyright 2012-2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings, CPP, BangPatterns #-}
module Assistant.Alert where
import Annex.Common
import Assistant.Types.Alert
import Assistant.Alert.Utility
import qualified Remote
import Utility.Tense
import Types.Transfer
import Types.Distribution
import Git.Types (RemoteName)
import Data.String
import qualified Data.Text as T
import qualified Control.Exception as E
#ifdef WITH_WEBAPP
import Assistant.DaemonStatus
import Assistant.WebApp.Types
import Assistant.WebApp (renderUrl)
#endif
import Assistant.Monad
import Assistant.Types.UrlRenderer
{- Makes a button for an alert that opens a Route.
-
- If autoclose is set, the button will close the alert it's
- attached to when clicked. -}
#ifdef WITH_WEBAPP
mkAlertButton :: Bool -> T.Text -> UrlRenderer -> Route WebApp -> Assistant AlertButton
mkAlertButton autoclose label urlrenderer route = do
close <- asIO1 removeAlert
url <- liftIO $ renderUrl urlrenderer route []
return $ AlertButton
{ buttonLabel = label
, buttonUrl = url
, buttonAction = if autoclose then Just close else Nothing
, buttonPrimary = True
}
#endif
renderData :: Alert -> TenseText
renderData = tenseWords . alertData
baseActivityAlert :: Alert
baseActivityAlert = Alert
{ alertClass = Activity
, alertHeader = Nothing
, alertMessageRender = renderData
, alertData = []
, alertCounter = 0
, alertBlockDisplay = False
, alertClosable = False
, alertPriority = Medium
, alertIcon = Just ActivityIcon
, alertCombiner = Nothing
, alertName = Nothing
, alertButtons = []
}
warningAlert :: String -> String -> Alert
warningAlert name msg = Alert
{ alertClass = Warning
, alertHeader = Just $ tenseWords ["warning"]
, alertMessageRender = renderData
, alertData = [UnTensed $ T.pack msg]
, alertCounter = 0
, alertBlockDisplay = True
, alertClosable = True
, alertPriority = High
, alertIcon = Just ErrorIcon
, alertCombiner = Just $ dataCombiner $ \_old new -> new
, alertName = Just $ WarningAlert name
, alertButtons = []
}
errorAlert :: String -> [AlertButton] -> Alert
errorAlert msg buttons = Alert
{ alertClass = Error
, alertHeader = Nothing
, alertMessageRender = renderData
, alertData = [UnTensed $ T.pack msg]
, alertCounter = 0
, alertBlockDisplay = True
, alertClosable = True
, alertPriority = Pinned
, alertIcon = Just ErrorIcon
, alertCombiner = Nothing
, alertName = Nothing
, alertButtons = buttons
}
activityAlert :: Maybe TenseText -> [TenseChunk] -> Alert
activityAlert header dat = baseActivityAlert
{ alertHeader = header
, alertData = dat
}
startupScanAlert :: Alert
startupScanAlert = activityAlert Nothing
[Tensed "Performing" "Performed", "startup scan"]
{- Displayed when a shutdown is occurring, so will be seen after shutdown
- has happened. -}
shutdownAlert :: Alert
shutdownAlert = warningAlert "shutdown" "git-annex has been shut down"
commitAlert :: Alert
commitAlert = activityAlert Nothing
[Tensed "Committing" "Committed", "changes to git"]
showRemotes :: [RemoteName] -> TenseChunk
showRemotes = UnTensed . T.intercalate ", " . map T.pack
syncAlert :: [Remote] -> Alert
syncAlert = syncAlert' . map Remote.name
syncAlert' :: [RemoteName] -> Alert
syncAlert' rs = baseActivityAlert
{ alertName = Just SyncAlert
, alertHeader = Just $ tenseWords
[Tensed "Syncing" "Synced", "with", showRemotes rs]
, alertPriority = Low
, alertIcon = Just SyncIcon
}
syncResultAlert :: [Remote] -> [Remote] -> Alert
syncResultAlert succeeded failed = syncResultAlert'
(map Remote.name succeeded)
(map Remote.name failed)
syncResultAlert' :: [RemoteName] -> [RemoteName] -> Alert
syncResultAlert' succeeded failed = makeAlertFiller (not $ null succeeded) $
baseActivityAlert
{ alertName = Just SyncAlert
, alertHeader = Just $ tenseWords msg
}
where
msg
| null succeeded = ["Failed to sync with", showRemotes failed]
| null failed = ["Synced with", showRemotes succeeded]
| otherwise =
[ "Synced with", showRemotes succeeded
, "but not with", showRemotes failed
]
sanityCheckAlert :: Alert
sanityCheckAlert = activityAlert
(Just $ tenseWords [Tensed "Running" "Ran", "daily sanity check"])
["to make sure everything is ok."]
sanityCheckFixAlert :: String -> Alert
sanityCheckFixAlert msg = Alert
{ alertClass = Warning
, alertHeader = Just $ tenseWords ["Fixed a problem"]
, alertMessageRender = render
, alertData = [UnTensed $ T.pack msg]
, alertCounter = 0
, alertBlockDisplay = True
, alertPriority = High
, alertClosable = True
, alertIcon = Just ErrorIcon
, alertName = Just SanityCheckFixAlert
, alertCombiner = Just $ dataCombiner (++)
, alertButtons = []
}
where
render alert = tenseWords $ alerthead : alertData alert ++ [alertfoot]
alerthead = "The daily sanity check found and fixed a problem:"
alertfoot = "If these problems persist, consider filing a bug report."
fsckingAlert :: AlertButton -> Maybe Remote -> Alert
fsckingAlert button mr = baseActivityAlert
{ alertData = case mr of
Nothing -> [ UnTensed $ T.pack $ "Consistency check in progress" ]
Just r -> [ UnTensed $ T.pack $ "Consistency check of " ++ Remote.name r ++ " in progress"]
, alertButtons = [button]
}
showFscking :: UrlRenderer -> Maybe Remote -> IO (Either E.SomeException a) -> Assistant a
showFscking urlrenderer mr a = do
#ifdef WITH_WEBAPP
button <- mkAlertButton False (T.pack "Configure") urlrenderer ConfigFsckR
r <- alertDuring (fsckingAlert button mr) $
liftIO a
#else
r <- liftIO a
#endif
either (liftIO . E.throwIO) return r
notFsckedNudge :: UrlRenderer -> Maybe Remote -> Assistant ()
#ifdef WITH_WEBAPP
notFsckedNudge urlrenderer mr = do
button <- mkAlertButton True (T.pack "Configure") urlrenderer ConfigFsckR
void $ addAlert (notFsckedAlert mr button)
#else
notFsckedNudge _ _ = noop
#endif
notFsckedAlert :: Maybe Remote -> AlertButton -> Alert
notFsckedAlert mr button = Alert
{ alertHeader = Just $ fromString $ concat
[ "You should enable consistency checking to protect your data"
, maybe "" (\r -> " in " ++ Remote.name r) mr
, "."
]
, alertIcon = Just InfoIcon
, alertPriority = High
, alertButtons = [button]
, alertClosable = True
, alertClass = Message
, alertMessageRender = renderData
, alertCounter = 0
, alertBlockDisplay = True
, alertName = Just NotFsckedAlert
, alertCombiner = Just $ dataCombiner $ \_old new -> new
, alertData = []
}
baseUpgradeAlert :: [AlertButton] -> TenseText -> Alert
baseUpgradeAlert buttons message = Alert
{ alertHeader = Just message
, alertIcon = Just UpgradeIcon
, alertPriority = High
, alertButtons = buttons
, alertClosable = True
, alertClass = Message
, alertMessageRender = renderData
, alertCounter = 0
, alertBlockDisplay = True
, alertName = Just UpgradeAlert
, alertCombiner = Just $ fullCombiner $ \new _old -> new
, alertData = []
}
canUpgradeAlert :: AlertPriority -> GitAnnexVersion -> AlertButton -> Alert
canUpgradeAlert priority version button =
(baseUpgradeAlert [button] $ fromString msg)
{ alertPriority = priority
, alertData = [fromString $ " (version " ++ version ++ ")"]
}
where
msg = if priority >= High
then "An important upgrade of git-annex is available!"
else "An upgrade of git-annex is available."
upgradeReadyAlert :: AlertButton -> Alert
upgradeReadyAlert button = baseUpgradeAlert [button] $
fromString "A new version of git-annex has been installed."
upgradingAlert :: Alert
upgradingAlert = activityAlert Nothing [ fromString "Upgrading git-annex" ]
upgradeFinishedAlert :: Maybe AlertButton -> GitAnnexVersion -> Alert
upgradeFinishedAlert button version =
baseUpgradeAlert (maybeToList button) $ fromString $
"Finished upgrading git-annex to version " ++ version
upgradeFailedAlert :: String -> Alert
upgradeFailedAlert msg = (errorAlert msg [])
{ alertHeader = Just $ fromString "Upgrade failed." }
unusedFilesAlert :: [AlertButton] -> String -> Alert
unusedFilesAlert buttons message = Alert
{ alertHeader = Just $ fromString $ unwords
[ "Old and deleted files are piling up --"
, message
]
, alertIcon = Just InfoIcon
, alertPriority = High
, alertButtons = buttons
, alertClosable = True
, alertClass = Message
, alertMessageRender = renderData
, alertCounter = 0
, alertBlockDisplay = True
, alertName = Just UnusedFilesAlert
, alertCombiner = Just $ fullCombiner $ \new _old -> new
, alertData = []
}
brokenRepositoryAlert :: [AlertButton] -> Alert
brokenRepositoryAlert = errorAlert "Serious problems have been detected with your repository. This needs your immediate attention!"
repairingAlert :: String -> Alert
repairingAlert repodesc = activityAlert Nothing
[ Tensed "Attempting to repair" "Repaired"
, UnTensed $ T.pack repodesc
]
pairingAlert :: AlertButton -> Alert
pairingAlert button = baseActivityAlert
{ alertData = [ UnTensed "Pairing in progress" ]
, alertPriority = High
, alertButtons = [button]
}
pairRequestReceivedAlert :: String -> AlertButton -> Alert
pairRequestReceivedAlert who button = Alert
{ alertClass = Message
, alertHeader = Nothing
, alertMessageRender = renderData
, alertData = [UnTensed $ T.pack $ who ++ " is sending a pair request."]
, alertCounter = 0
, alertBlockDisplay = False
, alertPriority = High
, alertClosable = True
, alertIcon = Just InfoIcon
, alertName = Just $ PairAlert who
, alertCombiner = Just $ dataCombiner $ \_old new -> new
, alertButtons = [button]
}
pairRequestAcknowledgedAlert :: String -> Maybe AlertButton -> Alert
pairRequestAcknowledgedAlert who button = baseActivityAlert
{ alertData = ["Pairing with", UnTensed (T.pack who), Tensed "in progress" "complete"]
, alertPriority = High
, alertName = Just $ PairAlert who
, alertCombiner = Just $ dataCombiner $ \_old new -> new
, alertButtons = maybeToList button
}
connectionNeededAlert :: AlertButton -> Alert
connectionNeededAlert button = Alert
{ alertHeader = Just "Share with friends, and keep your devices in sync across the cloud."
, alertIcon = Just ConnectionIcon
, alertPriority = High
, alertButtons = [button]
, alertClosable = True
, alertClass = Message
, alertMessageRender = renderData
, alertCounter = 0
, alertBlockDisplay = True
, alertName = Just ConnectionNeededAlert
, alertCombiner = Just $ dataCombiner $ \_old new -> new
, alertData = []
}
cloudRepoNeededAlert :: Maybe String -> AlertButton -> Alert
cloudRepoNeededAlert friendname button = Alert
{ alertHeader = Just $ fromString $ unwords
[ "Unable to download files from"
, (fromMaybe "your other devices" friendname) ++ "."
]
, alertIcon = Just ErrorIcon
, alertPriority = High
, alertButtons = [button]
, alertClosable = True
, alertClass = Message
, alertMessageRender = renderData
, alertCounter = 0
, alertBlockDisplay = True
, alertName = Just $ CloudRepoNeededAlert
, alertCombiner = Just $ dataCombiner $ \_old new -> new
, alertData = []
}
remoteRemovalAlert :: String -> AlertButton -> Alert
remoteRemovalAlert desc button = Alert
{ alertHeader = Just $ fromString $
"The repository \"" ++ desc ++
"\" has been emptied, and can now be removed."
, alertIcon = Just InfoIcon
, alertPriority = High
, alertButtons = [button]
, alertClosable = True
, alertClass = Message
, alertMessageRender = renderData
, alertCounter = 0
, alertBlockDisplay = True
, alertName = Just $ RemoteRemovalAlert desc
, alertCombiner = Just $ dataCombiner $ \_old new -> new
, alertData = []
}
{- Show a message that relates to a list of files.
-
- The most recent several files are shown, and a count of any others. -}
fileAlert :: TenseChunk -> [FilePath] -> Alert
fileAlert msg files = (activityAlert Nothing shortfiles)
{ alertName = Just $ FileAlert msg
, alertMessageRender = renderer
, alertCounter = counter
, alertCombiner = Just $ fullCombiner combiner
}
where
maxfilesshown = 10
(!somefiles, !counter) = splitcounter (dedupadjacent files)
!shortfiles = map (fromString . shortFile . takeFileName) somefiles
renderer alert = tenseWords $ msg : alertData alert ++ showcounter
where
showcounter = case alertCounter alert of
0 -> []
_ -> [fromString $ "and " ++ show (alertCounter alert) ++ " other files"]
dedupadjacent (x:y:rest)
| x == y = dedupadjacent (y:rest)
| otherwise = x : dedupadjacent (y:rest)
dedupadjacent (x:[]) = [x]
dedupadjacent [] = []
{- Note that this ensures the counter is never 1; no need to say
- "1 file" when the filename could be shown. -}
splitcounter l
| length l <= maxfilesshown = (l, 0)
| otherwise =
let (keep, rest) = splitAt (maxfilesshown - 1) l
in (keep, length rest)
combiner new old =
let (!fs, n) = splitcounter $
dedupadjacent $ alertData new ++ alertData old
!cnt = n + alertCounter new + alertCounter old
in old
{ alertData = fs
, alertCounter = cnt
}
addFileAlert :: [FilePath] -> Alert
addFileAlert = fileAlert (Tensed "Adding" "Added")
{- This is only used as a success alert after a transfer, not during it. -}
transferFileAlert :: Direction -> Bool -> FilePath -> Alert
transferFileAlert direction True file
| direction == Upload = fileAlert "Uploaded" [file]
| otherwise = fileAlert "Downloaded" [file]
transferFileAlert direction False file
| direction == Upload = fileAlert "Upload failed" [file]
| otherwise = fileAlert "Download failed" [file]
dataCombiner :: ([TenseChunk] -> [TenseChunk] -> [TenseChunk]) -> AlertCombiner
dataCombiner combiner = fullCombiner $
\new old -> old { alertData = alertData new `combiner` alertData old }
fullCombiner :: (Alert -> Alert -> Alert) -> AlertCombiner
fullCombiner combiner new old
| alertClass new /= alertClass old = Nothing
| alertName new == alertName old =
Just $! new `combiner` old
| otherwise = Nothing
shortFile :: FilePath -> String
shortFile f
| len < maxlen = f
| otherwise = take half f ++ ".." ++ drop (len - half) f
where
len = length f
maxlen = 20
half = (maxlen - 2) `div` 2

129
Assistant/Alert/Utility.hs Normal file
View file

@ -0,0 +1,129 @@
{- git-annex assistant alert utilities
-
- Copyright 2012, 2013 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Assistant.Alert.Utility where
import Annex.Common
import Assistant.Types.Alert
import Utility.Tense
import qualified Data.Text as T
import Data.Text (Text)
import qualified Data.Map.Strict as M
{- This is as many alerts as it makes sense to display at a time.
- A display might be smaller, or larger, the point is to not overwhelm the
- user with a ton of alerts. -}
displayAlerts :: Int
displayAlerts = 6
{- This is not a hard maximum, but there's no point in keeping a great
- many filler alerts in an AlertMap, so when there's more than this many,
- they start being pruned, down toward displayAlerts. -}
maxAlerts :: Int
maxAlerts = displayAlerts * 2
type AlertPair = (AlertId, Alert)
{- The desired order is the reverse of:
-
- - Pinned alerts
- - High priority alerts, newest first
- - Medium priority Activity, newest first (mostly used for Activity)
- - Low priority alerts, newest first
- - Filler priority alerts, newest first
- - Ties are broken by the AlertClass, with Errors etc coming first.
-}
compareAlertPairs :: AlertPair -> AlertPair -> Ordering
compareAlertPairs
(aid, Alert { alertClass = aclass, alertPriority = aprio })
(bid, Alert { alertClass = bclass, alertPriority = bprio })
= compare aprio bprio
`mappend` compare aid bid
`mappend` compare aclass bclass
sortAlertPairs :: [AlertPair] -> [AlertPair]
sortAlertPairs = sortBy compareAlertPairs
{- Renders an alert's header for display, if it has one. -}
renderAlertHeader :: Alert -> Maybe Text
renderAlertHeader alert = renderTense (alertTense alert) <$> alertHeader alert
{- Renders an alert's message for display. -}
renderAlertMessage :: Alert -> Text
renderAlertMessage alert = renderTense (alertTense alert) $
(alertMessageRender alert) alert
showAlert :: Alert -> String
showAlert alert = T.unpack $ T.unwords $ catMaybes
[ renderAlertHeader alert
, Just $ renderAlertMessage alert
]
alertTense :: Alert -> Tense
alertTense alert
| alertClass alert == Activity = Present
| otherwise = Past
{- Checks if two alerts display the same. -}
effectivelySameAlert :: Alert -> Alert -> Bool
effectivelySameAlert x y = all id
[ alertClass x == alertClass y
, alertHeader x == alertHeader y
, alertData x == alertData y
, alertBlockDisplay x == alertBlockDisplay y
, alertClosable x == alertClosable y
, alertPriority x == alertPriority y
]
makeAlertFiller :: Bool -> Alert -> Alert
makeAlertFiller success alert
| isFiller alert = alert
| otherwise = alert
{ alertClass = if c == Activity then c' else c
, alertPriority = Filler
, alertClosable = True
, alertButtons = []
, alertIcon = Just $ if success then SuccessIcon else ErrorIcon
}
where
c = alertClass alert
c'
| success = Success
| otherwise = Error
isFiller :: Alert -> Bool
isFiller alert = alertPriority alert == Filler
{- Updates the Alertmap, adding or updating an alert.
-
- Any old filler that looks the same as the alert is removed.
-
- Or, if the alert has an alertCombiner that combines it with
- an old alert, the old alert is replaced with the result, and the
- alert is removed.
-
- Old filler alerts are pruned once maxAlerts is reached.
-}
mergeAlert :: AlertId -> Alert -> AlertMap -> AlertMap
mergeAlert i al m = maybe updatePrune updateCombine (alertCombiner al)
where
pruneSame k al' = k == i || not (effectivelySameAlert al al')
pruneBloat m'
| bloat > 0 = M.fromList $ pruneold $ M.toList m'
| otherwise = m'
where
bloat = M.size m' - maxAlerts
pruneold l =
let (f, rest) = partition (\(_, a) -> isFiller a) l
in drop bloat f ++ rest
updatePrune = pruneBloat $ M.filterWithKey pruneSame $ M.insert i al m
updateCombine combiner =
let combined = M.mapMaybe (combiner al) m
in if M.null combined
then updatePrune
else M.delete i $ M.union combined m

19
Assistant/BranchChange.hs Normal file
View file

@ -0,0 +1,19 @@
{- git-annex assistant git-annex branch change tracking
-
- Copyright 2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Assistant.BranchChange where
import Assistant.Common
import Assistant.Types.BranchChange
import Control.Concurrent.MSampleVar
branchChanged :: Assistant ()
branchChanged = flip writeSV () <<~ (fromBranchChangeHandle . branchChangeHandle)
waitBranchChange :: Assistant ()
waitBranchChange = readSV <<~ (fromBranchChangeHandle . branchChangeHandle)

Some files were not shown because too many files have changed in this diff Show more