Merge branch 'master' into anons3

Joey Hess 2022-11-04 15:08:29 -04:00
commit de1e8201a6
GPG key ID: DB12DB0FF05F8F38
66 changed files with 1120 additions and 307 deletions

@@ -287,12 +287,8 @@ run (st, rd) a = do
 run' :: MVar AnnexState -> AnnexRead -> Annex a -> IO (a, (AnnexState, AnnexRead))
 run' mvar rd a = do
 	r <- runReaderT (runAnnex a) (mvar, rd)
-		`onException` (flush rd)
-	flush rd
 	st <- takeMVar mvar
 	return (r, (st, rd))
-  where
-	flush = Keys.flushDbQueue . keysdbhandle
 
 {- Performs an action in the Annex monad from a starting state,
  - and throws away the changed state. -}
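
The hunk above drops the flush-on-exception guard from run': after this commit the keys database write queue is instead flushed by saveState (via the new Database.Keys.flushDb, further down) and the handle is closed once, in quiesce. A minimal sketch of the dropped idiom, with hypothetical names, not git-annex's real API:

import Control.Exception (onException)

-- Run a body, flushing both on the exception path and on the normal
-- path. This per-run flushing is what the commit removes.
guardedRun :: IO a -> IO () -> IO a
guardedRun body flush = do
	r <- body `onException` flush
	flush
	return r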

@@ -1,6 +1,6 @@
 {- git-annex actions
  -
- - Copyright 2010-2020 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2022 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU AGPL version 3 or higher.
  -}
@@ -11,7 +11,7 @@ module Annex.Action (
 	action,
 	verifiedAction,
 	startup,
-	shutdown,
+	quiesce,
 	stopCoProcesses,
 ) where
@@ -25,6 +25,7 @@ import Annex.CheckAttr
 import Annex.HashObject
 import Annex.CheckIgnore
 import Annex.TransferrerPool
+import qualified Database.Keys
 
 import Control.Concurrent.STM
 #ifndef mingw32_HOST_OS
@@ -74,12 +75,25 @@ startup = do
 	return ()
 #endif
 
-{- Cleanup actions. -}
-shutdown :: Bool -> Annex ()
-shutdown nocommit = do
+{- Run all cleanup actions, save all state, stop all long-running child
+ - processes.
+ -
+ - This can be run repeatedly with other Annex actions run in between,
+ - but usually it is run only once at the end.
+ -
+ - When passed True, avoids making any commits to the git-annex branch,
+ - leaving changes in the journal for later commit.
+ -}
+quiesce :: Bool -> Annex ()
+quiesce nocommit = do
+	cas <- Annex.withState $ \st -> return
+		( st { Annex.cleanupactions = mempty }
+		, Annex.cleanupactions st
+		)
+	sequence_ (M.elems cas)
 	saveState nocommit
-	sequence_ =<< M.elems <$> Annex.getState Annex.cleanupactions
 	stopCoProcesses
+	Database.Keys.closeDb
 
 {- Stops all long-running child processes, including git query processes. -}
 stopCoProcesses :: Annex ()
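
quiesce swaps the registered cleanup actions out of the shared state before running them, so an action that registers further cleanups (or a nested quiesce) cannot run anything twice. A standalone sketch of that take-and-clear pattern; State and runCleanups are illustrative names, not git-annex's real API:

import Control.Concurrent.MVar
import qualified Data.Map as M

newtype State = State { cleanupActions :: M.Map String (IO ()) }

runCleanups :: MVar State -> IO ()
runCleanups var = do
	-- Atomically take the actions, leaving an empty map behind.
	cas <- modifyMVar var $ \st ->
		return (st { cleanupActions = M.empty }, cleanupActions st)
	sequence_ (M.elems cas)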

@@ -718,7 +718,7 @@ listKeys' keyloc want = do
 saveState :: Bool -> Annex ()
 saveState nocommit = doSideAction $ do
 	Annex.Queue.flush
-	Database.Keys.closeDb
+	Database.Keys.flushDb
 	unless nocommit $
 		whenM (annexAlwaysCommit <$> Annex.getGitConfig) $
 			Annex.Branch.commit =<< Annex.Branch.commitMessage

@@ -184,6 +184,9 @@ recordImportTree remote importtreeconfig importable = do
 				unlessM (stillpresent db oldkey) $
 					logChange oldkey (Remote.uuid remote) InfoMissing
 			_ -> noop
+	-- When the remote is versioned, it still contains keys
+	-- that are not present in the new tree.
+	unless (Remote.versionedExport (Remote.exportActions remote)) $ do
 		db <- Export.openDb (Remote.uuid remote)
 		forM_ (exportedTreeishes oldexport) $ \oldtree ->
 			Export.runExportDiffUpdater updater db oldtree finaltree

@@ -1,6 +1,6 @@
 {- git-annex program path
  -
- - Copyright 2013-2021 Joey Hess <id@joeyh.name>
+ - Copyright 2013-2022 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU AGPL version 3 or higher.
  -}
@@ -11,6 +11,7 @@ module Annex.Path (
 	gitAnnexChildProcess,
 	gitAnnexChildProcessParams,
 	gitAnnexDaemonizeParams,
+	cleanStandaloneEnvironment,
 ) where
 
 import Annex.Common
@@ -19,7 +20,7 @@ import Utility.Env
 import Annex.PidLock
 import qualified Annex
 
-import System.Environment (getExecutablePath, getArgs)
+import System.Environment (getExecutablePath, getArgs, getProgName)
 
 {- A fully qualified path to the currently running git-annex program.
  -
@@ -29,13 +30,16 @@ import System.Environment (getExecutablePath, getArgs, getProgName)
  - or searching for the command name in PATH.
  -
  - The standalone build runs git-annex via ld.so, and defeats
- - getExecutablePath. It sets GIT_ANNEX_PROGRAMPATH to the correct path
- - to the wrapper script to use.
+ - getExecutablePath. It sets GIT_ANNEX_DIR to the location of the
+ - standalone build directory, and there are wrapper scripts for git-annex
+ - and git-annex-shell in that directory.
 -}
 programPath :: IO FilePath
-programPath = go =<< getEnv "GIT_ANNEX_PROGRAMPATH"
+programPath = go =<< getEnv "GIT_ANNEX_DIR"
  where
-	go (Just p) = return p
+	go (Just dir) = do
+		name <- getProgName
+		return (dir </> name)
 	go Nothing = do
 		exe <- getExecutablePath
 		p <- if isAbsolute exe
@@ -97,3 +101,25 @@ gitAnnexDaemonizeParams = do
 	-- Get every parameter git-annex was run with.
 	ps <- liftIO getArgs
 	return (map Param ps ++ cps)
+
+{- Returns a cleaned up environment that lacks path and other settings
+ - used to make the standalone builds use their bundled libraries and programs.
+ - Useful when calling programs not included in the standalone builds.
+ -
+ - For a non-standalone build, returns Nothing.
+ -}
+cleanStandaloneEnvironment :: IO (Maybe [(String, String)])
+cleanStandaloneEnvironment = clean <$> getEnvironment
+  where
+	clean environ
+		| null vars = Nothing
+		| otherwise = Just $ catMaybes $ map (restoreorig environ) environ
+	  where
+		vars = words $ fromMaybe "" $
+			lookup "GIT_ANNEX_STANDLONE_ENV" environ
+		restoreorig oldenviron p@(k, _v)
+			| k `elem` vars = case lookup ("ORIG_" ++ k) oldenviron of
+				(Just v')
+					| not (null v') -> Just (k, v')
+				_ -> Nothing
+			| otherwise = Just p
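
cleanStandaloneEnvironment consults GIT_ANNEX_STANDLONE_ENV (sic, the variable name is spelled that way in the source) for the list of variables the standalone wrapper changed, and restores each one from its ORIG_ copy. A sketch of how a caller can use the result when spawning a program that should not see the bundled-library environment; launchOutside is a hypothetical helper, but env is the real System.Process field, and Command/WebApp.hs below uses the function in the same way:

import Control.Monad (void)
import System.Process

-- Nothing means "inherit the current environment", which is exactly
-- what cleanStandaloneEnvironment returns for non-standalone builds.
launchOutside :: FilePath -> [String] -> IO (Maybe [(String, String)]) -> IO ()
launchOutside cmd args cleanenv = do
	environ <- cleanenv
	(_, _, _, pid) <- createProcess (proc cmd args) { env = environ }
	void (waitForProcess pid)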

@@ -1,6 +1,6 @@
 {- git-annex worktree files
  -
- - Copyright 2013-2021 Joey Hess <id@joeyh.name>
+ - Copyright 2013-2022 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU AGPL version 3 or higher.
  -}
@@ -14,7 +14,7 @@ import Annex.CurrentBranch
 import qualified Database.Keys
 
 {- Looks up the key corresponding to an annexed file in the work tree,
- - by examining what the file links to.
+ - by examining what the symlink points to.
  -
  - An unlocked file will not have a link on disk, so fall back to
  - looking for a pointer to a key in git.
@@ -31,6 +31,16 @@ lookupKey = lookupKey' catkeyfile
 	, catKeyFileHidden file =<< getCurrentBranch
 	)
 
+{- Like lookupKey, but only looks at files staged in git, not at unstaged
+ - changes in the work tree. This means it's slower, but it also has
+ - consistently the same behavior for locked files as for unlocked files.
+ -}
+lookupKeyStaged :: RawFilePath -> Annex (Maybe Key)
+lookupKeyStaged file = catKeyFile file >>= \case
+	Just k -> return (Just k)
+	Nothing -> catKeyFileHidden file =<< getCurrentBranch
+
 {- Like lookupKey, but does not find keys for hidden files. -}
 lookupKeyNotHidden :: RawFilePath -> Annex (Maybe Key)
 lookupKeyNotHidden = lookupKey' catkeyfile
  where
@@ -45,23 +55,6 @@ lookupKey' catkeyfile file = isAnnexLink file >>= \case
 	Just key -> return (Just key)
 	Nothing -> catkeyfile file
 
-{- Modifies an action to only act on files that are already annexed,
- - and passes the key on to it. -}
-whenAnnexed :: (RawFilePath -> Key -> Annex (Maybe a)) -> RawFilePath -> Annex (Maybe a)
-whenAnnexed a file = ifAnnexed file (a file) (return Nothing)
-
-ifAnnexed :: RawFilePath -> (Key -> Annex a) -> Annex a -> Annex a
-ifAnnexed file yes no = maybe no yes =<< lookupKey file
-
-{- Find all annexed files and update the keys database for them.
- -
- - Normally the keys database is updated incrementally when it's being
- - opened, and changes are noticed. Calling this explicitly allows
- - running the update at an earlier point.
- -
- - All that needs to be done is to open the database,
- - that will result in Database.Keys.reconcileStaged
- - running, and doing the work.
- -}
+{- Find all annexed files and update the keys database for them. -}
 scanAnnexedFiles :: Annex ()
-scanAnnexedFiles = Database.Keys.runWriter (const noop)
+scanAnnexedFiles = Database.Keys.updateDatabase
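
The practical difference, which drives the --batch behavior change noted in the changelog below: for a locked file that exists on disk but has not yet been staged, lookupKey follows the symlink and finds a key, while lookupKeyStaged consults only what git has staged and finds nothing, matching how unlocked (pointer) files already behaved. A hedged sketch in the Annex monad, assuming git-annex's internal API (only the functions defined above plus serializeKey):

-- Uses LambdaCase, as the surrounding codebase does.
describeBatchLookup :: RawFilePath -> Annex String
describeBatchLookup f = lookupKeyStaged f >>= \case
	Just k -> return ("annexed as " ++ serializeKey k)
	Nothing -> return "not staged in git, so skipped in --batch mode"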

@@ -171,25 +171,3 @@ installFileManagerHooks program = unlessM osAndroid $ do
 #else
 installFileManagerHooks _ = noop
 #endif
-
-{- Returns a cleaned up environment that lacks settings used to make the
- - standalone builds use their bundled libraries and programs.
- - Useful when calling programs not included in the standalone builds.
- -
- - For a non-standalone build, returns Nothing.
- -}
-cleanEnvironment :: IO (Maybe [(String, String)])
-cleanEnvironment = clean <$> getEnvironment
-  where
-	clean environ
-		| null vars = Nothing
-		| otherwise = Just $ catMaybes $ map (restoreorig environ) environ
-	  where
-		vars = words $ fromMaybe "" $
-			lookup "GIT_ANNEX_STANDLONE_ENV" environ
-		restoreorig oldenviron p@(k, _v)
-			| k `elem` vars = case lookup ("ORIG_" ++ k) oldenviron of
-				(Just v')
-					| not (null v') -> Just (k, v')
-				_ -> Nothing
-			| otherwise = Just p

@@ -49,7 +49,7 @@ inDir dir a = do
 	state <- Annex.new
 		=<< Git.Config.read
 		=<< Git.Construct.fromPath (toRawFilePath dir)
-	Annex.eval state $ a `finally` stopCoProcesses
+	Annex.eval state $ a `finally` quiesce True
 
 {- Creates a new repository, and returns its UUID. -}
 initRepo :: Bool -> Bool -> FilePath -> Maybe String -> Maybe StandardGroup -> IO UUID

@@ -24,6 +24,7 @@ import Annex.Content
 import Annex.WorkTree
 import Git.Command
 import qualified Utility.RawFilePath as R
+import Annex.Action
 
 import Data.Time.Clock
 import Data.Char
@@ -69,7 +70,9 @@ main = do
 	state <- Annex.new =<< Git.Construct.fromPath (toRawFilePath ".")
 	ood <- Annex.eval state $ do
 		buildrpms topdir updated
-		makeinfos updated version
+		is <- makeinfos updated version
+		quiesce False
+		return is
 	syncToArchiveOrg
 	unless (null ood) $
 		error $ "Some info files are out of date: " ++ show (map fst ood)

@@ -164,8 +164,6 @@ installLinkerShim top linker exe = do
 		createSymbolicLink (fromRawFilePath link) (top </> exelink)
 	writeFile exe $ unlines
 		[ "#!/bin/sh"
-		, "GIT_ANNEX_PROGRAMPATH=\"$0\""
-		, "export GIT_ANNEX_PROGRAMPATH"
 		, "exec \"$GIT_ANNEX_DIR/" ++ exelink ++ "\" --library-path \"$GIT_ANNEX_LD_LIBRARY_PATH\" \"$GIT_ANNEX_DIR/shimmed/" ++ base ++ "/" ++ base ++ "\" \"$@\""
 		]
 	modifyFileMode (toRawFilePath exe) $ addModes executeModes

View file

@ -1,16 +1,32 @@
git-annex (10.20221004) UNRELEASED; urgency=medium * S3: Support signature=anonymous to access a S3 bucket anonymously.
This can be used, for example, with importtree=yes to import from
a public bucket.
git-annex (10.20221103) upstream; urgency=medium
* Doubled the speed of git-annex drop when operating on many files,
and of git-annex get when operating on many tiny files.
* trust, untrust, semitrust, dead: Fix behavior when provided with * trust, untrust, semitrust, dead: Fix behavior when provided with
multiple repositories to operate on. multiple repositories to operate on.
* trust, untrust, semitrust, dead: When provided with no parameters, * trust, untrust, semitrust, dead: When provided with no parameters,
do not operate on a repository that has an empty name. do not operate on a repository that has an empty name.
* move: Fix openFile crash with -J * move: Fix openFile crash with -J
(Fixes a reversion in 8.20201103) (Fixes a reversion in 8.20201103)
* S3: Support signature=anonymous to access a S3 bucket anonymously. * S3: Speed up importing from a large bucket when fileprefix= is set,
This can be used, for example, with importtree=yes to import from by only asking for files under the prefix.
a public bucket. * When importing from versioned remotes, fix tracking of the content
of deleted files.
* More robust handling of ErrorBusy when writing to sqlite databases.
* Avoid hanging when a suspended git-annex process is keeping a sqlite
database locked.
* Make --batch mode handle unstaged annexed files consistently
whether the file is unlocked or not. Note that this changes the
behavior of --batch when it is provided with locked files that are
in the process of being added to the repository, but have not yet been
staged in git.
* Make git-annex enable-tor work when using the linux standalone build.
-- Joey Hess <id@joeyh.name> Mon, 03 Oct 2022 13:36:42 -0400 -- Joey Hess <id@joeyh.name> Thu, 03 Nov 2022 14:07:31 -0400
git-annex (10.20221003) upstream; urgency=medium git-annex (10.20221003) upstream; urgency=medium

@@ -63,7 +63,7 @@ dispatch' subcommandname args fuzzy cmds allargs allcmds fields getgitrepo progn
 		prepRunCommand cmd annexsetter
 		startup
 		performCommandAction True cmd seek $
-			shutdown $ cmdnocommit cmd
+			quiesce $ cmdnocommit cmd
 	go (Left norepo) = do
 		let ingitrepo = \a -> a =<< Git.Config.global
 		-- Parse command line with full cmdparser first,

@@ -186,8 +186,9 @@ batchAnnexed fmt seeker keyaction = do
 	matcher <- getMatcher
 	batchFilesKeys fmt $ \(si, v) ->
 		case v of
-			Right bf -> flip whenAnnexed bf $ \f k ->
-				checkpresent k $
+			Right f -> lookupKeyStaged f >>= \case
+				Nothing -> return Nothing
+				Just k -> checkpresent k $
 					startAction seeker si f k
 			Left k -> ifM (matcher (MatchingInfo (mkinfo k)))
 				( checkpresent k $

@@ -17,6 +17,7 @@ import Utility.AuthToken
 import Annex.UUID
 import P2P.Address
 import P2P.Auth
+import Annex.Action
 
 run :: [String] -> IO ()
 run (_remotename:address:[]) = forever $
@@ -59,6 +60,8 @@ connectService address port service = do
 	g <- Annex.gitRepo
 	conn <- liftIO $ connectPeer g (TorAnnex address port)
 	runst <- liftIO $ mkRunState Client
-	liftIO $ runNetProto runst conn $ auth myuuid authtoken noop >>= \case
+	r <- liftIO $ runNetProto runst conn $ auth myuuid authtoken noop >>= \case
 		Just _theiruuid -> connect service stdin stdout
 		Nothing -> giveup $ "authentication failed, perhaps you need to set " ++ p2pAuthTokenEnv
+	quiesce False
+	return r

@@ -11,7 +11,6 @@ module Command (
 ) where
 
 import Annex.Common as ReExported
-import Annex.WorkTree as ReExported (whenAnnexed, ifAnnexed)
 import Types.Command as ReExported
 import Types.DeferredParse as ReExported
 import CmdLine.Seek as ReExported

@@ -18,6 +18,7 @@ import Annex.FileMatcher
 import Annex.Link
 import Annex.Tmp
 import Annex.HashObject
+import Annex.WorkTree
 import Messages.Progress
 import Git.FilePath
 import Git.Types
@@ -202,7 +203,9 @@ start dr si file addunlockedmatcher =
 		mk <- liftIO $ isPointerFile file
 		maybe (go s) (fixuppointer s) mk
  where
-	go s = ifAnnexed file (addpresent s) (add s)
+	go s = lookupKey file >>= \case
+		Just k -> addpresent s k
+		Nothing -> add s
 	add s = starting "add" (ActionItemTreeFile file) si $
 		skipWhenDryRun dr $
 			if isSymbolicLink s

@@ -20,6 +20,7 @@ import Annex.Ingest
 import Annex.CheckIgnore
 import Annex.Perms
 import Annex.UUID
+import Annex.WorkTree
 import Annex.YoutubeDl
 import Annex.UntrustedFilePath
 import Logs.Web
@@ -183,7 +184,9 @@ startRemote addunlockedmatcher r o si file uri sz = do
 		performRemote addunlockedmatcher r o uri (toRawFilePath file') sz
 
 performRemote :: AddUnlockedMatcher -> Remote -> AddUrlOptions -> URLString -> RawFilePath -> Maybe Integer -> CommandPerform
-performRemote addunlockedmatcher r o uri file sz = ifAnnexed file adduri geturi
+performRemote addunlockedmatcher r o uri file sz = lookupKey file >>= \case
+	Just k -> adduri k
+	Nothing -> geturi
  where
 	loguri = setDownloader uri OtherDownloader
 	adduri = addUrlChecked o loguri file (Remote.uuid r) checkexistssize
@@ -270,7 +273,9 @@ checkPreserveFileNameSecurity f = do
 			]
 
 performWeb :: AddUnlockedMatcher -> AddUrlOptions -> URLString -> RawFilePath -> Url.UrlInfo -> CommandPerform
-performWeb addunlockedmatcher o url file urlinfo = ifAnnexed file addurl geturl
+performWeb addunlockedmatcher o url file urlinfo = lookupKey file >>= \case
+	Just k -> addurl k
+	Nothing -> geturl
  where
 	geturl = next $ isJust <$> addUrlFile addunlockedmatcher (downloadOptions o) url urlinfo file
 	addurl = addUrlChecked o url file webUUID $ \k ->
@@ -335,9 +340,9 @@ downloadWeb addunlockedmatcher o url urlinfo file =
 	tryyoutubedl tmp = youtubeDlFileNameHtmlOnly url >>= \case
 		Right mediafile ->
 			let f = youtubeDlDestFile o file (toRawFilePath mediafile)
-			in ifAnnexed f
-				(alreadyannexed (fromRawFilePath f))
-				(dl f)
+			in lookupKey f >>= \case
+				Just k -> alreadyannexed (fromRawFilePath f) k
+				Nothing -> dl f
 		Left err -> checkRaw (Just err) o Nothing (normalfinish tmp)
  where
 	dl dest = withTmpWorkDir mediakey $ \workdir -> do

@@ -60,9 +60,10 @@ start _os = do
 	gitannex <- liftIO programPath
 	let ps = [Param (cmdname cmd), Param (show curruserid)]
 	sucommand <- liftIO $ mkSuCommand gitannex ps
+	cleanenv <- liftIO $ cleanStandaloneEnvironment
 	maybe noop showLongNote
 		(describePasswordPrompt' sucommand)
-	ifM (liftIO $ runSuCommand sucommand)
+	ifM (liftIO $ runSuCommand sucommand cleanenv)
 		( next checkHiddenService
 		, giveup $ unwords $
 			[ "Failed to run as root:" , gitannex ] ++ toCommand ps

@@ -378,7 +378,7 @@ cleanupUnexport r db eks loc = do
 		removeExportedLocation db ek loc
 	flushDbQueue db
 
-	-- An versionedExport remote supports removeExportLocation to remove
+	-- A versionedExport remote supports removeExportLocation to remove
 	-- the file from the exported tree, but still retains the content
 	-- and allows retrieving it.
 	unless (versionedExport (exportActions r)) $ do

@@ -40,6 +40,7 @@ import Command.AddUrl (addUrlFile, downloadRemoteFile, parseDownloadOptions, Dow
 import Annex.UUID
 import Backend.URL (fromUrl)
 import Annex.Content
+import Annex.WorkTree
 import Annex.YoutubeDl
 import Types.MetaData
 import Logs.MetaData
@@ -297,7 +298,9 @@ performDownload' started addunlockedmatcher opts cache todownload = case locatio
 	 - to be re-downloaded. -}
 	makeunique url n file = ifM alreadyexists
 		( ifM forced
-			( ifAnnexed (toRawFilePath f) checksameurl tryanother
+			( lookupKey (toRawFilePath f) >>= \case
+				Just k -> checksameurl k
+				Nothing -> tryanother
 			, tryanother
 			)
 		, return $ Just f

@@ -28,6 +28,7 @@ import Utility.DiskFree
 import Annex.Content
 import Annex.UUID
 import Annex.CatFile
+import Annex.WorkTree
 import Logs.UUID
 import Logs.Trust
 import Logs.Location
@@ -174,9 +175,9 @@ itemInfo o (si, p) = ifM (isdir p)
 		Right u -> uuidInfo o u si
 		Left _ -> do
 			relp <- liftIO $ relPathCwdToFile (toRawFilePath p)
-			ifAnnexed relp
-				(fileInfo o (fromRawFilePath relp) si)
-				(treeishInfo o p si)
+			lookupKey relp >>= \case
+				Just k -> fileInfo o (fromRawFilePath relp) si k
+				Nothing -> treeishInfo o p si
 	)
  where
 	isdir = liftIO . catchBoolIO . (isDirectory <$$> getFileStatus)

@@ -155,7 +155,7 @@ parseJSONInput i = case eitherDecode (BU.fromString i) of
 startBatch :: (SeekInput, (Either RawFilePath Key, MetaData)) -> CommandStart
 startBatch (si, (i, (MetaData m))) = case i of
 	Left f -> do
-		mk <- lookupKey f
+		mk <- lookupKeyStaged f
 		case mk of
 			Just k -> go k (mkActionItem (k, AssociatedFile (Just f)))
 			Nothing -> return Nothing

@@ -16,6 +16,7 @@ import Annex.Perms
 import Annex.ReplaceFile
 import Logs.Location
 import Annex.InodeSentinal
+import Annex.WorkTree
 import Utility.InodeCache
 import qualified Utility.RawFilePath as R
 
@@ -61,7 +62,9 @@ seek o = case batchOption o of
 		(toRawFilePath file, fromMaybe (giveup "bad key") (deserializeKey skey))
 
 start :: SeekInput -> (RawFilePath, Key) -> CommandStart
-start si (file, newkey) = ifAnnexed file go stop
+start si (file, newkey) = lookupKey file >>= \case
+	Just k -> go k
+	Nothing -> stop
  where
 	go oldkey
 		| oldkey == newkey = stop

@@ -31,9 +31,7 @@ start (_, key) = fieldTransfer Download key $ \_p -> do
 	ifM (getViaTmp rsp DefaultVerify key (AssociatedFile Nothing) go)
 		( do
 			logStatus key InfoPresent
-			-- forcibly quit after receiving one key,
-			-- and shutdown cleanly
-			_ <- shutdown True
+			_ <- quiesce True
 			return True
 		, return False
 		)

@@ -13,6 +13,7 @@ import Annex.Content
 import Backend
 import Types.KeySource
 import Utility.Metered
+import Annex.WorkTree
 import qualified Git
 
 cmd :: Command
@@ -45,9 +46,9 @@ startSrcDest :: [FilePath] -> CommandStart
 startSrcDest ps@(src:dest:[])
 	| src == dest = stop
 	| otherwise = notAnnexed src' $
-		ifAnnexed (toRawFilePath dest)
-			go
-			(giveup $ src ++ " is not an annexed file")
+		lookupKey (toRawFilePath dest) >>= \case
+			Just k -> go k
+			Nothing -> giveup $ src ++ " is not an annexed file"
  where
 	src' = toRawFilePath src
 	go key = starting "reinject" ai si $
@@ -79,9 +80,9 @@ notAnnexed :: RawFilePath -> CommandStart -> CommandStart
 notAnnexed src a =
 	ifM (fromRepo Git.repoIsLocalBare)
 		( a
-		, ifAnnexed src
-			(giveup $ "cannot used annexed file as src: " ++ fromRawFilePath src)
-			a
+		, lookupKey src >>= \case
+			Just _ -> giveup $ "cannot used annexed file as src: " ++ fromRawFilePath src
+			Nothing -> a
 		)
 
 perform :: RawFilePath -> Key -> CommandPerform

@@ -9,6 +9,7 @@ module Command.RmUrl where
 
 import Command
 import Logs.Web
+import Annex.WorkTree
 
 cmd :: Command
 cmd = notBareRepo $
@@ -46,7 +47,9 @@ batchParser s = case separate (== ' ') (reverse s) of
 		return $ Right (f', reverse ru)
 
 start :: (SeekInput, (FilePath, URLString)) -> CommandStart
-start (si, (file, url)) = flip whenAnnexed file' $ \_ key -> do
+start (si, (file, url)) = lookupKeyStaged file' >>= \case
+	Nothing -> stop
+	Just key -> do
 		let ai = mkActionItem (key, AssociatedFile (Just file'))
 		starting "rmurl" ai si $
 			next $ cleanup url key

@@ -50,6 +50,7 @@ import Config.DynamicConfig
 import Annex.Path
 import Annex.Wanted
 import Annex.Content
+import Annex.WorkTree
 import Command.Get (getKey')
 import qualified Command.Move
 import qualified Command.Export
@@ -765,7 +766,10 @@ seekSyncContent o rs currbranch = do
 			seekHelper fst3 ww LsFiles.inRepoDetails l
 	seekincludinghidden origbranch mvar l bloomfeeder =
-		seekFiltered (const (pure True)) (\(si, f) -> ifAnnexed f (commandAction . gofile bloomfeeder mvar si f) noop) $
+		let filterer = \(si, f) -> lookupKey f >>= \case
+			Just k -> (commandAction $ gofile bloomfeeder mvar si f k)
+			Nothing -> noop
+		in seekFiltered (const (pure True)) filterer $
 			seekHelper id ww (LsFiles.inRepoOrBranch origbranch) l
 	ww = WarnUnmatchLsFiles

@@ -18,6 +18,7 @@ import qualified Database.Keys
 import Annex.Content
 import Annex.Init
 import Annex.CheckIgnore
+import Annex.WorkTree
 import Utility.FileMode
 import qualified Utility.RawFilePath as R
 
@@ -51,12 +52,16 @@ seek ps = do
 	withFilesNotInGit
 		(CheckGitIgnore False)
 		WarnUnmatchWorkTreeItems
-		(\(_, f) -> commandAction $ whenAnnexed (startCheckIncomplete . fromRawFilePath) f)
+		checksymlinks
 		l
 	withFilesInGitAnnex ww (Command.Unannex.seeker True) l
 	finish
  where
 	ww = WarnUnmatchLsFiles
+	checksymlinks (_, f) =
+		commandAction $ lookupKey f >>= \case
+			Nothing -> stop
+			Just k -> startCheckIncomplete (fromRawFilePath f) k
 
 {- git annex symlinks that are not checked into git could be left by an
  - interrupted add. -}

@@ -22,6 +22,7 @@ import Utility.WebApp
 import Utility.Daemon (checkDaemon)
 import Utility.UserInfo
 import Annex.Init
+import Annex.Path
 import qualified Git
 import Git.Types (fromConfigValue)
 import qualified Git.Config
@@ -30,6 +31,7 @@ import qualified Annex
 import Config.Files.AutoStart
 import Upgrade
 import Annex.Version
+import Annex.Action
 import Utility.Android
 import Control.Concurrent
 
@@ -126,8 +128,10 @@ startNoRepo o = go =<< liftIO (filterM doesDirectoryExist =<< readAutoStartFile)
 			Right state -> void $ Annex.eval state $ do
 				whenM (fromRepo Git.repoIsLocalBare) $
 					giveup $ d ++ " is a bare git repository, cannot run the webapp in it"
-				callCommandAction $
+				r <- callCommandAction $
 					start' False o
+				quiesce False
+				return r
 
 cannotStartIn :: FilePath -> String -> IO ()
 cannotStartIn d reason = warningIO $ "unable to start webapp in repository " ++ d ++ ": " ++ reason
@@ -219,7 +223,7 @@ openBrowser' mcmd htmlshim realurl outh errh =
 #endif
 	hPutStrLn (fromMaybe stdout outh) $ "Launching web browser on " ++ url
 	hFlush stdout
-	environ <- cleanEnvironment
+	environ <- cleanStandaloneEnvironment
 	let p' = p
 		{ env = environ
 		, std_out = maybe Inherit UseHandle outh

@@ -1,6 +1,6 @@
 {- Persistent sqlite database handles.
  -
- - Copyright 2015-2019 Joey Hess <id@joeyh.name>
+ - Copyright 2015-2022 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU AGPL version 3 or higher.
  -}
@@ -21,6 +21,7 @@ import Utility.Exception
 import Utility.FileSystemEncoding
 import Utility.Debug
 import Utility.DebugLocks
+import Utility.InodeCache
 
 import Database.Persist.Sqlite
 import qualified Database.Sqlite as Sqlite
@@ -38,7 +39,7 @@ import System.IO
 
 {- A DbHandle is a reference to a worker thread that communicates with
  - the database. It has a MVar which Jobs are submitted to. -}
-data DbHandle = DbHandle (Async ()) (MVar Job)
+data DbHandle = DbHandle RawFilePath (Async ()) (MVar Job)
 
 {- Name of a table that should exist once the database is initialized. -}
 type TableName = String
@@ -48,17 +49,17 @@ type TableName = String
 openDb :: RawFilePath -> TableName -> IO DbHandle
 openDb db tablename = do
 	jobs <- newEmptyMVar
-	worker <- async (workerThread (T.pack (fromRawFilePath db)) tablename jobs)
+	worker <- async (workerThread db tablename jobs)
 
 	-- work around https://github.com/yesodweb/persistent/issues/474
 	liftIO $ fileEncoding stderr
 
-	return $ DbHandle worker jobs
+	return $ DbHandle db worker jobs
 
 {- This is optional; when the DbHandle gets garbage collected it will
  - auto-close. -}
 closeDb :: DbHandle -> IO ()
-closeDb (DbHandle worker jobs) = do
+closeDb (DbHandle _db worker jobs) = do
 	debugLocks $ putMVar jobs CloseJob
 	wait worker
@@ -73,7 +74,7 @@ closeDb (DbHandle _db worker jobs) = do
  - it is able to run.
  -}
 queryDb :: DbHandle -> SqlPersistM a -> IO a
-queryDb (DbHandle _ jobs) a = do
+queryDb (DbHandle _db _ jobs) a = do
 	res <- newEmptyMVar
 	putMVar jobs $ QueryJob $
 		debugLocks $ liftIO . putMVar res =<< tryNonAsync a
@@ -82,25 +83,32 @@ queryDb (DbHandle _db _ jobs) a = do
 
 {- Writes a change to the database.
  -
- - Writes can fail if another write is happening concurrently.
- - So write failures are caught and retried repeatedly for up to 10
- - seconds, which should avoid all but the most exceptional problems.
+ - Writes can fail when another write is happening concurrently.
+ - So write failures are caught and retried.
+ -
+ - Retries repeatedly for up to 60 seconds. Past that point, it continues
+ - retrying only if the database shows signs of being modified by another
+ - process at least once each 30 seconds.
 -}
 commitDb :: DbHandle -> SqlPersistM () -> IO ()
-commitDb h wa = robustly Nothing 100 (commitDb' h wa)
+commitDb h@(DbHandle db _ _) wa =
+	robustly (commitDb' h wa) maxretries emptyDatabaseInodeCache
  where
-	robustly :: Maybe SomeException -> Int -> IO (Either SomeException ()) -> IO ()
-	robustly e 0 _ = error $ "failed to commit changes to sqlite database: " ++ show e
-	robustly _ n a = do
+	robustly a retries ic = do
 		r <- a
 		case r of
 			Right _ -> return ()
-			Left e -> do
-				threadDelay 100000 -- 1/10th second
-				robustly (Just e) (n-1) a
+			Left err -> do
+				threadDelay briefdelay
+				retryHelper "write to" err maxretries db retries ic $
+					robustly a
+	briefdelay = 100000 -- 1/10th second
+	maxretries = 300 :: Int -- 30 seconds of briefdelay
 
 commitDb' :: DbHandle -> SqlPersistM () -> IO (Either SomeException ())
-commitDb' (DbHandle _ jobs) a = do
+commitDb' (DbHandle _ _ jobs) a = do
 	debug "Database.Handle" "commitDb start"
 	res <- newEmptyMVar
 	putMVar jobs $ ChangeJob $
@@ -117,7 +125,7 @@ data Job
 	| ChangeJob (SqlPersistM ())
 	| CloseJob
 
-workerThread :: T.Text -> TableName -> MVar Job -> IO ()
+workerThread :: RawFilePath -> TableName -> MVar Job -> IO ()
 workerThread db tablename jobs = newconn
  where
 	newconn = do
@@ -145,44 +153,46 @@ workerThread db tablename jobs = newconn
 	getjob :: IO (Either BlockedIndefinitelyOnMVar Job)
 	getjob = try $ takeMVar jobs
 
--- Like runSqlite, but more robust.
--- 
--- New database connections can sometimes take a while to become usable.
-- This may be due to WAL mode recovering after a crash, or perhaps a bug
--- like described in blob 500f777a6ab6c45ca5f9790e0a63575f8e3cb88f.
--- So, loop until a select succeeds; once one succeeds the connection will
--- stay usable.
--- 
--- And sqlite sometimes throws ErrorIO when there's not really an IO problem,
--- but perhaps just a short read(). That's caught and retried several times.
-runSqliteRobustly :: TableName -> T.Text -> (SqlPersistM a) -> IO a
+{- Like runSqlite, but more robust.
+ -
+ - New database connections can sometimes take a while to become usable,
+ - and selects will fail with ErrorBusy in the meantime. This may be due to
+ - WAL mode recovering after a crash, or a concurrent writer.
+ - So, wait until a select succeeds; once one succeeds the connection will
+ - stay usable.
+ -
+ - Also sqlite sometimes throws ErrorIO when there's not really an IO
+ - problem, but perhaps just a short read(). So also retry on ErrorIO.
+ -
+ - Retries repeatedly for up to 60 seconds. Past that point, it continues
+ - retrying only if the database shows signs of being modified by another
+ - process at least once each 30 seconds.
+ -}
+runSqliteRobustly :: TableName -> RawFilePath -> (SqlPersistM a) -> IO a
 runSqliteRobustly tablename db a = do
-	conn <- opensettle maxretries
-	go conn maxretries
+	conn <- opensettle maxretries emptyDatabaseInodeCache
+	go conn maxretries emptyDatabaseInodeCache
  where
-	maxretries = 100 :: Int
-
-	rethrow msg e = throwIO $ userError $ show e ++ "(" ++ msg ++ ")"
-
-	go conn retries = do
+	go conn retries ic = do
 		r <- try $ runResourceT $ runNoLoggingT $
-			withSqlConnRobustly (wrapConnection conn) $
+			withSqlConnRobustly db (wrapConnection conn) $
 				runSqlConn a
 		case r of
 			Right v -> return v
 			Left ex@(Sqlite.SqliteException { Sqlite.seError = e })
-				| e == Sqlite.ErrorIO ->
-					let retries' = retries - 1
-					in if retries' < 1
-						then rethrow "after successful open" ex
-						else go conn retries'
-				| otherwise -> rethrow "after successful open" ex
+				| e == Sqlite.ErrorIO -> do
+					briefdelay
+					retryHelper "access" ex maxretries db retries ic $
+						go conn
+				| otherwise -> rethrow $ errmsg "after successful open" ex
 
-	opensettle retries = do
-		conn <- Sqlite.open db
-		settle conn retries
+	opensettle retries ic = do
+		conn <- Sqlite.open tdb
+		settle conn retries ic
+
+	tdb = T.pack (fromRawFilePath db)
 
-	settle conn retries = do
+	settle conn retries ic = do
 		r <- try $ do
 			stmt <- Sqlite.prepare conn nullselect
 			void $ Sqlite.step stmt
@@ -190,27 +200,27 @@ runSqliteRobustly tablename db a = do
 		case r of
 			Right _ -> return conn
 			Left ex@(Sqlite.SqliteException { Sqlite.seError = e })
-				| e == Sqlite.ErrorBusy -> do
-					-- Wait and retry any number of times; it
-					-- will stop being busy eventually.
-					briefdelay
-					settle conn retries
-				| e == Sqlite.ErrorIO -> do
-					-- Could be a real IO error,
-					-- so don't retry indefinitely.
+				| e == Sqlite.ErrorBusy || e == Sqlite.ErrorIO -> do
+					when (e == Sqlite.ErrorIO) $
 						Sqlite.close conn
 					briefdelay
-					let retries' = retries - 1
-					if retries' < 1
-						then rethrow "while opening database connection" ex
-						else opensettle retries'
-				| otherwise -> rethrow "while opening database connection" ex
+					retryHelper "open" ex maxretries db retries ic $
+						if e == Sqlite.ErrorIO
+							then opensettle
+							else settle conn
+				| otherwise -> rethrow $ errmsg "while opening database connection" ex
 
 	-- This should succeed for any table.
 	nullselect = T.pack $ "SELECT null from " ++ tablename ++ " limit 1"
 
 	briefdelay = threadDelay 1000 -- 1/1000th second
+
+	maxretries = 30000 :: Int -- 30 seconds of briefdelays
+
+	rethrow = throwIO . userError
+
+	errmsg msg e = show e ++ "(" ++ msg ++ ")"
 
 -- Like withSqlConn, but more robust.
 withSqlConnRobustly
@@ -219,45 +229,99 @@ withSqlConnRobustly
 	:: (MonadUnliftIO m
 	, BaseBackend backend ~ SqlBackend
 	, BackendCompatible SqlBackend backend
 	)
-	=> (LogFunc -> IO backend)
+	=> RawFilePath
+	-> (LogFunc -> IO backend)
 	-> (backend -> m a)
 	-> m a
-withSqlConnRobustly open f = do
+withSqlConnRobustly db open f = do
 	logFunc <- askLoggerIO
 	withRunInIO $ \run -> bracket
 		(open logFunc)
-		closeRobustly
+		(closeRobustly db)
 		(run . f)
 
--- Sqlite can throw ErrorBusy while closing a database; this catches
-- the exception and retries.
+{- Sqlite can throw ErrorBusy while closing a database; this catches
+ - the exception and retries.
+ -
+ - Retries repeatedly for up to 60 seconds. Past that point, it continues
+ - retrying only if the database shows signs of being modified by another
+ - process at least once each 30 seconds.
+ -}
 closeRobustly
 	:: (IsPersistBackend backend
 	, BaseBackend backend ~ SqlBackend
 	, BackendCompatible SqlBackend backend
 	)
-	=> backend
+	=> RawFilePath
+	-> backend
 	-> IO ()
-closeRobustly conn = go maxretries briefdelay
+closeRobustly db conn = go maxretries emptyDatabaseInodeCache
  where
-	briefdelay = 1000 -- 1/1000th second
-
-	-- Try up to 14 times; with the delay doubling each time,
-	-- the maximum delay before giving up is 16 seconds.
-	maxretries = 14 :: Int
-
-	go retries delay = do
+	go retries ic = do
 		r <- try $ close' conn
 		case r of
 			Right () -> return ()
			Left ex@(Sqlite.SqliteException { Sqlite.seError = e })
 				| e == Sqlite.ErrorBusy -> do
-					threadDelay delay
-					let delay' = delay * 2
-					let retries' = retries - 1
-					if retries' < 1
-						then rethrow "while closing database connection" ex
-						else go retries' delay'
-				| otherwise -> rethrow "while closing database connection" ex
-
-	rethrow msg e = throwIO $ userError $ show e ++ "(" ++ msg ++ ")"
+					threadDelay briefdelay
+					retryHelper "close" ex maxretries db retries ic go
+				| otherwise -> rethrow $ errmsg "while closing database connection" ex
+
+	briefdelay = 1000 -- 1/1000th second
+
+	maxretries = 30000 :: Int -- 30 seconds of briefdelays
+
+	rethrow = throwIO . userError
+
+	errmsg msg e = show e ++ "(" ++ msg ++ ")"
+
+{- Retries a sqlite action repeatedly, but not forever. Detects situations
+ - when another git-annex process is suspended and has the database locked,
+ - and eventually gives up. The retries is the current number of retries
+ - that are left. The maxretries is how many retries to make each time
+ - the database is seen to have been modified by some other process.
+ -}
+retryHelper
+	:: Show err
+	=> String
+	-> err
+	-> Int
+	-> RawFilePath
+	-> Int
+	-> DatabaseInodeCache
+	-> (Int -> DatabaseInodeCache -> IO a)
+	-> IO a
+retryHelper action err maxretries db retries ic a = do
+	let retries' = retries - 1
+	if retries' < 1
+		then do
+			ic' <- getDatabaseInodeCache db
+			if isDatabaseModified ic ic'
+				then a maxretries ic'
+				else giveup (databaseAccessStalledMsg action db err)
+		else a retries' ic
+
+databaseAccessStalledMsg :: Show err => String -> RawFilePath -> err -> String
+databaseAccessStalledMsg action db err =
+	"Repeatedly unable to " ++ action ++ " sqlite database " ++ fromRawFilePath db
+		++ ": " ++ show err ++ ". "
+		++ "Perhaps another git-annex process is suspended and is "
+		++ "keeping this database locked?"
+
+data DatabaseInodeCache = DatabaseInodeCache (Maybe InodeCache) (Maybe InodeCache)
+
+emptyDatabaseInodeCache :: DatabaseInodeCache
+emptyDatabaseInodeCache = DatabaseInodeCache Nothing Nothing
+
+getDatabaseInodeCache :: RawFilePath -> IO DatabaseInodeCache
+getDatabaseInodeCache db = DatabaseInodeCache
+	<$> genInodeCache db noTSDelta
+	<*> genInodeCache (db <> "-wal") noTSDelta
+
+isDatabaseModified :: DatabaseInodeCache -> DatabaseInodeCache -> Bool
+isDatabaseModified (DatabaseInodeCache a1 b1) (DatabaseInodeCache a2 b2) =
+	ismodified a1 a2 || ismodified b1 b2
+  where
+	ismodified (Just a) (Just b) = not (compareStrong a b)
+	ismodified Nothing Nothing = False
+	ismodified _ _ = True
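
The retryHelper/DatabaseInodeCache pair implements bounded retrying with stall detection: the retry budget is refilled only while the database file (or its -wal companion) keeps changing, so a database held by a suspended process eventually produces an error instead of hanging forever. A standalone sketch of the same idea, approximating the InodeCache comparison with a modification-time check; retryWhileModified is an illustrative name, not part of git-annex:

{-# LANGUAGE LambdaCase #-}

import Control.Concurrent (threadDelay)
import System.Directory (getModificationTime)

-- Retry a flaky action; once a window's budget is spent, continue only
-- if the watched file was modified during that window.
retryWhileModified :: FilePath -> Int -> IO (Either String a) -> IO a
retryWhileModified path budget action = go budget =<< getModificationTime path
  where
	go retries lastseen = action >>= \case
		Right v -> return v
		Left err
			| retries > 1 -> do
				threadDelay 100000 -- 1/10th second, as in commitDb
				go (retries - 1) lastseen
			| otherwise -> do
				now <- getModificationTime path
				if now /= lastseen
					then go budget now -- signs of life: new window
					else ioError (userError ("stalled: " ++ err))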

View file

@ -1,6 +1,6 @@
{- Sqlite database of information about Keys {- Sqlite database of information about Keys
- -
- Copyright 2015-2021 Joey Hess <id@joeyh.name> - Copyright 2015-2022 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -12,6 +12,7 @@
module Database.Keys ( module Database.Keys (
DbHandle, DbHandle,
closeDb, closeDb,
flushDb,
addAssociatedFile, addAssociatedFile,
getAssociatedFiles, getAssociatedFiles,
getAssociatedFilesIncluding, getAssociatedFilesIncluding,
@ -24,11 +25,13 @@ module Database.Keys (
removeInodeCache, removeInodeCache,
isInodeKnown, isInodeKnown,
runWriter, runWriter,
updateDatabase,
) where ) where
import qualified Database.Keys.SQL as SQL import qualified Database.Keys.SQL as SQL
import Database.Types import Database.Types
import Database.Keys.Handle import Database.Keys.Handle
import Database.Keys.Tables
import qualified Database.Queue as H import qualified Database.Queue as H
import Database.Init import Database.Init
import Annex.Locations import Annex.Locations
@ -63,49 +66,53 @@ import Control.Concurrent.Async
- If the database is already open, any writes are flushed to it, to ensure - If the database is already open, any writes are flushed to it, to ensure
- consistency. - consistency.
- -
- Any queued writes will be flushed before the read. - Any queued writes to the table will be flushed before the read.
-} -}
runReader :: Monoid v => (SQL.ReadHandle -> Annex v) -> Annex v runReader :: Monoid v => DbTable -> (SQL.ReadHandle -> Annex v) -> Annex v
runReader a = do runReader t a = do
h <- Annex.getRead Annex.keysdbhandle h <- Annex.getRead Annex.keysdbhandle
withDbState h go withDbState h go
where where
go DbUnavailable = return (mempty, DbUnavailable) go DbUnavailable = return (mempty, DbUnavailable)
go st@(DbOpen qh) = do go (DbOpen (qh, tableschanged)) = do
tableschanged' <- if isDbTableChanged tableschanged t
then do
liftIO $ H.flushDbQueue qh liftIO $ H.flushDbQueue qh
return mempty
else return tableschanged
v <- a (SQL.ReadHandle qh) v <- a (SQL.ReadHandle qh)
return (v, st) return (v, DbOpen (qh, tableschanged'))
go DbClosed = do go DbClosed = do
st' <- openDb False DbClosed st <- openDb False DbClosed
v <- case st' of v <- case st of
(DbOpen qh) -> a (SQL.ReadHandle qh) (DbOpen (qh, _)) -> a (SQL.ReadHandle qh)
_ -> return mempty _ -> return mempty
return (v, st') return (v, st)
runReaderIO :: Monoid v => (SQL.ReadHandle -> IO v) -> Annex v runReaderIO :: Monoid v => DbTable -> (SQL.ReadHandle -> IO v) -> Annex v
runReaderIO a = runReader (liftIO . a) runReaderIO t a = runReader t (liftIO . a)
{- Runs an action that writes to the database. Typically this is used to {- Runs an action that writes to the database. Typically this is used to
- queue changes, which will be flushed at a later point. - queue changes, which will be flushed at a later point.
- -
- The database is created if it doesn't exist yet. -} - The database is created if it doesn't exist yet. -}
runWriter :: (SQL.WriteHandle -> Annex ()) -> Annex () runWriter :: DbTable -> (SQL.WriteHandle -> Annex ()) -> Annex ()
runWriter a = do runWriter t a = do
h <- Annex.getRead Annex.keysdbhandle h <- Annex.getRead Annex.keysdbhandle
withDbState h go withDbState h go
where where
go st@(DbOpen qh) = do go (DbOpen (qh, tableschanged)) = do
v <- a (SQL.WriteHandle qh) v <- a (SQL.WriteHandle qh)
return (v, st) return (v, DbOpen (qh, addDbTable tableschanged t))
go st = do go st = do
st' <- openDb True st st' <- openDb True st
v <- case st' of v <- case st' of
DbOpen qh -> a (SQL.WriteHandle qh) DbOpen (qh, _) -> a (SQL.WriteHandle qh)
_ -> error "internal" _ -> error "internal"
return (v, st') return (v, st')
runWriterIO :: (SQL.WriteHandle -> IO ()) -> Annex () runWriterIO :: DbTable -> (SQL.WriteHandle -> IO ()) -> Annex ()
runWriterIO a = runWriter (liftIO . a) runWriterIO t a = runWriter t (liftIO . a)
{- Opens the database, creating it if it doesn't exist yet. {- Opens the database, creating it if it doesn't exist yet.
- -
@ -138,26 +145,29 @@ openDb forwrite _ = do
open db = do open db = do
qh <- liftIO $ H.openDbQueue db SQL.containedTable qh <- liftIO $ H.openDbQueue db SQL.containedTable
reconcileStaged qh tc <- reconcileStaged qh
return $ DbOpen qh return $ DbOpen (qh, tc)
{- Closes the database if it was open. Any writes will be flushed to it. {- Closes the database if it was open. Any writes will be flushed to it.
- -
- This does not normally need to be called; the database will auto-close - This does not prevent further use of the database; it will be re-opened
- when the handle is garbage collected. However, this can be used to - as necessary.
- force a re-read of the database, in case another process has written
- data to it.
-} -}
closeDb :: Annex () closeDb :: Annex ()
closeDb = liftIO . closeDbHandle =<< Annex.getRead Annex.keysdbhandle closeDb = liftIO . closeDbHandle =<< Annex.getRead Annex.keysdbhandle
{- Flushes any queued writes to the database. -}
flushDb :: Annex ()
flushDb = liftIO . flushDbQueue =<< Annex.getRead Annex.keysdbhandle
addAssociatedFile :: Key -> TopFilePath -> Annex () addAssociatedFile :: Key -> TopFilePath -> Annex ()
addAssociatedFile k f = runWriterIO $ SQL.addAssociatedFile k f addAssociatedFile k f = runWriterIO AssociatedTable $ SQL.addAssociatedFile k f
{- Note that the files returned were once associated with the key, but {- Note that the files returned were once associated with the key, but
- some of them may not be any longer. -} - some of them may not be any longer. -}
getAssociatedFiles :: Key -> Annex [TopFilePath] getAssociatedFiles :: Key -> Annex [TopFilePath]
getAssociatedFiles k = emptyWhenBare $ runReaderIO $ SQL.getAssociatedFiles k getAssociatedFiles k = emptyWhenBare $ runReaderIO AssociatedTable $
SQL.getAssociatedFiles k
{- Queries for associated files never return anything when in a bare {- Queries for associated files never return anything when in a bare
- repository, since without a work tree there can be no associated files. - repository, since without a work tree there can be no associated files.
@ -183,10 +193,12 @@ getAssociatedFilesIncluding afile k = emptyWhenBare $ do
{- Gets any keys that are on record as having a particular associated file. {- Gets any keys that are on record as having a particular associated file.
- (Should be one or none but the database doesn't enforce that.) -} - (Should be one or none but the database doesn't enforce that.) -}
getAssociatedKey :: TopFilePath -> Annex [Key] getAssociatedKey :: TopFilePath -> Annex [Key]
getAssociatedKey f = emptyWhenBare $ runReaderIO $ SQL.getAssociatedKey f getAssociatedKey f = emptyWhenBare $ runReaderIO AssociatedTable $
SQL.getAssociatedKey f
removeAssociatedFile :: Key -> TopFilePath -> Annex () removeAssociatedFile :: Key -> TopFilePath -> Annex ()
removeAssociatedFile k = runWriterIO . SQL.removeAssociatedFile k removeAssociatedFile k = runWriterIO AssociatedTable .
SQL.removeAssociatedFile k
{- Stats the files, and stores their InodeCaches. -} {- Stats the files, and stores their InodeCaches. -}
storeInodeCaches :: Key -> [RawFilePath] -> Annex () storeInodeCaches :: Key -> [RawFilePath] -> Annex ()
@ -195,7 +207,7 @@ storeInodeCaches k fs = withTSDelta $ \d ->
=<< liftIO (mapM (\f -> genInodeCache f d) fs) =<< liftIO (mapM (\f -> genInodeCache f d) fs)
addInodeCaches :: Key -> [InodeCache] -> Annex () addInodeCaches :: Key -> [InodeCache] -> Annex ()
addInodeCaches k is = runWriterIO $ SQL.addInodeCaches k is addInodeCaches k is = runWriterIO ContentTable $ SQL.addInodeCaches k is
{- A key may have multiple InodeCaches; one for the annex object, and one {- A key may have multiple InodeCaches; one for the annex object, and one
- for each pointer file that is a copy of it. - for each pointer file that is a copy of it.
@ -207,18 +219,19 @@ addInodeCaches k is = runWriterIO $ SQL.addInodeCaches k is
- for pointer files, but none recorded for the annex object. - for pointer files, but none recorded for the annex object.
-} -}
getInodeCaches :: Key -> Annex [InodeCache] getInodeCaches :: Key -> Annex [InodeCache]
getInodeCaches = runReaderIO . SQL.getInodeCaches getInodeCaches = runReaderIO ContentTable . SQL.getInodeCaches
{- Remove all inodes cached for a key. -} {- Remove all inodes cached for a key. -}
removeInodeCaches :: Key -> Annex () removeInodeCaches :: Key -> Annex ()
removeInodeCaches = runWriterIO . SQL.removeInodeCaches removeInodeCaches = runWriterIO ContentTable . SQL.removeInodeCaches
{- Remove cached inodes, for any key. -} {- Remove cached inodes, for any key. -}
removeInodeCache :: InodeCache -> Annex () removeInodeCache :: InodeCache -> Annex ()
removeInodeCache = runWriterIO . SQL.removeInodeCache removeInodeCache = runWriterIO ContentTable . SQL.removeInodeCache
isInodeKnown :: InodeCache -> SentinalStatus -> Annex Bool isInodeKnown :: InodeCache -> SentinalStatus -> Annex Bool
isInodeKnown i s = or <$> runReaderIO ((:[]) <$$> SQL.isInodeKnown i s) isInodeKnown i s = or <$> runReaderIO ContentTable
((:[]) <$$> SQL.isInodeKnown i s)
{- Looks at staged changes to annexed files, and updates the keys database, {- Looks at staged changes to annexed files, and updates the keys database,
- so that its information is consistent with the state of the repository. - so that its information is consistent with the state of the repository.
@ -247,18 +260,21 @@ isInodeKnown i s = or <$> runReaderIO ((:[]) <$$> SQL.isInodeKnown i s)
- So when using getAssociatedFiles, have to make sure the file still - So when using getAssociatedFiles, have to make sure the file still
- is an associated file. - is an associated file.
-} -}
reconcileStaged :: H.DbQueue -> Annex () reconcileStaged :: H.DbQueue -> Annex DbTablesChanged
reconcileStaged qh = unlessM (Git.Config.isBare <$> gitRepo) $ do reconcileStaged qh = ifM (Git.Config.isBare <$> gitRepo)
( return mempty
, do
gitindex <- inRepo currentIndexFile gitindex <- inRepo currentIndexFile
indexcache <- fromRawFilePath <$> calcRepo' gitAnnexKeysDbIndexCache indexcache <- fromRawFilePath <$> calcRepo' gitAnnexKeysDbIndexCache
withTSDelta (liftIO . genInodeCache gitindex) >>= \case withTSDelta (liftIO . genInodeCache gitindex) >>= \case
Just cur -> readindexcache indexcache >>= \case Just cur -> readindexcache indexcache >>= \case
Nothing -> go cur indexcache =<< getindextree Nothing -> go cur indexcache =<< getindextree
Just prev -> ifM (compareInodeCaches prev cur) Just prev -> ifM (compareInodeCaches prev cur)
( noop ( return mempty
, go cur indexcache =<< getindextree , go cur indexcache =<< getindextree
) )
Nothing -> noop Nothing -> return mempty
)
where where
lastindexref = Ref "refs/annex/last-index" lastindexref = Ref "refs/annex/last-index"
@ -283,6 +299,7 @@ reconcileStaged qh = unlessM (Git.Config.isBare <$> gitRepo) $ do
-- against next time. -- against next time.
inRepo $ update' lastindexref newtree inRepo $ update' lastindexref newtree
fastDebug "Database.Keys" "reconcileStaged end" fastDebug "Database.Keys" "reconcileStaged end"
return (DbTablesChanged True True)
-- git write-tree will fail if the index is locked or when there is -- git write-tree will fail if the index is locked or when there is
-- a merge conflict. To get up-to-date with the current index, -- a merge conflict. To get up-to-date with the current index,
-- diff --staged with the old index tree. The current index tree -- diff --staged with the old index tree. The current index tree
@ -304,6 +321,7 @@ reconcileStaged qh = unlessM (Git.Config.isBare <$> gitRepo) $ do
void $ updatetodiff g Nothing "--staged" void $ updatetodiff g Nothing "--staged"
(procmergeconflictdiff mdfeeder) (procmergeconflictdiff mdfeeder)
fastDebug "Database.Keys" "reconcileStaged end" fastDebug "Database.Keys" "reconcileStaged end"
return (DbTablesChanged True True)
updatetodiff g old new processor = do updatetodiff g old new processor = do
(l, cleanup) <- pipeNullSplit' (diff old new) g (l, cleanup) <- pipeNullSplit' (diff old new) g
@ -479,3 +497,9 @@ reconcileStaged qh = unlessM (Git.Config.isBare <$> gitRepo) $ do
largediff :: Int largediff :: Int
largediff = 1000 largediff = 1000
{- Normally the keys database is updated incrementally when opened,
- by reconcileStaged. Calling this explicitly allows running the
- update at an earlier point.
-}
updateDatabase :: Annex ()
updateDatabase = runWriter ContentTable (const noop)
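To make the new entry point concrete, here is a hedged sketch of the call pattern it enables; `myCommand` is invented for illustration and is not part of the patch:

```
-- Illustrative only: run the incremental update up front, so that
-- later keys-database reads do not trigger reconcileStaged at a less
-- convenient point (eg, while some lock is held).
myCommand :: Annex ()
myCommand = do
	Database.Keys.updateDatabase
	-- ... subsequent Database.Keys queries now run against an
	-- already-reconciled database ...
	return ()
```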
@ -15,6 +15,7 @@ module Database.Keys.Handle (
) where ) where
import qualified Database.Queue as H import qualified Database.Queue as H
import Database.Keys.Tables
import Utility.Exception import Utility.Exception
import Utility.DebugLocks import Utility.DebugLocks
@ -29,7 +30,7 @@ newtype DbHandle = DbHandle (MVar DbState)
-- The database can be closed or open, but it also may have been -- The database can be closed or open, but it also may have been
-- tried to open (for read) and didn't exist yet or is not readable. -- tried to open (for read) and didn't exist yet or is not readable.
data DbState = DbClosed | DbOpen H.DbQueue | DbUnavailable data DbState = DbClosed | DbOpen (H.DbQueue, DbTablesChanged) | DbUnavailable
newDbHandle :: IO DbHandle newDbHandle :: IO DbHandle
newDbHandle = DbHandle <$> newMVar DbClosed newDbHandle = DbHandle <$> newMVar DbClosed
@ -52,15 +53,17 @@ withDbState (DbHandle mvar) a = do
return v return v
flushDbQueue :: DbHandle -> IO () flushDbQueue :: DbHandle -> IO ()
flushDbQueue (DbHandle mvar) = go =<< debugLocks (readMVar mvar) flushDbQueue h = withDbState h go
where where
go (DbOpen qh) = H.flushDbQueue qh go (DbOpen (qh, _)) = do
go _ = return () H.flushDbQueue qh
return ((), DbOpen (qh, mempty))
go st = return ((), st)
closeDbHandle :: DbHandle -> IO () closeDbHandle :: DbHandle -> IO ()
closeDbHandle h = withDbState h go closeDbHandle h = withDbState h go
where where
go (DbOpen qh) = do go (DbOpen (qh, _)) = do
H.closeDbQueue qh H.closeDbQueue qh
return ((), DbClosed) return ((), DbClosed)
go st = return ((), st) go st = return ((), st)
Database/Keys/Tables.hs Normal file
@ -0,0 +1,38 @@
{- Keeping track of which tables in the keys database have changed
-
- Copyright 2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Database.Keys.Tables where
import Data.Monoid
import qualified Data.Semigroup as Sem
import Prelude
data DbTable = AssociatedTable | ContentTable
deriving (Eq, Show)
data DbTablesChanged = DbTablesChanged
{ associatedTable :: Bool
, contentTable :: Bool
}
deriving (Show)
instance Sem.Semigroup DbTablesChanged where
a <> b = DbTablesChanged
{ associatedTable = associatedTable a || associatedTable b
, contentTable = contentTable a || contentTable b
}
instance Monoid DbTablesChanged where
mempty = DbTablesChanged False False
addDbTable :: DbTablesChanged -> DbTable -> DbTablesChanged
addDbTable ts AssociatedTable = ts { associatedTable = True }
addDbTable ts ContentTable = ts { contentTable = True }
isDbTableChanged :: DbTablesChanged -> DbTable -> Bool
isDbTableChanged ts AssociatedTable = associatedTable ts
isDbTableChanged ts ContentTable = contentTable ts
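Since the module is tiny, a quick usage sketch may help; `demo` is illustrative and not part of the patch:

```
-- Illustrative use of DbTablesChanged: a read of a table only needs
-- to force a queue flush when a write to that same table is pending.
demo :: IO ()
demo = do
	let pending = addDbTable mempty AssociatedTable
	print (isDbTableChanged pending ContentTable)    -- False: no flush needed
	print (isDbTableChanged pending AssociatedTable) -- True: flush first
	-- Pending changes from several writes combine monoidally:
	let pending' = pending <> addDbTable mempty ContentTable
	print (isDbTableChanged pending' ContentTable)   -- True
```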
@ -355,7 +355,8 @@ tryGitConfigRead autoinit r hasuuid
":" ++ show e ":" ++ show e
Annex.getState Annex.repo Annex.getState Annex.repo
s <- newLocal r s <- newLocal r
liftIO $ Annex.eval s $ check `finally` stopCoProcesses liftIO $ Annex.eval s $ check
`finally` quiesce True
failedreadlocalconfig = do failedreadlocalconfig = do
unless hasuuid $ case Git.remoteName r of unless hasuuid $ case Git.remoteName r of
@ -449,7 +450,6 @@ dropKey' repo r st@(State connpool duc _ _ _) key
Annex.Content.lockContentForRemoval key cleanup $ \lock -> do Annex.Content.lockContentForRemoval key cleanup $ \lock -> do
Annex.Content.removeAnnex lock Annex.Content.removeAnnex lock
cleanup cleanup
Annex.Content.saveState True
, giveup "remote does not have expected annex.uuid value" , giveup "remote does not have expected annex.uuid value"
) )
| Git.repoIsHttp repo = giveup "dropping from http remote not supported" | Git.repoIsHttp repo = giveup "dropping from http remote not supported"
@ -577,11 +577,9 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key file meterupdate
let checksuccess = liftIO checkio >>= \case let checksuccess = liftIO checkio >>= \case
Just err -> giveup err Just err -> giveup err
Nothing -> return True Nothing -> return True
res <- logStatusAfter key $ Annex.Content.getViaTmp rsp verify key file $ \dest -> logStatusAfter key $ Annex.Content.getViaTmp rsp verify key file $ \dest ->
metered (Just (combineMeterUpdate meterupdate p)) key bwlimit $ \_ p' -> metered (Just (combineMeterUpdate meterupdate p)) key bwlimit $ \_ p' ->
copier object (fromRawFilePath dest) key p' checksuccess verify copier object (fromRawFilePath dest) key p' checksuccess verify
Annex.Content.saveState True
return res
) )
unless res $ unless res $
giveup "failed to send content to remote" giveup "failed to send content to remote"
@ -606,7 +604,7 @@ repairRemote r a = return $ do
Annex.eval s $ do Annex.eval s $ do
Annex.BranchState.disableUpdate Annex.BranchState.disableUpdate
ensureInitialized (pure []) ensureInitialized (pure [])
a `finally` stopCoProcesses a `finally` quiesce True
data LocalRemoteAnnex = LocalRemoteAnnex Git.Repo (MVar [(Annex.AnnexState, Annex.AnnexRead)]) data LocalRemoteAnnex = LocalRemoteAnnex Git.Repo (MVar [(Annex.AnnexState, Annex.AnnexRead)])
@ -618,8 +616,8 @@ mkLocalRemoteAnnex repo = LocalRemoteAnnex repo <$> liftIO (newMVar [])
{- Runs an action from the perspective of a local remote. {- Runs an action from the perspective of a local remote.
- -
- The AnnexState is cached for speed and to avoid resource leaks. - The AnnexState is cached for speed and to avoid resource leaks.
- However, coprocesses are stopped after each call to avoid git - However, it is quiesced after each call to avoid git processes
- processes hanging around on removable media. - hanging around on removable media.
- -
- The remote will be automatically initialized/upgraded first, - The remote will be automatically initialized/upgraded first,
- when possible. - when possible.
@ -655,7 +653,7 @@ onLocal' (LocalRemoteAnnex repo mv) a = liftIO (takeMVar mv) >>= \case
go ((st, rd), a') = do go ((st, rd), a') = do
curro <- Annex.getState Annex.output curro <- Annex.getState Annex.output
let act = Annex.run (st { Annex.output = curro }, rd) $ let act = Annex.run (st { Annex.output = curro }, rd) $
a' `finally` stopCoProcesses a' `finally` quiesce True
(ret, (st', _rd)) <- liftIO $ act `onException` cache (st, rd) (ret, (st', _rd)) <- liftIO $ act `onException` cache (st, rd)
liftIO $ cache (st', rd) liftIO $ cache (st', rd)
return ret return ret
@ -225,7 +225,7 @@ gen r u rc gc rs = do
, renameExport = renameExportS3 hdl this rs info , renameExport = renameExportS3 hdl this rs info
} }
, importActions = ImportActions , importActions = ImportActions
{ listImportableContents = listImportableContentsS3 hdl this info { listImportableContents = listImportableContentsS3 hdl this info c
, importKey = Nothing , importKey = Nothing
, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierS3 hdl this rs info , retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierS3 hdl this rs info
, storeExportWithContentIdentifier = storeExportWithContentIdentifierS3 hdl this rs info magic , storeExportWithContentIdentifier = storeExportWithContentIdentifierS3 hdl this rs info magic
@ -561,8 +561,8 @@ renameExportS3 hv r rs info k src dest = Just <$> go
srcobject = T.pack $ bucketExportLocation info src srcobject = T.pack $ bucketExportLocation info src
dstobject = T.pack $ bucketExportLocation info dest dstobject = T.pack $ bucketExportLocation info dest
listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize))) listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> ParsedRemoteConfig -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
listImportableContentsS3 hv r info = listImportableContentsS3 hv r info c =
withS3Handle hv $ \case withS3Handle hv $ \case
Nothing -> giveup $ needS3Creds (uuid r) Nothing -> giveup $ needS3Creds (uuid r)
Just h -> Just <$> go h Just h -> Just <$> go h
@ -571,6 +571,8 @@ listImportableContentsS3 hv r info =
ic <- liftIO $ runResourceT $ extractFromResourceT =<< startlist h ic <- liftIO $ runResourceT $ extractFromResourceT =<< startlist h
return (ImportableContentsComplete ic) return (ImportableContentsComplete ic)
fileprefix = T.pack <$> getRemoteConfigValue fileprefixField c
startlist h startlist h
| versioning info = do | versioning info = do
rsp <- sendS3Handle h $ rsp <- sendS3Handle h $
@ -578,7 +580,8 @@ listImportableContentsS3 hv r info =
continuelistversioned h [] rsp continuelistversioned h [] rsp
| otherwise = do | otherwise = do
rsp <- sendS3Handle h $ rsp <- sendS3Handle h $
S3.getBucket (bucket info) (S3.getBucket (bucket info))
{ S3.gbPrefix = fileprefix }
continuelistunversioned h [] rsp continuelistunversioned h [] rsp
continuelistunversioned h l rsp continuelistunversioned h l rsp
@ -586,6 +589,7 @@ listImportableContentsS3 hv r info =
rsp' <- sendS3Handle h $ rsp' <- sendS3Handle h $
(S3.getBucket (bucket info)) (S3.getBucket (bucket info))
{ S3.gbMarker = S3.gbrNextMarker rsp { S3.gbMarker = S3.gbrNextMarker rsp
, S3.gbPrefix = fileprefix
} }
continuelistunversioned h (rsp:l) rsp' continuelistunversioned h (rsp:l) rsp'
| otherwise = return $ | otherwise = return $
@ -597,6 +601,7 @@ listImportableContentsS3 hv r info =
(S3.getBucketObjectVersions (bucket info)) (S3.getBucketObjectVersions (bucket info))
{ S3.gbovKeyMarker = S3.gbovrNextKeyMarker rsp { S3.gbovKeyMarker = S3.gbovrNextKeyMarker rsp
, S3.gbovVersionIdMarker = S3.gbovrNextVersionIdMarker rsp , S3.gbovVersionIdMarker = S3.gbovrNextVersionIdMarker rsp
, S3.gbovPrefix = fileprefix
} }
continuelistversioned h (rsp:l) rsp' continuelistversioned h (rsp:l) rsp'
| otherwise = return $ | otherwise = return $
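For reference, the core of the change above in isolation: a hedged sketch (assuming the aws package's S3 API as used in this patch) of restricting a bucket listing to the configured fileprefix:

```
import qualified Aws.S3 as S3
import qualified Data.Text as T

-- Sketch: ask S3 for only the keys under the prefix, so a large
-- bucket full of unrelated files is not listed in full.
listUnderPrefix :: S3.Bucket -> Maybe T.Text -> S3.GetBucket
listUnderPrefix b mprefix = (S3.getBucket b) { S3.gbPrefix = mprefix }
```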
@ -130,9 +130,9 @@ tests n crippledfilesystem adjustedbranchok opts =
: concatMap mkrepotests testmodes : concatMap mkrepotests testmodes
where where
testmodes = catMaybes testmodes = catMaybes
[ canadjust ("v8 adjusted unlocked branch", (testMode opts (RepoVersion 8)) { adjustedUnlockedBranch = True }) [ canadjust ("v10 adjusted unlocked branch", (testMode opts (RepoVersion 10)) { adjustedUnlockedBranch = True })
, unlesscrippled ("v8 unlocked", (testMode opts (RepoVersion 8)) { unlockedFiles = True }) , unlesscrippled ("v10 unlocked", (testMode opts (RepoVersion 10)) { unlockedFiles = True })
, unlesscrippled ("v8 locked", testMode opts (RepoVersion 8)) , unlesscrippled ("v10 locked", testMode opts (RepoVersion 10))
] ]
remotetestmode = testMode opts (RepoVersion 8) remotetestmode = testMode opts (RepoVersion 8)
unlesscrippled v unlesscrippled v
@ -270,9 +270,10 @@ data ExportActions a = ExportActions
-- Can throw exception if unable to access remote, or if remote -- Can throw exception if unable to access remote, or if remote
-- refuses to remove the content. -- refuses to remove the content.
, removeExport :: Key -> ExportLocation -> a () , removeExport :: Key -> ExportLocation -> a ()
-- Set when the content of a Key stored in the remote to an -- Set when the remote is versioned, so once a Key is stored
-- ExportLocation and then removed with removeExport remains -- to an ExportLocation, a subsequent deletion of that
-- accessible to retrieveKeyFile and checkPresent. -- ExportLocation leaves the key still accessible to retrieveKeyFile
-- and checkPresent.
, versionedExport :: Bool , versionedExport :: Bool
-- Removes an exported directory. Typically the directory will be -- Removes an exported directory. Typically the directory will be
-- empty, but it could possibly contain files or other directories, -- empty, but it could possibly contain files or other directories,
@ -16,6 +16,7 @@ import Types.Upgrade
import Annex.CatFile import Annex.CatFile
import qualified Database.Keys import qualified Database.Keys
import qualified Database.Keys.SQL import qualified Database.Keys.SQL
import Database.Keys.Tables
import qualified Git.LsFiles as LsFiles import qualified Git.LsFiles as LsFiles
import qualified Git import qualified Git
import Git.FilePath import Git.FilePath
@ -114,8 +115,9 @@ populateKeysDb = unlessM isBareRepo $ do
Nothing -> noop Nothing -> noop
Just k -> do Just k -> do
topf <- inRepo $ toTopFilePath $ toRawFilePath f topf <- inRepo $ toTopFilePath $ toRawFilePath f
Database.Keys.runWriter $ \h -> liftIO $ do Database.Keys.runWriter AssociatedTable $ \h -> liftIO $
Database.Keys.SQL.addAssociatedFile k topf h Database.Keys.SQL.addAssociatedFile k topf h
Database.Keys.runWriter ContentTable $ \h -> liftIO $
Database.Keys.SQL.addInodeCaches k [ic] h Database.Keys.SQL.addInodeCaches k [ic] h
liftIO $ void cleanup liftIO $ void cleanup
Database.Keys.closeDb Database.Keys.closeDb
@ -57,9 +57,9 @@ describePasswordPrompt' :: Maybe SuCommand -> Maybe String
describePasswordPrompt' (Just (SuCommand p _ _)) = describePasswordPrompt p describePasswordPrompt' (Just (SuCommand p _ _)) = describePasswordPrompt p
describePasswordPrompt' Nothing = Nothing describePasswordPrompt' Nothing = Nothing
runSuCommand :: (Maybe SuCommand) -> IO Bool runSuCommand :: (Maybe SuCommand) -> Maybe [(String, String)] -> IO Bool
runSuCommand (Just (SuCommand _ cmd ps)) = boolSystem cmd ps runSuCommand (Just (SuCommand _ cmd ps)) environ = boolSystemEnv cmd ps environ
runSuCommand Nothing = return False runSuCommand Nothing _ = return False
-- Generates a SuCommand that runs a command as root, fairly portably. -- Generates a SuCommand that runs a command as root, fairly portably.
-- --
@ -0,0 +1,51 @@
### Please describe the problem.
### What steps will reproduce the problem?
My system is:
Linux RPI-4B 5.15.74-2-MANJARO-ARM-RPI #1 SMP PREEMPT Thu Oct 20 16:43:17 UTC 2022 aarch64 GNU/Linux
It doesn't have git-annex in the AUR or pacman, so I downloaded git-annex-standalone-arm64.tar.gz from the web.
./runshell
```
git-annex enable-tor (adjusted/master(unlocked)+2) 10:25:22
enable-tor
You may be prompted for a password
git-annex: Failed to run as root: /home/gyurmo/.local/git-annex.linux/bin/git-annex enable-tor 1000
failed
enable-tor: 1 failed
```
```
sudo /home/gyurmo/.local/git-annex.linux/bin/git-annex enable-tor 1000
[sudo] password for gyurmo:
/home/gyurmo/.local/git-annex.linux/bin/git-annex: line 4: /exe/git-annex: No such file or directory
```
### What version of git-annex are you using? On what operating system?
git-annex version: 10.20220121-g0bcb94487
build flags: Assistant Webapp Pairing Inotify DBus DesktopNotify TorrentParser MagicMime Feeds Testsuite S3 WebDAV
dependency versions: aws-0.22 bloomfilter-2.0.1.0 cryptonite-0.26 DAV-1.3.4 feed-1.3.0.1 ghc-8.8.4 http-client-0.6.4.1 persistent-sqlite-2.10.6.2 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.1.0
key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL X*
remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs httpalso borg hook external
operating system: linux aarch64
supported repository versions: 8 9 10
upgrade supported from repository versions: 0 1 2 3 4 5 6 7 8 9 10
local repository version: 8
### Please provide any additional information below.
[[!format sh """
# If you can, paste a complete transcript of the problem occurring here.
# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log
# End of transcript or log.
"""]]
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
> [[fixed|done]] --[[Joey]]
@ -0,0 +1,9 @@
[[!comment format=mdwn
username="joey"
subject="""comment 1"""
date="2022-10-26T18:53:40Z"
content="""
Using this command instead should work:
    sudo /home/gyurmo/.local/git-annex.linux/git-annex enable-tor 1000
"""]]
@ -0,0 +1,7 @@
[[!comment format=mdwn
username="joey"
subject="""comment 2"""
date="2022-10-26T19:44:24Z"
content="""
And I've fixed this problem now.
"""]]
@ -0,0 +1,39 @@
(Sorry for the uninformative title, but I had to work within the character limit.)
### Please describe the problem.
`git-annex metadata` does nothing on Windows if invoked while `git-annex addurl` is in progress on other files.
### What steps will reproduce the problem?
On Windows (but not on Linux or macOS, where everything works fine):
- Start `git-annex addurl` in batch mode
- Feed it two or more URLs
- After reading the completion message for a URL from addurl's stdout, but before reading the remaining output, run `git-annex metadata` in batch mode and try to set the metadata for the file that was just downloaded.
- `git-annex metadata` will output an empty line (i.e., just CR LF), and if nothing further is fed to it, it will exit successfully without printing anything else on stdout or stderr.
- Querying the file's metadata normally after `git-annex addurl` exits will show that no metadata was set for the file.
The Python script at <https://github.com/jwodder/git-annex-bug-20221024/blob/master/mvce.py> (Python 3.8+ required) will run the above steps and show the output from `git-annex metadata`. A sample run under GitHub Actions can be seen at <https://github.com/jwodder/git-annex-bug-20221024/actions/runs/3322463020/jobs/5491516209>; note the following section of the output under "Run script":
```
16:04:04 [DEBUG ] __main__: Opening pipe to: git-annex metadata --batch --json --json-error-messages
16:04:04 [DEBUG ] __main__: Input to metadata: b'{"file": "programming/gameboy.pdf", "fields": {"title": ["GameBoy Programming Manual"]}}\n'
16:04:04 [DEBUG ] __main__: r.returncode=0
16:04:04 [DEBUG ] __main__: r.stdout=b'\r\n'
16:04:04 [DEBUG ] __main__: r.stderr=b''
```
This problem does not always occur, but it seems to occur most of the time. Using `git-annex registerurl` in place of `git-annex metadata` works fine.
### What version of git-annex are you using? On what operating system?
git-annex 10.20221003, provided by datalad/git-annex, on Microsoft Windows Server 2022
### Please provide any additional information below.
This affects a hobby project of mine, "gamdam" (implemented in [Python](https://github.com/jwodder/gamdam) and [Rust](https://github.com/jwodder/gamdam-rust)), that interacts with git-annex.
[[!meta author=jwodder]]
> [[fixed|done]], see my comments --[[Joey]]
@ -0,0 +1,42 @@
[[!comment format=mdwn
username="joey"
subject="""comment 1"""
date="2022-10-26T16:44:21Z"
content="""
Windows is not needed, this will happen in a
repository where `git annex adjust --unlock` has been run.
A simpler example:
    joey@darkstar:~/tmp/t2#master(unlocked)>git-annex addurl --batch
    http://google.com/
    addurl http://google.com/
    (to google.com_) ok
    ^Z
    joey@darkstar:~/tmp/t2#master(unlocked)>git-annex metadata --batch --json
    {"file":"google.com_","fields":{"author":["bar"]}}
I'm not sure if this is a bug, because it's documented to output a blank
line when batch mode is provided a file that is not an annexed file, and
the file is not an annexed file yet due to the pointer not yet having been
staged in git. Which is needed, when in an adjusted unlocked branch, for
git-annex to know that this is an annexed file.
When the file is locked, it just stats the symlink, so the fact that the
symlink is not yet staged in git doesn't matter.
It does not seem to make sense to have addurl update the index
after each file it adds, because that would make addurl of a lot
of files unnecessarily slow.
So, I think if anything is changed, it would need to be a change to make
the behavior with unlocked files consistent with the behavior with locked
files. Eg, when the symlink is not yet staged in git, treat it as a
non-annexed file. Which is also consistent with other handling of such
files by git-annex when not in batch mode.
The solution for your program, though, seems like it will be to end the
git-annex addurl process before trying to set metadata on just-added files.
Or, alternatively, to use addurl with --json, extract the key, and set the
metadata of the key.
"""]]
@ -0,0 +1,8 @@
[[!comment format=mdwn
username="joey"
subject="""comment 2"""
date="2022-10-26T18:13:42Z"
content="""
I've made --batch handling of unstaged locked files consistent with the
handling of unstaged unlocked files.
"""]]
@ -0,0 +1,16 @@
### Please describe the problem.
Using the S3 special remote with version support enabled (`versioning=yes`) leads to a bit of a strange effect with multiple `annex import` calls.
First import from an existing bucket is fine. Now, if there were changes done to a file in the bucket and one runs `annex import` again, git-annex will record that the old version of said file is gone from the remote, requiring a `git annex fsck` to be called right after that import to fix it again. That seems a bit strange given that this versioning support comes with a "native" special remote.
I suppose that's probably a more general issue with how import/export works, since there's no way for a special remote to communicate to annex whether two different versions of the same file (same remote path, but different key) would overwrite each other. Neither an importtree nor an exporttree remote has a way to tell annex whether and how the availability of a previous key associated with the same remote path was affected.
### What version of git-annex are you using? On what operating system?
Observed with several versions from 8-10 on linux. As laid out above, I strongly suspect this is true for all versions.
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
Lots. I love git-annex.
> [[fixed|done]] --[[Joey]]
@ -0,0 +1,10 @@
[[!comment format=mdwn
username="joey"
subject="""comment 1"""
date="2022-10-11T16:28:39Z"
content="""
This looks like a simple fix. After importing from a versioned remote,
it can just skip updating the location logs to remove the keys that are not
present in the current tree. The same as is already done when exporting
to a versioned remote. I've made that change.
"""]]
@ -0,0 +1,9 @@
[[!comment format=mdwn
username="benjamin.poldrack@d09ccff6d42dd20277610b59867cf7462927b8e3"
nickname="benjamin.poldrack"
avatar="http://cdn.libravatar.org/avatar/5c1a901caa7c2cfeeb7e17e786c5230d"
subject="comment 2"
date="2022-10-12T06:12:17Z"
content="""
Thank you!
"""]]
@ -19,4 +19,10 @@ an ever-growing amount of memory, and be slowed down by the write attempts.
Still, it does give it something better to do while the write is failing Still, it does give it something better to do while the write is failing
than sleeping and retrying, eg to do the rest of the work it's been asked than sleeping and retrying, eg to do the rest of the work it's been asked
to do. to do.
(Update: Reads from a database first call flushDbQueue, and it would
not be safe for that to return without actually writing to the database,
since the read could then see stale information. It turns
out that `git-annex get` does do a database read per file (getAssociatedFiles).
So it seems this approach will not work.)
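(For concreteness, a sketch of that constraint; `readDb` and
`withReadHandle` are invented names, not the real git-annex API:)

```
readDb :: DbQueue -> (ReadHandle -> IO a) -> IO a
readDb q reader = do
	-- flushDbQueue cannot return without really writing,
	-- or the read below would see stale data.
	flushDbQueue q
	withReadHandle q reader
```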
"""]] """]]
@ -0,0 +1,42 @@
[[!comment format=mdwn
username="joey"
subject="""comment 25"""
date="2022-10-11T17:17:13Z"
content="""
Revisiting this, I don't understand why the htop at the top of this page
lists so many `git-annex get` processes. They all seem to be children
of a single parent `git-annex get` process. But `git-annex get` does
not fork children like those. I wonder if perhaps those child
processes were forked() to do something else, but somehow hung before
they could exec?!
By comment #2, annex.stalldetection is enabled, so `git-annex get`
runs 5 `git-annex transferrer` processes. Each of those can write to the
database, so concurrent sqlite writes can happen. So, my "re: comment 16"
comment was off-base in thinking there was a single git-annex process.
And so, I don't think the debug info requested in that comment is needed.
Also, it turns out that the database queue is being flushed after every
file it gets, which is causing a sqlite write per file. So there are a
lot of sqlite writes happening, which probably makes this issue much more
likely to occur, on systems with slow enough disk IO that it does occur.
Especially if the files are relatively small.
The reason for the queue flush is partly that Annex.run forces a queue
flush after every action. That could, I think, be avoided. That was only
done to make sure the queue is flushed before the program exits, which
should be able to be handled in a different way. But also,
the queue has to be flushed before reading from the database in order
for the read to see current information. In the `git-annex get` case,
it queues a change to the inode cache, and then reads the associated
files. To avoid that, it would need to keep track of the two different
tables in the keys db, and flush the queue only when querying a table
that a write had been queued to. That would be worth doing
just to generally speed up `git-annex get`. A quick benchmark shows
a get of 1000 small files that takes 17s will only take 12s once that's
done. And that's on a fast SSD, probably much more on a hard drive!
So I don't have a full solution, but speeding git-annex up significantly,
and probably making whatever the problem in this bug is much less likely
to occur, is a good next step.
"""]]
@ -0,0 +1,16 @@
[[!comment format=mdwn
username="joey"
subject="""comment 26"""
date="2022-10-12T19:34:05Z"
content="""
I've avoided the excessive sqlite database writes. Which doubled the speed
of git-annex in some circumstances, wow!
@yoh see if it still happens once you upgrade to a git-annex
with [[!commit 6fbd337e34cd62881ed5e21616e9c00d7bf35378]].
It would be possible for git-annex to do its own locking around writes
to the sqlite database. That would surely avoid any problem that sqlite might
have that would cause ErrorBusy. I want to think some about
[[todo/withExclusiveLock_blocking_issue]] first.
"""]]
@ -0,0 +1,13 @@
[[!comment format=mdwn
username="joey"
subject="""comment 27"""
date="2022-10-17T18:49:47Z"
content="""
I've made it retry as long as necessary on ErrorBusy, while also noticing
when another process is suspended and has the sqlite database locked,
and avoiding retrying forever in that situation.
This seems to be as far as I can take this bug report, I don't know
100% for sure if I've fixed it, but git-annex's behavior should certainly
be improved.
"""]]
@ -150,6 +150,13 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`.
Indicates that `IMPORTKEY` can be used. Indicates that `IMPORTKEY` can be used.
* `IMPORTKEYSUPPORTED-FAILURE` * `IMPORTKEYSUPPORTED-FAILURE`
Indicates that `IMPORTKEY` cannot be used. Indicates that `IMPORTKEY` cannot be used.
* `VERSIONED`
Used to check if the special remote is versioned.
Note that this request may be made before or after `PREPARE`.
* `ISVERSIONED`
Indicates that the remote is versioned.
* `NOTVERSIONED`
Indicates that the remote is not versioned.
* `LISTIMPORTABLECONTENTS` * `LISTIMPORTABLECONTENTS`
Used to get a list of all the files that are stored in the special Used to get a list of all the files that are stored in the special
remote. A block of responses remote. A block of responses
@ -170,6 +177,8 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`.
be nested multiple levels deep. be nested multiple levels deep.
This should only be used when the remote supports using This should only be used when the remote supports using
"TRANSFER RECEIVE Key" to retrieve historical versions of files. "TRANSFER RECEIVE Key" to retrieve historical versions of files.
And, it should only be used when the remote replies `ISVERSIONED`
to the `VERSIONED` message.
* `END` * `END`
Indicates the end of a block of responses. Indicates the end of a block of responses.
* `LOCATION Name` * `LOCATION Name`
@ -0,0 +1,23 @@
Hi,
I've set up a remote on Github with LFS enabled. Running `git-annex sync --content` failed to push on the main branch:
```
pull lfstest
From github.com:myuser/myremote
* branch HEAD -> FETCH_HEAD
ok
push lfstest
Everything up-to-date
To github.com:myuser/myremote.git
! [rejected] main -> main (non-fast-forward)
error: failed to push some refs to 'github.com:myuser/myremote.git'
hint: Updates were rejected because the tip of your current branch is behind
hint: its remote counterpart. Integrate the remote changes (e.g.
hint: 'git pull ...') before pushing again.
hint: See the 'Note about fast-forwards' in 'git push --help' for details.
ok
```
Is this expected behaviour? Is it possible to correct the push failure?
Thanks in advance!
@ -0,0 +1,13 @@
[[!comment format=mdwn
username="joey"
subject="""comment 1"""
date="2022-10-17T16:28:48Z"
content="""
This doesn't involve LFS at all, it's a regular git branch being pushed in
the regular way. So you can certainly solve the problem with some
combination of `git pull`, `git merge`, and `git push`.
That said, I don't know why `git-annex sync` didn't work in your situation.
I created some test git-lfs repos on github and never saw any difficulty
syncing with them.
"""]]
@ -0,0 +1,9 @@
[[!comment format=mdwn
username="AlexPraga"
avatar="http://cdn.libravatar.org/avatar/7c4e10fd352b81279b405f9f5337cdb7"
subject="comment 2"
date="2022-10-22T15:12:26Z"
content="""
Thanks for answering. I managed to correct it by force-pushing: `git push lfs main -f`.
Not sure why it did not work before, but it seems to be working now.
"""]]
@ -0,0 +1,8 @@
[[!comment format=mdwn
username="xloem"
avatar="http://cdn.libravatar.org/avatar/b8c087f7c5e6a9358748f0727c077f3b"
subject="ipfs"
date="2022-10-31T13:36:51Z"
content="""
It would be nice if the rpm or repository included auxiliary scripts such as git-annex-remote-ipfs so that these would get installed/uninstalled/upgraded alongside the main project.
"""]]
@ -1,23 +0,0 @@
git-annex 10.20220624 released with [[!toggle text="these changes"]]
[[!toggleable text=""" * init: Added --no-autoenable option.
* info: Added --autoenable option.
* initremote: Improve handling of type=git special remotes.
The location value no longer needs to match the url of an existing
git remote, and locations not using ssh:// will work now, including
both paths and host:/path
* Fix retrival of an empty file that is stored in a special remote with
chunking enabled.
(Fixes a reversion in 8.20201103)
* move: Improve resuming a move that succeeded in transferring the
content, but where dropping failed due to eg a network problem,
in cases where numcopies checks prevented the resumed
move from dropping the object from the source repository.
* add, fix, lock, rekey: When several files were being processed,
replacing an annex symlink of a file that was already processed
with a new large file could sometimes cause that large file to be
added to git. These races have been fixed.
* add: Also fix a similar race that could cause a large file be added
to git when a small file was modified or overwritten while it was
being added.
* add --batch: Fix handling of a file that is skipped due to being
gitignored."""]]
@ -0,0 +1,22 @@
git-annex 10.20221103 released with [[!toggle text="these changes"]]
[[!toggleable text=""" * Doubled the speed of git-annex drop when operating on many files,
and of git-annex get when operating on many tiny files.
* trust, untrust, semitrust, dead: Fix behavior when provided with
multiple repositories to operate on.
* trust, untrust, semitrust, dead: When provided with no parameters,
do not operate on a repository that has an empty name.
* move: Fix openFile crash with -J
(Fixes a reversion in 8.20201103)
* S3: Speed up importing from a large bucket when fileprefix= is set,
by only asking for files under the prefix.
* When importing from versioned remotes, fix tracking of the content
of deleted files.
* More robust handling of ErrorBusy when writing to sqlite databases.
* Avoid hanging when a suspended git-annex process is keeping a sqlite
database locked.
* Make --batch mode handle unstaged annexed files consistently
whether the file is unlocked or not. Note that this changes the
behavior of --batch when it is provided with locked files that are
in the process of being added to the repository, but have not yet been
staged in git.
* Make git-annex enable-tor work when using the linux standalone build."""]]
@ -3,7 +3,7 @@ The following steps are tested on Windows 10 21h1 with Ubuntu 20 and are designe
** Limitations ** ** Limitations **
* The repository must be created with `annex.tune.objecthashlower=true`. * The repository must be created with `annex.tune.objecthashlower=true`.
* `git annex adjust --unlock` will not work. Unlocked files will work most of the time. Avoid `annex.addunlocked=true` because it is likely to not work. * `git annex adjust --unlock` will not work. Avoid `annex.addunlocked=true` and do not add multiple unlocked files to the index.
**Setup** **Setup**
@ -13,7 +13,7 @@ The following steps are tested on Windows 10 21h1 with Ubuntu 20 and are designe
* `git config annex.sshcaching false` * `git config annex.sshcaching false`
* `git annex init` * `git annex init`
* git-annex should not detect the filesystem as crippled but now set `git config annex.crippledfilesystem true` * git-annex should not detect the filesystem as crippled but now set `git config annex.crippledfilesystem true`
* Safety of locked files will require these settings and scripts and the patch below. * Safety of locked files will require these settings and scripts.
* `git config annex.freezecontent-command 'wsl-freezecontent %path'` * `git config annex.freezecontent-command 'wsl-freezecontent %path'`
* `git config annex.thawcontent-command 'wsl-thawcontent %path'` * `git config annex.thawcontent-command 'wsl-thawcontent %path'`
@ -71,8 +71,123 @@ fi
** Patches ** ** Patches **
These patches may introduce problems when there are multiple independent processes writing to the repository. Use at your own risk.
<details> <details>
<summary>This patch allows `git annex fix` on a crippled file system.</summary> <summary>Create symlink to annexed objects in-place. The add, addunused, lock, and rekey commands will create symlinks in-place instead of in a temporary directory.</summary>
```
From d871289d22d2e86cb62776841343baf6c0f83484 Mon Sep 17 00:00:00 2001
From: Reiko Asakura <asakurareiko@protonmail.ch>
Date: Wed, 12 Oct 2022 17:13:55 -0400
Subject: [PATCH 2/3] Create symlink to annexed objects in-place
---
Annex/Ingest.hs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs
index 89dc8acea..ec35fb15d 100644
--- a/Annex/Ingest.hs
+++ b/Annex/Ingest.hs
@@ -301,7 +301,7 @@ restoreFile file key e = do
makeLink :: RawFilePath -> Key -> Maybe InodeCache -> Annex LinkTarget
makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do
l <- calcRepo $ gitAnnexLink file key
- replaceWorkTreeFile file' $ makeAnnexLink l . toRawFilePath
+ makeAnnexLink l file
-- touch symlink to have same time as the original file,
-- as provided in the InodeCache
--
2.30.2
```
</details>
<details>
<summary>Recreate symlinks after remote transfer. The copy, move, get, sync commands will recreate the symlink after transferring the file from a remote.</summary>
```
From 82ea0ffb02fbc5e4003a466a216c8d1030b7d70a Mon Sep 17 00:00:00 2001
From: Reiko Asakura <asakurareiko@protonmail.ch>
Date: Wed, 12 Oct 2022 19:10:07 -0400
Subject: [PATCH 3/3] Recreate symlinks after remote transfer
---
Annex/Link.hs | 7 +++++++
Command/Get.hs | 3 ++-
Command/Move.hs | 3 ++-
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/Annex/Link.hs b/Annex/Link.hs
index 1a344d07e..e0f172d1d 100644
--- a/Annex/Link.hs
+++ b/Annex/Link.hs
@@ -96,6 +96,13 @@ getAnnexLinkTarget' file coresymlinks = if coresymlinks
then mempty
else s
+relinkAssociatedFile :: AssociatedFile -> Bool -> Annex ()
+relinkAssociatedFile (AssociatedFile (Just file)) True =
+ getAnnexLinkTarget file >>= \case
+ Just target -> makeAnnexLink target file
+ _ -> noop
+relinkAssociatedFile _ _ = noop
+
makeAnnexLink :: LinkTarget -> RawFilePath -> Annex ()
makeAnnexLink = makeGitLink
diff --git a/Command/Get.hs b/Command/Get.hs
index a25fd8bf1..e16362f79 100644
--- a/Command/Get.hs
+++ b/Command/Get.hs
@@ -12,6 +12,7 @@ import qualified Remote
import Annex.Transfer
import Annex.NumCopies
import Annex.Wanted
+import Annex.Link
import qualified Command.Move
cmd :: Command
@@ -95,7 +96,7 @@ getKey' key afile = dispatch
showNote "not available"
showlocs []
return False
- dispatch remotes = notifyTransfer Download afile $ \witness -> do
+ dispatch remotes = observe (relinkAssociatedFile afile) $ notifyTransfer Download afile $ \witness -> do
ok <- pickRemote remotes $ \r -> ifM (probablyPresent r)
( docopy r witness
, return False
diff --git a/Command/Move.hs b/Command/Move.hs
index 55fed5c37..d733a7cbb 100644
--- a/Command/Move.hs
+++ b/Command/Move.hs
@@ -20,6 +20,7 @@ import Logs.Presence
import Logs.Trust
import Logs.File
import Annex.NumCopies
+import Annex.Link
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as L
@@ -241,7 +242,7 @@ fromPerform src removewhen key afile = do
then dispatch removewhen deststartedwithcopy True
else dispatch removewhen deststartedwithcopy =<< get
where
- get = notifyTransfer Download afile $
+ get = observe (relinkAssociatedFile afile) $ notifyTransfer Download afile $
download src key afile stdRetry
dispatch _ deststartedwithcopy False = do
--
2.30.2
```
</details>
<details>
<summary>Allow git-annex fix on crippled filesystem</summary>
``` ```
From 65fe6e362dfbf2f54c8da5ca17c59af26de5ff83 Mon Sep 17 00:00:00 2001 From 65fe6e362dfbf2f54c8da5ca17c59af26de5ff83 Mon Sep 17 00:00:00 2001
@ -105,7 +220,7 @@ index 39853c894..2d66c1461 100644
** Usage tips ** ** Usage tips **
* WSL1 will not create symlinks that work in Windows if created before the target file exists, such as after `git annex add` or `git annex get`. This can be fixed by recreating them with any method, such as delete them and `git checkout`. * WSL1 will not create symlinks that work in Windows if created before the target file exists. This can be fixed by recreating them with any method, such as delete them and `git checkout`. Also see the above patches to make git-annex automatically recreate symlinks.
<details> <details>
<summary>Sample script to recreate all symlinks under the current directory</summary> <summary>Sample script to recreate all symlinks under the current directory</summary>
@ -129,10 +244,7 @@ do(pathlib.Path('.'))
``` ```
</details> </details>
* Sometimes there will SQLite errors using multiple jobs but retrying will work most of the time.
** Related bugs ** ** Related bugs **
* [[bugs/WSL_adjusted_braches__58___smudge_fails_with_sqlite_thread_crashed_-_locking_protocol]] * [[bugs/WSL_adjusted_braches__58___smudge_fails_with_sqlite_thread_crashed_-_locking_protocol]]
* [[bugs/WSL1__58___git-annex-add_fails_in_DrvFs_filesystem]] * [[bugs/WSL1__58___git-annex-add_fails_in_DrvFs_filesystem]]
* [[bugs/problems_with_SSH_and_relative_paths]]
@ -49,9 +49,11 @@ This should display something like:
Once you are sure things went on okay, you can synchronise this with `marcos`: Once you are sure things went on okay, you can synchronise this with `marcos`:
git annex sync git annex sync --allow-unrelated-histories
This will push the metadata information to marcos, so it knows which files are available on `angela`. From there on, you can freely get and move files between the two repos! This will push the metadata information to marcos, so it knows which files
are available on `angela`. From there on, you can freely get and move files
between the two repos!
Importing files from a third directory Importing files from a third directory
-------------------------------------- --------------------------------------
@ -61,7 +63,7 @@ Say that some files on `angela` are actually spread out outside of the `~/mp3` d
cd ~/mp3 cd ~/mp3
git annex import ~/music/ git annex import ~/music/
(!) Be careful that `~/music` is not a git-annex repository, or this will [[destroy it!|bugs/git annex import destroys a fellow git annex repository]]. (!) Be careful that `~/music` is not a git-annex repository.
Deleting deleted files Deleting deleted files
---------------------- ----------------------
@ -73,7 +75,3 @@ It is quite possible some files were removed (or renamed!) on `marcos` but not o
This will show files that are on `angela` and not on `marcos`. They could be new files that were only added on `angela`, so be careful! A manual analysis is necessary, but let's say you are certain those files are not relevant anymore, you can delete them from `angela`: This will show files that are on `angela` and not on `marcos`. They could be new files that were only added on `angela`, so be careful! A manual analysis is necessary, but let's say you are certain those files are not relevant anymore, you can delete them from `angela`:
git annex drop <file> git annex drop <file>
If the file is a renamed or modified version from the original, you may need to use `--force`, but be careful! If you delete the wrong file, it will be lost forever!
> (!) Maybe this wouldn't happen with [[direct mode]] and an fsck? --[[anarcat]]
@ -0,0 +1,59 @@
[[!comment format=mdwn
username="Stefan"
avatar="http://cdn.libravatar.org/avatar/1474db4b030b82320e3bd5e899ef2bad"
subject="This guide fails with &quot;fatal: refusing to merge unrelated histories&quot;"
date="2022-10-29T10:28:18Z"
content="""
This no longer works; here is a MWE to copy-paste (uses /tmp/{A,B}):
```
mkdir /tmp/A && touch /tmp/A/bigfile
mkdir /tmp/B && touch /tmp/B/bigfile
cd /tmp/A
git init
git annex init
git annex add .
git commit -m \"git annex yay\"
cd /tmp/B
git init
git remote add A /tmp/A
git fetch A
git annex info # this should display the two repos
git annex add .
git annex whereis
git annex sync
```
This fails with
```
commit
[main (root-commit) e9435bf] git-annex in stefan@notebook:/tmp/B
1 file changed, 1 insertion(+)
create mode 120000 bigfile
ok
pull A
fatal: refusing to merge unrelated histories
failed
push A
Enumerating objects: 19, done.
Counting objects: 100% (19/19), done.
Delta compression using up to 8 threads
Compressing objects: 100% (11/11), done.
Writing objects: 100% (14/14), 1.37 KiB | 1.37 MiB/s, done.
Total 14 (delta 2), reused 0 (delta 0), pack-reused 0
To /tmp/A
* [new branch] main -> synced/main
* [new branch] git-annex -> synced/git-annex
To /tmp/A
! [rejected] main -> main (non-fast-forward)
error: failed to push some refs to '/tmp/A'
hint: Updates were rejected because the tip of your current branch is behind
hint: its remote counterpart. Integrate the remote changes (e.g.
hint: 'git pull ...') before pushing again.
hint: See the 'Note about fast-forwards' in 'git push --help' for details.
ok
sync: 1 failed
```
"""]]
@ -0,0 +1,8 @@
[[!comment format=mdwn
username="joey"
subject="""comment 2"""
date="2022-10-31T16:09:12Z"
content="""
Indeed, you will need to use `git-annex sync --allow-unrelated-histories`
now in that situation. I have updated the tip.
"""]]
@ -0,0 +1,27 @@
[[!comment format=mdwn
username="joey"
subject="""comment 8"""
date="2022-10-10T21:04:49Z"
content="""
I've finished the work on aws, which is in
<https://github.com/aristidb/aws/pull/281> and I hope will be merged soon.
git-annex now has a branch `anons3` that implements this, when
the S3 remote is configured with signature=anonymous.
    $ git-annex initremote s3-origin type=S3 importtree=yes encryption=none bucket=dandiarchive fileprefix=zarr-checksums/2ac71edb-738c-40ac-bd8c-8ca985adaa12/ signature=anonymous
    initremote s3-origin (checking bucket...) ok
    (recording state in git...)
    $ git-annex import master --from s3-origin
    list s3-origin ok
    import s3-origin .checksum
    ok
    import s3-origin 0/.checksum
    ok
    import s3-origin 0/0/.checksum
    ok
    ^C
Also, I've fixed it to only list files in the fileprefix, which
sped up the listing a *lot* in this bucket with many other files..
"""]]
@ -0,0 +1,83 @@
Some parts of git-annex wait for an exclusive lock, and once they take it,
hold it while performing an operation. Now consider what happens if the
git-annex process is suspended. Another git-annex process that is running
and that waits to take the same exclusive lock (or a shared lock of the
same file) will stall forever, until the git-annex process is resumed.
These time windows tend to be small, but may not always be.
Is there any better way git-annex could handle this? Is it a significant
problem at all? I don't think I've ever seen it happen, but I rarely ^Z
git-annex either. How do other programs handle this, if at all?
--[[Joey]]
----
Would it be better for the second git-annex process, rather than hanging
indefinitely, to timeout after a few seconds?
But how many seconds? What if the system is under heavy load?
> What could be done is, update the lock's file's mtime after successfully
> taking the lock. Then, as long as the mtime is advancing, some other
> process is actively using it, and it's ok for our process to wait
> longer.
>
> (Updating the mtime would be a problem when locking annex object files
> in v9 and earlier. Luckily, that locking is not done with a blocking
> lock anyway.)
> If the lock file's mtime is being checked, the process that is
> blocking with the lock held could periodically update the mtime.
> A background thread could manage that. If that's done every ten seconds,
> then an mtime more than 20 seconds old indicates that the lock is
> held by a suspended process. So git-annex would stall for up to 20-30
> seconds before erroring out when a lock is held by a suspended process.
> That seems acceptible, it doesn't need to deal with this situation
> instantly, it just needs to not block indefinitely. And updating the
> mtime every 10 seconds should not be too much IO.
>
> When an old version of git-annex has the lock held, it won't be updating
> the mtime. So if it takes longer than 10 seconds to do the operation with
> the lock held, a new version may complain that it's suspended when it's
> really not. This could be avoided by checking what process holds the
> lock, and whether it's suspended. But probably 10 seconds is enough
> time for all the operations git-annex takes a blocking lock for
> currently to finish, and if so we don't need to worry about this situation?
>
> > Unfortunately not: importKeys takes an exclusive lock and holds it while
> > downloading all the content! This seems like a bug though, because it can
> > cause other git-annex processes that are eg storing content in a remote
> > to block for a long time.
> >
> > Another one is Database.Export.writeLockDbWhile, which takes an
> > exclusive lock while running eg, Command.Export.changeExport,
> > which may sometimes need to do a lot of work.
> >
> > Another one is Annex.Queue.flush, which probably mostly runs in under
> > 10 seconds, but maybe not always, and when annex.queuesize is changed,
> > could surely take longer.
>
> To avoid problems when old git-annex's are also being used, it could
> update and check the mtime of a different file than the lock file.
>
> Start by trying to take the lock for up to 10 seconds. If it takes the
> lock, create the mtime file and start a thread that updates the mtime
> every 10 seconds until the lock is closed, and delete the mtime file
> before closing the lock handle.
>
> When it times out taking the lock, if the mtime file does not exist, an
> old git-annex has the lock; if the mtime file does exist, then check
> if its timestamp has advanced; if not then a new git-annex has the lock
> and is suspended and it can error out.
>
> Oops: There's a race in the method above; a timeout may occur
> right when the other process has taken the lock, but has not updated
> the mtime file yet. Then that process would incorrectly be treated
> as an old git-annex process.
>
> So: To support old git-annex, it seems it will need to check, when the
> lock is held, what process has the lock. And then check if that process
> is suspended or not. Which means looking in /proc. Ugh.
>
> Or: Change to checking lock mtimes only in git-annex v11..
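
A hedged sketch of the heartbeat side of that design (an invented helper, not git-annex code):

```
import Control.Concurrent (threadDelay)
import Control.Concurrent.Async (withAsync)
import Control.Exception (finally)
import Control.Monad (forever)
import System.Directory (removeFile)
import System.Posix.Files (touchFile)

-- While the locked action runs, advance a sidecar file's mtime every
-- 10 seconds. A waiter that sees the mtime stop advancing for ~20
-- seconds can conclude the lock holder is suspended and error out
-- rather than block indefinitely. The file is deleted before the
-- caller drops the lock, per the scheme above.
withHeartbeat :: FilePath -> IO a -> IO a
withHeartbeat mtimefile locked =
	(touchFile mtimefile >> withAsync beat (const locked))
		`finally` removeFile mtimefile
  where
	beat = forever $ do
		threadDelay 10000000 -- 10 seconds
		touchFile mtimefile
```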
@ -1,5 +1,5 @@
Name: git-annex Name: git-annex
Version: 10.20221003 Version: 10.20221103
Cabal-Version: 1.12 Cabal-Version: 1.12
License: AGPL-3 License: AGPL-3
Maintainer: Joey Hess <id@joeyh.name> Maintainer: Joey Hess <id@joeyh.name>
@ -830,6 +830,7 @@ Executable git-annex
Database.Init Database.Init
Database.Keys Database.Keys
Database.Keys.Handle Database.Keys.Handle
Database.Keys.Tables
Database.Keys.SQL Database.Keys.SQL
Database.Queue Database.Queue
Database.Types Database.Types