Merge branch 'master' into anons3

Joey Hess 2022-11-04 15:08:29 -04:00
commit de1e8201a6
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
66 changed files with 1120 additions and 307 deletions
Annex.hs
Annex
Assistant
Build
CHANGELOG
CmdLine.hs
CmdLine
Command.hs
Command
Database
Remote
Test.hs
Types
Upgrade
Utility
doc
bugs
design/external_special_remote_protocol
forum
install/rpm_standalone
news
tips
todo
git-annex.cabal


@ -287,12 +287,8 @@ run (st, rd) a = do
run' :: MVar AnnexState -> AnnexRead -> Annex a -> IO (a, (AnnexState, AnnexRead))
run' mvar rd a = do
r <- runReaderT (runAnnex a) (mvar, rd)
`onException` (flush rd)
flush rd
st <- takeMVar mvar
return (r, (st, rd))
where
flush = Keys.flushDbQueue . keysdbhandle
{- Performs an action in the Annex monad from a starting state,
- and throws away the changed state. -}


@ -1,6 +1,6 @@
{- git-annex actions
-
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -11,7 +11,7 @@ module Annex.Action (
action,
verifiedAction,
startup,
shutdown,
quiesce,
stopCoProcesses,
) where
@ -25,6 +25,7 @@ import Annex.CheckAttr
import Annex.HashObject
import Annex.CheckIgnore
import Annex.TransferrerPool
import qualified Database.Keys
import Control.Concurrent.STM
#ifndef mingw32_HOST_OS
@ -74,12 +75,25 @@ startup = do
return ()
#endif
{- Cleanup actions. -}
shutdown :: Bool -> Annex ()
shutdown nocommit = do
{- Run all cleanup actions, save all state, stop all long-running child
- processes.
-
- This can be run repeatedly with other Annex actions run in between,
- but usually it is run only once at the end.
-
- When passed True, avoids making any commits to the git-annex branch,
- leaving changes in the journal for later commit.
-}
quiesce :: Bool -> Annex ()
quiesce nocommit = do
cas <- Annex.withState $ \st -> return
( st { Annex.cleanupactions = mempty }
, Annex.cleanupactions st
)
sequence_ (M.elems cas)
saveState nocommit
sequence_ =<< M.elems <$> Annex.getState Annex.cleanupactions
stopCoProcesses
Database.Keys.closeDb
{- Stops all long-running child processes, including git query processes. -}
stopCoProcesses :: Annex ()
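A small illustrative sketch (the helper name is hypothetical, not part of this commit) of the calling pattern the quiesce comment above describes:

```
-- Quiesce between batches of work; only the final call commits
-- the git-annex branch.
batchedRun :: Annex () -> Annex () -> Annex ()
batchedRun batch1 batch2 = do
	batch1
	quiesce True   -- flush state to the journal, no branch commit
	batch2
	quiesce False  -- final cleanup, commits the git-annex branch
```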


@ -718,7 +718,7 @@ listKeys' keyloc want = do
saveState :: Bool -> Annex ()
saveState nocommit = doSideAction $ do
Annex.Queue.flush
Database.Keys.closeDb
Database.Keys.flushDb
unless nocommit $
whenM (annexAlwaysCommit <$> Annex.getGitConfig) $
Annex.Branch.commit =<< Annex.Branch.commitMessage


@ -184,10 +184,13 @@ recordImportTree remote importtreeconfig importable = do
unlessM (stillpresent db oldkey) $
logChange oldkey (Remote.uuid remote) InfoMissing
_ -> noop
db <- Export.openDb (Remote.uuid remote)
forM_ (exportedTreeishes oldexport) $ \oldtree ->
Export.runExportDiffUpdater updater db oldtree finaltree
Export.closeDb db
-- When the remote is versioned, it still contains keys
-- that are not present in the new tree.
unless (Remote.versionedExport (Remote.exportActions remote)) $ do
db <- Export.openDb (Remote.uuid remote)
forM_ (exportedTreeishes oldexport) $ \oldtree ->
Export.runExportDiffUpdater updater db oldtree finaltree
Export.closeDb db
buildImportCommit' :: Remote -> ImportCommitConfig -> Maybe Sha -> History Sha -> Annex (Maybe Sha)
buildImportCommit' remote importcommitconfig mtrackingcommit imported@(History ti _) =


@ -1,6 +1,6 @@
{- git-annex program path
-
- Copyright 2013-2021 Joey Hess <id@joeyh.name>
- Copyright 2013-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -11,6 +11,7 @@ module Annex.Path (
gitAnnexChildProcess,
gitAnnexChildProcessParams,
gitAnnexDaemonizeParams,
cleanStandaloneEnvironment,
) where
import Annex.Common
@ -19,7 +20,7 @@ import Utility.Env
import Annex.PidLock
import qualified Annex
import System.Environment (getExecutablePath, getArgs)
import System.Environment (getExecutablePath, getArgs, getProgName)
{- A fully qualified path to the currently running git-annex program.
-
@ -29,13 +30,16 @@ import System.Environment (getExecutablePath, getArgs)
- or searching for the command name in PATH.
-
- The standalone build runs git-annex via ld.so, and defeats
- getExecutablePath. It sets GIT_ANNEX_PROGRAMPATH to the correct path
- to the wrapper script to use.
- getExecutablePath. It sets GIT_ANNEX_DIR to the location of the
- standalone build directory, and there are wrapper scripts for git-annex
- and git-annex-shell in that directory.
-}
programPath :: IO FilePath
programPath = go =<< getEnv "GIT_ANNEX_PROGRAMPATH"
programPath = go =<< getEnv "GIT_ANNEX_DIR"
where
go (Just p) = return p
go (Just dir) = do
name <- getProgName
return (dir </> name)
go Nothing = do
exe <- getExecutablePath
p <- if isAbsolute exe
@ -97,3 +101,25 @@ gitAnnexDaemonizeParams = do
-- Get every parameter git-annex was run with.
ps <- liftIO getArgs
return (map Param ps ++ cps)
{- Returns a cleaned up environment that lacks path and other settings
- used to make the standalone builds use their bundled libraries and programs.
- Useful when calling programs not included in the standalone builds.
-
- For a non-standalone build, returns Nothing.
-}
cleanStandaloneEnvironment :: IO (Maybe [(String, String)])
cleanStandaloneEnvironment = clean <$> getEnvironment
where
clean environ
| null vars = Nothing
| otherwise = Just $ catMaybes $ map (restoreorig environ) environ
where
vars = words $ fromMaybe "" $
lookup "GIT_ANNEX_STANDLONE_ENV" environ
restoreorig oldenviron p@(k, _v)
| k `elem` vars = case lookup ("ORIG_" ++ k) oldenviron of
(Just v')
| not (null v') -> Just (k, v')
_ -> Nothing
| otherwise = Just p
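A minimal sketch of how a caller can combine this with CreateProcess (the helper name is hypothetical; the webapp's browser-launching change later in this commit does essentially the same thing):

```
import System.Process (CreateProcess(env), createProcess, proc)
import Control.Monad (void)

-- Run an external command with the cleaned environment.
-- env = Nothing makes the child inherit the current environment,
-- which is what cleanStandaloneEnvironment returns for a
-- non-standalone build.
runOutsideStandalone :: FilePath -> [String] -> IO ()
runOutsideStandalone cmd args = do
	environ <- cleanStandaloneEnvironment
	void $ createProcess (proc cmd args) { env = environ }
```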


@ -1,6 +1,6 @@
{- git-annex worktree files
-
- Copyright 2013-2021 Joey Hess <id@joeyh.name>
- Copyright 2013-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -14,7 +14,7 @@ import Annex.CurrentBranch
import qualified Database.Keys
{- Looks up the key corresponding to an annexed file in the work tree,
- by examining what the file links to.
- by examining what the symlink points to.
-
- An unlocked file will not have a link on disk, so fall back to
- looking for a pointer to a key in git.
@ -31,6 +31,16 @@ lookupKey = lookupKey' catkeyfile
, catKeyFileHidden file =<< getCurrentBranch
)
{- Like lookupKey, but only looks at files staged in git, not at unstaged
- changes in the work tree. This means it's slower, but it also has
- consistently the same behavior for locked files as for unlocked files.
-}
lookupKeyStaged :: RawFilePath -> Annex (Maybe Key)
lookupKeyStaged file = catKeyFile file >>= \case
Just k -> return (Just k)
Nothing -> catKeyFileHidden file =<< getCurrentBranch
{- Like lookupKey, but does not find keys for hidden files. -}
lookupKeyNotHidden :: RawFilePath -> Annex (Maybe Key)
lookupKeyNotHidden = lookupKey' catkeyfile
where
@ -45,23 +55,6 @@ lookupKey' catkeyfile file = isAnnexLink file >>= \case
Just key -> return (Just key)
Nothing -> catkeyfile file
{- Modifies an action to only act on files that are already annexed,
- and passes the key on to it. -}
whenAnnexed :: (RawFilePath -> Key -> Annex (Maybe a)) -> RawFilePath -> Annex (Maybe a)
whenAnnexed a file = ifAnnexed file (a file) (return Nothing)
ifAnnexed :: RawFilePath -> (Key -> Annex a) -> Annex a -> Annex a
ifAnnexed file yes no = maybe no yes =<< lookupKey file
{- Find all annexed files and update the keys database for them.
-
- Normally the keys database is updated incrementally when it's being
- opened, and changes are noticed. Calling this explicitly allows
- running the update at an earlier point.
-
- All that needs to be done is to open the database,
- that will result in Database.Keys.reconcileStaged
- running, and doing the work.
-}
{- Find all annexed files and update the keys database for them. -}
scanAnnexedFiles :: Annex ()
scanAnnexedFiles = Database.Keys.runWriter (const noop)
scanAnnexedFiles = Database.Keys.updateDatabase


@ -171,25 +171,3 @@ installFileManagerHooks program = unlessM osAndroid $ do
#else
installFileManagerHooks _ = noop
#endif
{- Returns a cleaned up environment that lacks settings used to make the
- standalone builds use their bundled libraries and programs.
- Useful when calling programs not included in the standalone builds.
-
- For a non-standalone build, returns Nothing.
-}
cleanEnvironment :: IO (Maybe [(String, String)])
cleanEnvironment = clean <$> getEnvironment
where
clean environ
| null vars = Nothing
| otherwise = Just $ catMaybes $ map (restoreorig environ) environ
where
vars = words $ fromMaybe "" $
lookup "GIT_ANNEX_STANDLONE_ENV" environ
restoreorig oldenviron p@(k, _v)
| k `elem` vars = case lookup ("ORIG_" ++ k) oldenviron of
(Just v')
| not (null v') -> Just (k, v')
_ -> Nothing
| otherwise = Just p


@ -49,7 +49,7 @@ inDir dir a = do
state <- Annex.new
=<< Git.Config.read
=<< Git.Construct.fromPath (toRawFilePath dir)
Annex.eval state $ a `finally` stopCoProcesses
Annex.eval state $ a `finally` quiesce True
{- Creates a new repository, and returns its UUID. -}
initRepo :: Bool -> Bool -> FilePath -> Maybe String -> Maybe StandardGroup -> IO UUID


@ -24,6 +24,7 @@ import Annex.Content
import Annex.WorkTree
import Git.Command
import qualified Utility.RawFilePath as R
import Annex.Action
import Data.Time.Clock
import Data.Char
@ -69,7 +70,9 @@ main = do
state <- Annex.new =<< Git.Construct.fromPath (toRawFilePath ".")
ood <- Annex.eval state $ do
buildrpms topdir updated
makeinfos updated version
is <- makeinfos updated version
quiesce False
return is
syncToArchiveOrg
unless (null ood) $
error $ "Some info files are out of date: " ++ show (map fst ood)


@ -164,8 +164,6 @@ installLinkerShim top linker exe = do
createSymbolicLink (fromRawFilePath link) (top </> exelink)
writeFile exe $ unlines
[ "#!/bin/sh"
, "GIT_ANNEX_PROGRAMPATH=\"$0\""
, "export GIT_ANNEX_PROGRAMPATH"
, "exec \"$GIT_ANNEX_DIR/" ++ exelink ++ "\" --library-path \"$GIT_ANNEX_LD_LIBRARY_PATH\" \"$GIT_ANNEX_DIR/shimmed/" ++ base ++ "/" ++ base ++ "\" \"$@\""
]
modifyFileMode (toRawFilePath exe) $ addModes executeModes


@ -1,16 +1,32 @@
git-annex (10.20221004) UNRELEASED; urgency=medium
* S3: Support signature=anonymous to access a S3 bucket anonymously.
This can be used, for example, with importtree=yes to import from
a public bucket.
git-annex (10.20221103) upstream; urgency=medium
* Doubled the speed of git-annex drop when operating on many files,
and of git-annex get when operating on many tiny files.
* trust, untrust, semitrust, dead: Fix behavior when provided with
multiple repositories to operate on.
* trust, untrust, semitrust, dead: When provided with no parameters,
do not operate on a repository that has an empty name.
* move: Fix openFile crash with -J
(Fixes a reversion in 8.20201103)
* S3: Support signature=anonymous to access a S3 bucket anonymously.
This can be used, for example, with importtree=yes to import from
a public bucket.
* S3: Speed up importing from a large bucket when fileprefix= is set,
by only asking for files under the prefix.
* When importing from versioned remotes, fix tracking of the content
of deleted files.
* More robust handling of ErrorBusy when writing to sqlite databases.
* Avoid hanging when a suspended git-annex process is keeping a sqlite
database locked.
* Make --batch mode handle unstaged annexed files consistently
whether the file is unlocked or not. Note that this changes the
behavior of --batch when it is provided with locked files that are
in the process of being added to the repository, but have not yet been
staged in git.
* Make git-annex enable-tor work when using the linux standalone build.
-- Joey Hess <id@joeyh.name> Mon, 03 Oct 2022 13:36:42 -0400
-- Joey Hess <id@joeyh.name> Thu, 03 Nov 2022 14:07:31 -0400
git-annex (10.20221003) upstream; urgency=medium


@ -63,7 +63,7 @@ dispatch' subcommandname args fuzzy cmds allargs allcmds fields getgitrepo progn
prepRunCommand cmd annexsetter
startup
performCommandAction True cmd seek $
shutdown $ cmdnocommit cmd
quiesce $ cmdnocommit cmd
go (Left norepo) = do
let ingitrepo = \a -> a =<< Git.Config.global
-- Parse command line with full cmdparser first,


@ -186,8 +186,9 @@ batchAnnexed fmt seeker keyaction = do
matcher <- getMatcher
batchFilesKeys fmt $ \(si, v) ->
case v of
Right bf -> flip whenAnnexed bf $ \f k ->
checkpresent k $
Right f -> lookupKeyStaged f >>= \case
Nothing -> return Nothing
Just k -> checkpresent k $
startAction seeker si f k
Left k -> ifM (matcher (MatchingInfo (mkinfo k)))
( checkpresent k $


@ -17,6 +17,7 @@ import Utility.AuthToken
import Annex.UUID
import P2P.Address
import P2P.Auth
import Annex.Action
run :: [String] -> IO ()
run (_remotename:address:[]) = forever $
@ -59,6 +60,8 @@ connectService address port service = do
g <- Annex.gitRepo
conn <- liftIO $ connectPeer g (TorAnnex address port)
runst <- liftIO $ mkRunState Client
liftIO $ runNetProto runst conn $ auth myuuid authtoken noop >>= \case
r <- liftIO $ runNetProto runst conn $ auth myuuid authtoken noop >>= \case
Just _theiruuid -> connect service stdin stdout
Nothing -> giveup $ "authentication failed, perhaps you need to set " ++ p2pAuthTokenEnv
quiesce False
return r


@ -11,7 +11,6 @@ module Command (
) where
import Annex.Common as ReExported
import Annex.WorkTree as ReExported (whenAnnexed, ifAnnexed)
import Types.Command as ReExported
import Types.DeferredParse as ReExported
import CmdLine.Seek as ReExported


@ -18,6 +18,7 @@ import Annex.FileMatcher
import Annex.Link
import Annex.Tmp
import Annex.HashObject
import Annex.WorkTree
import Messages.Progress
import Git.FilePath
import Git.Types
@ -202,7 +203,9 @@ start dr si file addunlockedmatcher =
mk <- liftIO $ isPointerFile file
maybe (go s) (fixuppointer s) mk
where
go s = ifAnnexed file (addpresent s) (add s)
go s = lookupKey file >>= \case
Just k -> addpresent s k
Nothing -> add s
add s = starting "add" (ActionItemTreeFile file) si $
skipWhenDryRun dr $
if isSymbolicLink s


@ -20,6 +20,7 @@ import Annex.Ingest
import Annex.CheckIgnore
import Annex.Perms
import Annex.UUID
import Annex.WorkTree
import Annex.YoutubeDl
import Annex.UntrustedFilePath
import Logs.Web
@ -183,7 +184,9 @@ startRemote addunlockedmatcher r o si file uri sz = do
performRemote addunlockedmatcher r o uri (toRawFilePath file') sz
performRemote :: AddUnlockedMatcher -> Remote -> AddUrlOptions -> URLString -> RawFilePath -> Maybe Integer -> CommandPerform
performRemote addunlockedmatcher r o uri file sz = ifAnnexed file adduri geturi
performRemote addunlockedmatcher r o uri file sz = lookupKey file >>= \case
Just k -> adduri k
Nothing -> geturi
where
loguri = setDownloader uri OtherDownloader
adduri = addUrlChecked o loguri file (Remote.uuid r) checkexistssize
@ -270,7 +273,9 @@ checkPreserveFileNameSecurity f = do
]
performWeb :: AddUnlockedMatcher -> AddUrlOptions -> URLString -> RawFilePath -> Url.UrlInfo -> CommandPerform
performWeb addunlockedmatcher o url file urlinfo = ifAnnexed file addurl geturl
performWeb addunlockedmatcher o url file urlinfo = lookupKey file >>= \case
Just k -> addurl k
Nothing -> geturl
where
geturl = next $ isJust <$> addUrlFile addunlockedmatcher (downloadOptions o) url urlinfo file
addurl = addUrlChecked o url file webUUID $ \k ->
@ -335,9 +340,9 @@ downloadWeb addunlockedmatcher o url urlinfo file =
tryyoutubedl tmp = youtubeDlFileNameHtmlOnly url >>= \case
Right mediafile ->
let f = youtubeDlDestFile o file (toRawFilePath mediafile)
in ifAnnexed f
(alreadyannexed (fromRawFilePath f))
(dl f)
in lookupKey f >>= \case
Just k -> alreadyannexed (fromRawFilePath f) k
Nothing -> dl f
Left err -> checkRaw (Just err) o Nothing (normalfinish tmp)
where
dl dest = withTmpWorkDir mediakey $ \workdir -> do


@ -60,9 +60,10 @@ start _os = do
gitannex <- liftIO programPath
let ps = [Param (cmdname cmd), Param (show curruserid)]
sucommand <- liftIO $ mkSuCommand gitannex ps
cleanenv <- liftIO $ cleanStandaloneEnvironment
maybe noop showLongNote
(describePasswordPrompt' sucommand)
ifM (liftIO $ runSuCommand sucommand)
ifM (liftIO $ runSuCommand sucommand cleanenv)
( next checkHiddenService
, giveup $ unwords $
[ "Failed to run as root:" , gitannex ] ++ toCommand ps


@ -378,7 +378,7 @@ cleanupUnexport r db eks loc = do
removeExportedLocation db ek loc
flushDbQueue db
-- An versionedExport remote supports removeExportLocation to remove
-- A versionedExport remote supports removeExportLocation to remove
-- the file from the exported tree, but still retains the content
-- and allows retrieving it.
unless (versionedExport (exportActions r)) $ do


@ -40,6 +40,7 @@ import Command.AddUrl (addUrlFile, downloadRemoteFile, parseDownloadOptions, Dow
import Annex.UUID
import Backend.URL (fromUrl)
import Annex.Content
import Annex.WorkTree
import Annex.YoutubeDl
import Types.MetaData
import Logs.MetaData
@ -297,7 +298,9 @@ performDownload' started addunlockedmatcher opts cache todownload = case locatio
- to be re-downloaded. -}
makeunique url n file = ifM alreadyexists
( ifM forced
( ifAnnexed (toRawFilePath f) checksameurl tryanother
( lookupKey (toRawFilePath f) >>= \case
Just k -> checksameurl k
Nothing -> tryanother
, tryanother
)
, return $ Just f


@ -28,6 +28,7 @@ import Utility.DiskFree
import Annex.Content
import Annex.UUID
import Annex.CatFile
import Annex.WorkTree
import Logs.UUID
import Logs.Trust
import Logs.Location
@ -174,9 +175,9 @@ itemInfo o (si, p) = ifM (isdir p)
Right u -> uuidInfo o u si
Left _ -> do
relp <- liftIO $ relPathCwdToFile (toRawFilePath p)
ifAnnexed relp
(fileInfo o (fromRawFilePath relp) si)
(treeishInfo o p si)
lookupKey relp >>= \case
Just k -> fileInfo o (fromRawFilePath relp) si k
Nothing -> treeishInfo o p si
)
where
isdir = liftIO . catchBoolIO . (isDirectory <$$> getFileStatus)


@ -155,7 +155,7 @@ parseJSONInput i = case eitherDecode (BU.fromString i) of
startBatch :: (SeekInput, (Either RawFilePath Key, MetaData)) -> CommandStart
startBatch (si, (i, (MetaData m))) = case i of
Left f -> do
mk <- lookupKey f
mk <- lookupKeyStaged f
case mk of
Just k -> go k (mkActionItem (k, AssociatedFile (Just f)))
Nothing -> return Nothing


@ -16,6 +16,7 @@ import Annex.Perms
import Annex.ReplaceFile
import Logs.Location
import Annex.InodeSentinal
import Annex.WorkTree
import Utility.InodeCache
import qualified Utility.RawFilePath as R
@ -61,7 +62,9 @@ seek o = case batchOption o of
(toRawFilePath file, fromMaybe (giveup "bad key") (deserializeKey skey))
start :: SeekInput -> (RawFilePath, Key) -> CommandStart
start si (file, newkey) = ifAnnexed file go stop
start si (file, newkey) = lookupKey file >>= \case
Just k -> go k
Nothing -> stop
where
go oldkey
| oldkey == newkey = stop


@ -31,9 +31,7 @@ start (_, key) = fieldTransfer Download key $ \_p -> do
ifM (getViaTmp rsp DefaultVerify key (AssociatedFile Nothing) go)
( do
logStatus key InfoPresent
-- forcibly quit after receiving one key,
-- and shutdown cleanly
_ <- shutdown True
_ <- quiesce True
return True
, return False
)


@ -13,6 +13,7 @@ import Annex.Content
import Backend
import Types.KeySource
import Utility.Metered
import Annex.WorkTree
import qualified Git
cmd :: Command
@ -45,9 +46,9 @@ startSrcDest :: [FilePath] -> CommandStart
startSrcDest ps@(src:dest:[])
| src == dest = stop
| otherwise = notAnnexed src' $
ifAnnexed (toRawFilePath dest)
go
(giveup $ src ++ " is not an annexed file")
lookupKey (toRawFilePath dest) >>= \case
Just k -> go k
Nothing -> giveup $ src ++ " is not an annexed file"
where
src' = toRawFilePath src
go key = starting "reinject" ai si $
@ -79,9 +80,9 @@ notAnnexed :: RawFilePath -> CommandStart -> CommandStart
notAnnexed src a =
ifM (fromRepo Git.repoIsLocalBare)
( a
, ifAnnexed src
(giveup $ "cannot used annexed file as src: " ++ fromRawFilePath src)
a
, lookupKey src >>= \case
Just _ -> giveup $ "cannot used annexed file as src: " ++ fromRawFilePath src
Nothing -> a
)
perform :: RawFilePath -> Key -> CommandPerform


@ -9,6 +9,7 @@ module Command.RmUrl where
import Command
import Logs.Web
import Annex.WorkTree
cmd :: Command
cmd = notBareRepo $
@ -46,10 +47,12 @@ batchParser s = case separate (== ' ') (reverse s) of
return $ Right (f', reverse ru)
start :: (SeekInput, (FilePath, URLString)) -> CommandStart
start (si, (file, url)) = flip whenAnnexed file' $ \_ key -> do
let ai = mkActionItem (key, AssociatedFile (Just file'))
starting "rmurl" ai si $
next $ cleanup url key
start (si, (file, url)) = lookupKeyStaged file' >>= \case
Nothing -> stop
Just key -> do
let ai = mkActionItem (key, AssociatedFile (Just file'))
starting "rmurl" ai si $
next $ cleanup url key
where
file' = toRawFilePath file


@ -50,6 +50,7 @@ import Config.DynamicConfig
import Annex.Path
import Annex.Wanted
import Annex.Content
import Annex.WorkTree
import Command.Get (getKey')
import qualified Command.Move
import qualified Command.Export
@ -765,7 +766,10 @@ seekSyncContent o rs currbranch = do
seekHelper fst3 ww LsFiles.inRepoDetails l
seekincludinghidden origbranch mvar l bloomfeeder =
seekFiltered (const (pure True)) (\(si, f) -> ifAnnexed f (commandAction . gofile bloomfeeder mvar si f) noop) $
let filterer = \(si, f) -> lookupKey f >>= \case
Just k -> (commandAction $ gofile bloomfeeder mvar si f k)
Nothing -> noop
in seekFiltered (const (pure True)) filterer $
seekHelper id ww (LsFiles.inRepoOrBranch origbranch) l
ww = WarnUnmatchLsFiles


@ -18,6 +18,7 @@ import qualified Database.Keys
import Annex.Content
import Annex.Init
import Annex.CheckIgnore
import Annex.WorkTree
import Utility.FileMode
import qualified Utility.RawFilePath as R
@ -50,13 +51,17 @@ seek ps = do
l <- workTreeItems ww ps
withFilesNotInGit
(CheckGitIgnore False)
WarnUnmatchWorkTreeItems
(\(_, f) -> commandAction $ whenAnnexed (startCheckIncomplete . fromRawFilePath) f)
WarnUnmatchWorkTreeItems
checksymlinks
l
withFilesInGitAnnex ww (Command.Unannex.seeker True) l
finish
where
ww = WarnUnmatchLsFiles
checksymlinks (_, f) =
commandAction $ lookupKey f >>= \case
Nothing -> stop
Just k -> startCheckIncomplete (fromRawFilePath f) k
{- git annex symlinks that are not checked into git could be left by an
- interrupted add. -}


@ -22,6 +22,7 @@ import Utility.WebApp
import Utility.Daemon (checkDaemon)
import Utility.UserInfo
import Annex.Init
import Annex.Path
import qualified Git
import Git.Types (fromConfigValue)
import qualified Git.Config
@ -30,6 +31,7 @@ import qualified Annex
import Config.Files.AutoStart
import Upgrade
import Annex.Version
import Annex.Action
import Utility.Android
import Control.Concurrent
@ -126,8 +128,10 @@ startNoRepo o = go =<< liftIO (filterM doesDirectoryExist =<< readAutoStartFile)
Right state -> void $ Annex.eval state $ do
whenM (fromRepo Git.repoIsLocalBare) $
giveup $ d ++ " is a bare git repository, cannot run the webapp in it"
callCommandAction $
r <- callCommandAction $
start' False o
quiesce False
return r
cannotStartIn :: FilePath -> String -> IO ()
cannotStartIn d reason = warningIO $ "unable to start webapp in repository " ++ d ++ ": " ++ reason
@ -219,7 +223,7 @@ openBrowser' mcmd htmlshim realurl outh errh =
#endif
hPutStrLn (fromMaybe stdout outh) $ "Launching web browser on " ++ url
hFlush stdout
environ <- cleanEnvironment
environ <- cleanStandaloneEnvironment
let p' = p
{ env = environ
, std_out = maybe Inherit UseHandle outh


@ -1,6 +1,6 @@
{- Persistent sqlite database handles.
-
- Copyright 2015-2019 Joey Hess <id@joeyh.name>
- Copyright 2015-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -21,6 +21,7 @@ import Utility.Exception
import Utility.FileSystemEncoding
import Utility.Debug
import Utility.DebugLocks
import Utility.InodeCache
import Database.Persist.Sqlite
import qualified Database.Sqlite as Sqlite
@ -38,7 +39,7 @@ import System.IO
{- A DbHandle is a reference to a worker thread that communicates with
- the database. It has a MVar which Jobs are submitted to. -}
data DbHandle = DbHandle (Async ()) (MVar Job)
data DbHandle = DbHandle RawFilePath (Async ()) (MVar Job)
{- Name of a table that should exist once the database is initialized. -}
type TableName = String
@ -48,17 +49,17 @@ type TableName = String
openDb :: RawFilePath -> TableName -> IO DbHandle
openDb db tablename = do
jobs <- newEmptyMVar
worker <- async (workerThread (T.pack (fromRawFilePath db)) tablename jobs)
worker <- async (workerThread db tablename jobs)
-- work around https://github.com/yesodweb/persistent/issues/474
liftIO $ fileEncoding stderr
return $ DbHandle worker jobs
return $ DbHandle db worker jobs
{- This is optional; when the DbHandle gets garbage collected it will
- auto-close. -}
closeDb :: DbHandle -> IO ()
closeDb (DbHandle worker jobs) = do
closeDb (DbHandle _db worker jobs) = do
debugLocks $ putMVar jobs CloseJob
wait worker
@ -73,7 +74,7 @@ closeDb (DbHandle worker jobs) = do
- it is able to run.
-}
queryDb :: DbHandle -> SqlPersistM a -> IO a
queryDb (DbHandle _ jobs) a = do
queryDb (DbHandle _db _ jobs) a = do
res <- newEmptyMVar
putMVar jobs $ QueryJob $
debugLocks $ liftIO . putMVar res =<< tryNonAsync a
@ -82,25 +83,32 @@ queryDb (DbHandle _ jobs) a = do
{- Writes a change to the database.
-
- Writes can fail if another write is happening concurrently.
- So write failures are caught and retried repeatedly for up to 10
- seconds, which should avoid all but the most exceptional problems.
- Writes can fail when another write is happening concurrently.
- So write failures are caught and retried.
-
- Retries repeatedly for up to 60 seconds. Past that point, it continues
- retrying only if the database shows signs of being modified by another
- process at least once each 30 seconds.
-}
commitDb :: DbHandle -> SqlPersistM () -> IO ()
commitDb h wa = robustly Nothing 100 (commitDb' h wa)
commitDb h@(DbHandle db _ _) wa =
robustly (commitDb' h wa) maxretries emptyDatabaseInodeCache
where
robustly :: Maybe SomeException -> Int -> IO (Either SomeException ()) -> IO ()
robustly e 0 _ = error $ "failed to commit changes to sqlite database: " ++ show e
robustly _ n a = do
robustly a retries ic = do
r <- a
case r of
Right _ -> return ()
Left e -> do
threadDelay 100000 -- 1/10th second
robustly (Just e) (n-1) a
Left err -> do
threadDelay briefdelay
retryHelper "write to" err maxretries db retries ic $
robustly a
briefdelay = 100000 -- 1/10th second
maxretries = 300 :: Int -- 30 seconds of briefdelay
commitDb' :: DbHandle -> SqlPersistM () -> IO (Either SomeException ())
commitDb' (DbHandle _ jobs) a = do
commitDb' (DbHandle _ _ jobs) a = do
debug "Database.Handle" "commitDb start"
res <- newEmptyMVar
putMVar jobs $ ChangeJob $
@ -117,7 +125,7 @@ data Job
| ChangeJob (SqlPersistM ())
| CloseJob
workerThread :: T.Text -> TableName -> MVar Job -> IO ()
workerThread :: RawFilePath -> TableName -> MVar Job -> IO ()
workerThread db tablename jobs = newconn
where
newconn = do
@ -144,45 +152,47 @@ workerThread db tablename jobs = newconn
getjob :: IO (Either BlockedIndefinitelyOnMVar Job)
getjob = try $ takeMVar jobs
-- Like runSqlite, but more robust.
--
-- New database connections can sometimes take a while to become usable.
-- This may be due to WAL mode recovering after a crash, or perhaps a bug
-- like described in blob 500f777a6ab6c45ca5f9790e0a63575f8e3cb88f.
-- So, loop until a select succeeds; once one succeeds the connection will
-- stay usable.
--
-- And sqlite sometimes throws ErrorIO when there's not really an IO problem,
-- but perhaps just a short read(). That's caught and retried several times.
runSqliteRobustly :: TableName -> T.Text -> (SqlPersistM a) -> IO a
{- Like runSqlite, but more robust.
-
- New database connections can sometimes take a while to become usable,
- and selects will fail with ErrorBusy in the meantime. This may be due to
- WAL mode recovering after a crash, or a concurrent writer.
- So, wait until a select succeeds; once one succeeds the connection will
- stay usable.
-
- Also sqlite sometimes throws ErrorIO when there's not really an IO
- problem, but perhaps just a short read(). So also retry on ErrorIO.
-
- Retries repeatedly for up to 60 seconds. Past that point, it continues
- retrying only if the database shows signs of being modified by another
- process at least once each 30 seconds.
-}
runSqliteRobustly :: TableName -> RawFilePath -> (SqlPersistM a) -> IO a
runSqliteRobustly tablename db a = do
conn <- opensettle maxretries
go conn maxretries
conn <- opensettle maxretries emptyDatabaseInodeCache
go conn maxretries emptyDatabaseInodeCache
where
maxretries = 100 :: Int
rethrow msg e = throwIO $ userError $ show e ++ "(" ++ msg ++ ")"
go conn retries = do
go conn retries ic = do
r <- try $ runResourceT $ runNoLoggingT $
withSqlConnRobustly (wrapConnection conn) $
withSqlConnRobustly db (wrapConnection conn) $
runSqlConn a
case r of
Right v -> return v
Left ex@(Sqlite.SqliteException { Sqlite.seError = e })
| e == Sqlite.ErrorIO ->
let retries' = retries - 1
in if retries' < 1
then rethrow "after successful open" ex
else go conn retries'
| otherwise -> rethrow "after successful open" ex
| e == Sqlite.ErrorIO -> do
briefdelay
retryHelper "access" ex maxretries db retries ic $
go conn
| otherwise -> rethrow $ errmsg "after successful open" ex
opensettle retries = do
conn <- Sqlite.open db
settle conn retries
opensettle retries ic = do
conn <- Sqlite.open tdb
settle conn retries ic
settle conn retries = do
tdb = T.pack (fromRawFilePath db)
settle conn retries ic = do
r <- try $ do
stmt <- Sqlite.prepare conn nullselect
void $ Sqlite.step stmt
@ -190,26 +200,26 @@ runSqliteRobustly tablename db a = do
case r of
Right _ -> return conn
Left ex@(Sqlite.SqliteException { Sqlite.seError = e })
| e == Sqlite.ErrorBusy -> do
-- Wait and retry any number of times; it
-- will stop being busy eventually.
| e == Sqlite.ErrorBusy || e == Sqlite.ErrorIO -> do
when (e == Sqlite.ErrorIO) $
Sqlite.close conn
briefdelay
settle conn retries
| e == Sqlite.ErrorIO -> do
-- Could be a real IO error,
-- so don't retry indefinitely.
Sqlite.close conn
briefdelay
let retries' = retries - 1
if retries' < 1
then rethrow "while opening database connection" ex
else opensettle retries'
| otherwise -> rethrow "while opening database connection" ex
retryHelper "open" ex maxretries db retries ic $
if e == Sqlite.ErrorIO
then opensettle
else settle conn
| otherwise -> rethrow $ errmsg "while opening database connection" ex
-- This should succeed for any table.
nullselect = T.pack $ "SELECT null from " ++ tablename ++ " limit 1"
briefdelay = threadDelay 1000 -- 1/1000th second
maxretries = 30000 :: Int -- 30 seconds of briefdelays
rethrow = throwIO . userError
errmsg msg e = show e ++ "(" ++ msg ++ ")"
-- Like withSqlConn, but more robust.
withSqlConnRobustly
@ -219,45 +229,99 @@ withSqlConnRobustly
, BaseBackend backend ~ SqlBackend
, BackendCompatible SqlBackend backend
)
=> (LogFunc -> IO backend)
=> RawFilePath
-> (LogFunc -> IO backend)
-> (backend -> m a)
-> m a
withSqlConnRobustly open f = do
withSqlConnRobustly db open f = do
logFunc <- askLoggerIO
withRunInIO $ \run -> bracket
(open logFunc)
closeRobustly
(closeRobustly db)
(run . f)
-- Sqlite can throw ErrorBusy while closing a database; this catches
-- the exception and retries.
{- Sqlite can throw ErrorBusy while closing a database; this catches
- the exception and retries.
-
- Retries repeatedly for up to 60 seconds. Past that point, it continues
- retrying only if the database shows signs of being modified by another
- process at least once each 30 seconds.
-}
closeRobustly
:: (IsPersistBackend backend
, BaseBackend backend ~ SqlBackend
, BackendCompatible SqlBackend backend
)
=> backend
=> RawFilePath
-> backend
-> IO ()
closeRobustly conn = go maxretries briefdelay
closeRobustly db conn = go maxretries emptyDatabaseInodeCache
where
briefdelay = 1000 -- 1/1000th second
-- Try up to 14 times; with the delay doubling each time,
-- the maximum delay before giving up is 16 seconds.
maxretries = 14 :: Int
go retries delay = do
go retries ic = do
r <- try $ close' conn
case r of
Right () -> return ()
Left ex@(Sqlite.SqliteException { Sqlite.seError = e })
| e == Sqlite.ErrorBusy -> do
threadDelay delay
let delay' = delay * 2
let retries' = retries - 1
if retries' < 1
then rethrow "while closing database connection" ex
else go retries' delay'
| otherwise -> rethrow "while closing database connection" ex
threadDelay briefdelay
retryHelper "close" ex maxretries db retries ic go
| otherwise -> rethrow $ errmsg "while closing database connection" ex
rethrow msg e = throwIO $ userError $ show e ++ "(" ++ msg ++ ")"
briefdelay = 1000 -- 1/1000th second
maxretries = 30000 :: Int -- 30 seconds of briefdelays
rethrow = throwIO . userError
errmsg msg e = show e ++ "(" ++ msg ++ ")"
{- Retries a sqlite action repeatedly, but not forever. Detects situations
- when another git-annex process is suspended and has the database locked,
- and eventually gives up. The retries parameter is the number of retries
- that are left; maxretries is how many retries to allow each time
- the database is seen to have been modified by some other process.
-}
retryHelper
:: Show err
=> String
-> err
-> Int
-> RawFilePath
-> Int
-> DatabaseInodeCache
-> (Int -> DatabaseInodeCache -> IO a)
-> IO a
retryHelper action err maxretries db retries ic a = do
let retries' = retries - 1
if retries' < 1
then do
ic' <- getDatabaseInodeCache db
if isDatabaseModified ic ic'
then a maxretries ic'
else giveup (databaseAccessStalledMsg action db err)
else a retries' ic
databaseAccessStalledMsg :: Show err => String -> RawFilePath -> err -> String
databaseAccessStalledMsg action db err =
"Repeatedly unable to " ++ action ++ " sqlite database " ++ fromRawFilePath db
++ ": " ++ show err ++ ". "
++ "Perhaps another git-annex process is suspended and is "
++ "keeping this database locked?"
data DatabaseInodeCache = DatabaseInodeCache (Maybe InodeCache) (Maybe InodeCache)
emptyDatabaseInodeCache :: DatabaseInodeCache
emptyDatabaseInodeCache = DatabaseInodeCache Nothing Nothing
getDatabaseInodeCache :: RawFilePath -> IO DatabaseInodeCache
getDatabaseInodeCache db = DatabaseInodeCache
<$> genInodeCache db noTSDelta
<*> genInodeCache (db <> "-wal") noTSDelta
isDatabaseModified :: DatabaseInodeCache -> DatabaseInodeCache -> Bool
isDatabaseModified (DatabaseInodeCache a1 b1) (DatabaseInodeCache a2 b2) =
ismodified a1 a2 || ismodified b1 b2
where
ismodified (Just a) (Just b) = not (compareStrong a b)
ismodified Nothing Nothing = False
ismodified _ _ = True
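A hypothetical helper, making explicit the stall check that retryHelper performs with these two functions:

```
-- Keep retrying only when some other process has modified the
-- database file or its write-ahead log since the last check.
databaseMadeProgress :: RawFilePath -> DatabaseInodeCache -> IO (Maybe DatabaseInodeCache)
databaseMadeProgress db old = do
	new <- getDatabaseInodeCache db
	return $ if isDatabaseModified old new
		then Just new  -- progress seen; reset the retry budget
		else Nothing   -- stalled; give up with databaseAccessStalledMsg
```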


@ -1,6 +1,6 @@
{- Sqlite database of information about Keys
-
- Copyright 2015-2021 Joey Hess <id@joeyh.name>
- Copyright 2015-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -12,6 +12,7 @@
module Database.Keys (
DbHandle,
closeDb,
flushDb,
addAssociatedFile,
getAssociatedFiles,
getAssociatedFilesIncluding,
@ -24,11 +25,13 @@ module Database.Keys (
removeInodeCache,
isInodeKnown,
runWriter,
updateDatabase,
) where
import qualified Database.Keys.SQL as SQL
import Database.Types
import Database.Keys.Handle
import Database.Keys.Tables
import qualified Database.Queue as H
import Database.Init
import Annex.Locations
@ -63,49 +66,53 @@ import Control.Concurrent.Async
- If the database is already open, any writes are flushed to it, to ensure
- consistency.
-
- Any queued writes will be flushed before the read.
- Any queued writes to the table will be flushed before the read.
-}
runReader :: Monoid v => (SQL.ReadHandle -> Annex v) -> Annex v
runReader a = do
runReader :: Monoid v => DbTable -> (SQL.ReadHandle -> Annex v) -> Annex v
runReader t a = do
h <- Annex.getRead Annex.keysdbhandle
withDbState h go
where
go DbUnavailable = return (mempty, DbUnavailable)
go st@(DbOpen qh) = do
liftIO $ H.flushDbQueue qh
go (DbOpen (qh, tableschanged)) = do
tableschanged' <- if isDbTableChanged tableschanged t
then do
liftIO $ H.flushDbQueue qh
return mempty
else return tableschanged
v <- a (SQL.ReadHandle qh)
return (v, st)
return (v, DbOpen (qh, tableschanged'))
go DbClosed = do
st' <- openDb False DbClosed
v <- case st' of
(DbOpen qh) -> a (SQL.ReadHandle qh)
st <- openDb False DbClosed
v <- case st of
(DbOpen (qh, _)) -> a (SQL.ReadHandle qh)
_ -> return mempty
return (v, st')
return (v, st)
runReaderIO :: Monoid v => (SQL.ReadHandle -> IO v) -> Annex v
runReaderIO a = runReader (liftIO . a)
runReaderIO :: Monoid v => DbTable -> (SQL.ReadHandle -> IO v) -> Annex v
runReaderIO t a = runReader t (liftIO . a)
{- Runs an action that writes to the database. Typically this is used to
- queue changes, which will be flushed at a later point.
-
- The database is created if it doesn't exist yet. -}
runWriter :: (SQL.WriteHandle -> Annex ()) -> Annex ()
runWriter a = do
runWriter :: DbTable -> (SQL.WriteHandle -> Annex ()) -> Annex ()
runWriter t a = do
h <- Annex.getRead Annex.keysdbhandle
withDbState h go
where
go st@(DbOpen qh) = do
go (DbOpen (qh, tableschanged)) = do
v <- a (SQL.WriteHandle qh)
return (v, st)
return (v, DbOpen (qh, addDbTable tableschanged t))
go st = do
st' <- openDb True st
v <- case st' of
DbOpen qh -> a (SQL.WriteHandle qh)
DbOpen (qh, _) -> a (SQL.WriteHandle qh)
_ -> error "internal"
return (v, st')
runWriterIO :: (SQL.WriteHandle -> IO ()) -> Annex ()
runWriterIO a = runWriter (liftIO . a)
runWriterIO :: DbTable -> (SQL.WriteHandle -> IO ()) -> Annex ()
runWriterIO t a = runWriter t (liftIO . a)
{- Opens the database, creating it if it doesn't exist yet.
-
@ -138,26 +145,29 @@ openDb forwrite _ = do
open db = do
qh <- liftIO $ H.openDbQueue db SQL.containedTable
reconcileStaged qh
return $ DbOpen qh
tc <- reconcileStaged qh
return $ DbOpen (qh, tc)
{- Closes the database if it was open. Any writes will be flushed to it.
-
- This does not normally need to be called; the database will auto-close
- when the handle is garbage collected. However, this can be used to
- force a re-read of the database, in case another process has written
- data to it.
- This does not prevent further use of the database; it will be re-opened
- as necessary.
-}
closeDb :: Annex ()
closeDb = liftIO . closeDbHandle =<< Annex.getRead Annex.keysdbhandle
{- Flushes any queued writes to the database. -}
flushDb :: Annex ()
flushDb = liftIO . flushDbQueue =<< Annex.getRead Annex.keysdbhandle
addAssociatedFile :: Key -> TopFilePath -> Annex ()
addAssociatedFile k f = runWriterIO $ SQL.addAssociatedFile k f
addAssociatedFile k f = runWriterIO AssociatedTable $ SQL.addAssociatedFile k f
{- Note that the files returned were once associated with the key, but
- some of them may not be any longer. -}
getAssociatedFiles :: Key -> Annex [TopFilePath]
getAssociatedFiles k = emptyWhenBare $ runReaderIO $ SQL.getAssociatedFiles k
getAssociatedFiles k = emptyWhenBare $ runReaderIO AssociatedTable $
SQL.getAssociatedFiles k
{- Queries for associated files never return anything when in a bare
- repository, since without a work tree there can be no associated files.
@ -183,10 +193,12 @@ getAssociatedFilesIncluding afile k = emptyWhenBare $ do
{- Gets any keys that are on record as having a particular associated file.
- (Should be one or none but the database doesn't enforce that.) -}
getAssociatedKey :: TopFilePath -> Annex [Key]
getAssociatedKey f = emptyWhenBare $ runReaderIO $ SQL.getAssociatedKey f
getAssociatedKey f = emptyWhenBare $ runReaderIO AssociatedTable $
SQL.getAssociatedKey f
removeAssociatedFile :: Key -> TopFilePath -> Annex ()
removeAssociatedFile k = runWriterIO . SQL.removeAssociatedFile k
removeAssociatedFile k = runWriterIO AssociatedTable .
SQL.removeAssociatedFile k
{- Stats the files, and stores their InodeCaches. -}
storeInodeCaches :: Key -> [RawFilePath] -> Annex ()
@ -195,7 +207,7 @@ storeInodeCaches k fs = withTSDelta $ \d ->
=<< liftIO (mapM (\f -> genInodeCache f d) fs)
addInodeCaches :: Key -> [InodeCache] -> Annex ()
addInodeCaches k is = runWriterIO $ SQL.addInodeCaches k is
addInodeCaches k is = runWriterIO ContentTable $ SQL.addInodeCaches k is
{- A key may have multiple InodeCaches; one for the annex object, and one
- for each pointer file that is a copy of it.
@ -207,18 +219,19 @@ addInodeCaches k is = runWriterIO $ SQL.addInodeCaches k is
- for pointer files, but none recorded for the annex object.
-}
getInodeCaches :: Key -> Annex [InodeCache]
getInodeCaches = runReaderIO . SQL.getInodeCaches
getInodeCaches = runReaderIO ContentTable . SQL.getInodeCaches
{- Remove all inodes cached for a key. -}
removeInodeCaches :: Key -> Annex ()
removeInodeCaches = runWriterIO . SQL.removeInodeCaches
removeInodeCaches = runWriterIO ContentTable . SQL.removeInodeCaches
{- Remove cached inodes, for any key. -}
removeInodeCache :: InodeCache -> Annex ()
removeInodeCache = runWriterIO . SQL.removeInodeCache
removeInodeCache = runWriterIO ContentTable . SQL.removeInodeCache
isInodeKnown :: InodeCache -> SentinalStatus -> Annex Bool
isInodeKnown i s = or <$> runReaderIO ((:[]) <$$> SQL.isInodeKnown i s)
isInodeKnown i s = or <$> runReaderIO ContentTable
((:[]) <$$> SQL.isInodeKnown i s)
{- Looks at staged changes to annexed files, and updates the keys database,
- so that its information is consistent with the state of the repository.
@ -247,18 +260,21 @@ isInodeKnown i s = or <$> runReaderIO ((:[]) <$$> SQL.isInodeKnown i s)
- So when using getAssociatedFiles, have to make sure the file still
- is an associated file.
-}
reconcileStaged :: H.DbQueue -> Annex ()
reconcileStaged qh = unlessM (Git.Config.isBare <$> gitRepo) $ do
gitindex <- inRepo currentIndexFile
indexcache <- fromRawFilePath <$> calcRepo' gitAnnexKeysDbIndexCache
withTSDelta (liftIO . genInodeCache gitindex) >>= \case
Just cur -> readindexcache indexcache >>= \case
Nothing -> go cur indexcache =<< getindextree
Just prev -> ifM (compareInodeCaches prev cur)
( noop
, go cur indexcache =<< getindextree
)
Nothing -> noop
reconcileStaged :: H.DbQueue -> Annex DbTablesChanged
reconcileStaged qh = ifM (Git.Config.isBare <$> gitRepo)
( return mempty
, do
gitindex <- inRepo currentIndexFile
indexcache <- fromRawFilePath <$> calcRepo' gitAnnexKeysDbIndexCache
withTSDelta (liftIO . genInodeCache gitindex) >>= \case
Just cur -> readindexcache indexcache >>= \case
Nothing -> go cur indexcache =<< getindextree
Just prev -> ifM (compareInodeCaches prev cur)
( return mempty
, go cur indexcache =<< getindextree
)
Nothing -> return mempty
)
where
lastindexref = Ref "refs/annex/last-index"
@ -283,6 +299,7 @@ reconcileStaged qh = unlessM (Git.Config.isBare <$> gitRepo) $ do
-- against next time.
inRepo $ update' lastindexref newtree
fastDebug "Database.Keys" "reconcileStaged end"
return (DbTablesChanged True True)
-- git write-tree will fail if the index is locked or when there is
-- a merge conflict. To get up-to-date with the current index,
-- diff --staged with the old index tree. The current index tree
@ -304,6 +321,7 @@ reconcileStaged qh = unlessM (Git.Config.isBare <$> gitRepo) $ do
void $ updatetodiff g Nothing "--staged"
(procmergeconflictdiff mdfeeder)
fastDebug "Database.Keys" "reconcileStaged end"
return (DbTablesChanged True True)
updatetodiff g old new processor = do
(l, cleanup) <- pipeNullSplit' (diff old new) g
@ -479,3 +497,9 @@ reconcileStaged qh = unlessM (Git.Config.isBare <$> gitRepo) $ do
largediff :: Int
largediff = 1000
{- Normally the keys database is updated incrementally when opened,
- by reconcileStaged. Calling this explicitly allows running the
- update at an earlier point.
-}
updateDatabase :: Annex ()
updateDatabase = runWriter ContentTable (const noop)


@ -15,6 +15,7 @@ module Database.Keys.Handle (
) where
import qualified Database.Queue as H
import Database.Keys.Tables
import Utility.Exception
import Utility.DebugLocks
@ -29,7 +30,7 @@ newtype DbHandle = DbHandle (MVar DbState)
-- The database can be closed or open, but it also may have been
-- tried to open (for read) and didn't exist yet or is not readable.
data DbState = DbClosed | DbOpen H.DbQueue | DbUnavailable
data DbState = DbClosed | DbOpen (H.DbQueue, DbTablesChanged) | DbUnavailable
newDbHandle :: IO DbHandle
newDbHandle = DbHandle <$> newMVar DbClosed
@ -52,15 +53,17 @@ withDbState (DbHandle mvar) a = do
return v
flushDbQueue :: DbHandle -> IO ()
flushDbQueue (DbHandle mvar) = go =<< debugLocks (readMVar mvar)
flushDbQueue h = withDbState h go
where
go (DbOpen qh) = H.flushDbQueue qh
go _ = return ()
go (DbOpen (qh, _)) = do
H.flushDbQueue qh
return ((), DbOpen (qh, mempty))
go st = return ((), st)
closeDbHandle :: DbHandle -> IO ()
closeDbHandle h = withDbState h go
where
go (DbOpen qh) = do
go (DbOpen (qh, _)) = do
H.closeDbQueue qh
return ((), DbClosed)
go st = return ((), st)

Database/Keys/Tables.hs (new file, 38 lines)

@ -0,0 +1,38 @@
{- Keeping track of which tables in the keys database have changed
-
- Copyright 2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Database.Keys.Tables where
import Data.Monoid
import qualified Data.Semigroup as Sem
import Prelude
data DbTable = AssociatedTable | ContentTable
deriving (Eq, Show)
data DbTablesChanged = DbTablesChanged
{ associatedTable :: Bool
, contentTable :: Bool
}
deriving (Show)
instance Sem.Semigroup DbTablesChanged where
a <> b = DbTablesChanged
{ associatedTable = associatedTable a || associatedTable b
, contentTable = contentTable a || contentTable b
}
instance Monoid DbTablesChanged where
mempty = DbTablesChanged False False
addDbTable :: DbTablesChanged -> DbTable -> DbTablesChanged
addDbTable ts AssociatedTable = ts { associatedTable = True }
addDbTable ts ContentTable = ts { contentTable = True }
isDbTableChanged :: DbTablesChanged -> DbTable -> Bool
isDbTableChanged ts AssociatedTable = associatedTable ts
isDbTableChanged ts ContentTable = contentTable ts
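A small hypothetical example of how these pieces compose via the Monoid instance:

```
demo :: (Bool, Bool)
demo =
	let changed = addDbTable mempty AssociatedTable
	           <> addDbTable mempty ContentTable
	in ( isDbTableChanged changed AssociatedTable  -- True
	   , isDbTableChanged mempty ContentTable      -- False
	   )
```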


@ -355,7 +355,8 @@ tryGitConfigRead autoinit r hasuuid
":" ++ show e
Annex.getState Annex.repo
s <- newLocal r
liftIO $ Annex.eval s $ check `finally` stopCoProcesses
liftIO $ Annex.eval s $ check
`finally` quiesce True
failedreadlocalconfig = do
unless hasuuid $ case Git.remoteName r of
@ -449,7 +450,6 @@ dropKey' repo r st@(State connpool duc _ _ _) key
Annex.Content.lockContentForRemoval key cleanup $ \lock -> do
Annex.Content.removeAnnex lock
cleanup
Annex.Content.saveState True
, giveup "remote does not have expected annex.uuid value"
)
| Git.repoIsHttp repo = giveup "dropping from http remote not supported"
@ -577,11 +577,9 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key file meterupdate
let checksuccess = liftIO checkio >>= \case
Just err -> giveup err
Nothing -> return True
res <- logStatusAfter key $ Annex.Content.getViaTmp rsp verify key file $ \dest ->
logStatusAfter key $ Annex.Content.getViaTmp rsp verify key file $ \dest ->
metered (Just (combineMeterUpdate meterupdate p)) key bwlimit $ \_ p' ->
copier object (fromRawFilePath dest) key p' checksuccess verify
Annex.Content.saveState True
return res
)
unless res $
giveup "failed to send content to remote"
@ -606,7 +604,7 @@ repairRemote r a = return $ do
Annex.eval s $ do
Annex.BranchState.disableUpdate
ensureInitialized (pure [])
a `finally` stopCoProcesses
a `finally` quiesce True
data LocalRemoteAnnex = LocalRemoteAnnex Git.Repo (MVar [(Annex.AnnexState, Annex.AnnexRead)])
@ -618,8 +616,8 @@ mkLocalRemoteAnnex repo = LocalRemoteAnnex repo <$> liftIO (newMVar [])
{- Runs an action from the perspective of a local remote.
-
- The AnnexState is cached for speed and to avoid resource leaks.
- However, coprocesses are stopped after each call to avoid git
- processes hanging around on removable media.
- However, it is quiesced after each call to avoid git processes
- hanging around on removable media.
-
- The remote will be automatically initialized/upgraded first,
- when possible.
@ -655,7 +653,7 @@ onLocal' (LocalRemoteAnnex repo mv) a = liftIO (takeMVar mv) >>= \case
go ((st, rd), a') = do
curro <- Annex.getState Annex.output
let act = Annex.run (st { Annex.output = curro }, rd) $
a' `finally` stopCoProcesses
a' `finally` quiesce True
(ret, (st', _rd)) <- liftIO $ act `onException` cache (st, rd)
liftIO $ cache (st', rd)
return ret


@ -225,7 +225,7 @@ gen r u rc gc rs = do
, renameExport = renameExportS3 hdl this rs info
}
, importActions = ImportActions
{ listImportableContents = listImportableContentsS3 hdl this info
{ listImportableContents = listImportableContentsS3 hdl this info c
, importKey = Nothing
, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierS3 hdl this rs info
, storeExportWithContentIdentifier = storeExportWithContentIdentifierS3 hdl this rs info magic
@ -561,8 +561,8 @@ renameExportS3 hv r rs info k src dest = Just <$> go
srcobject = T.pack $ bucketExportLocation info src
dstobject = T.pack $ bucketExportLocation info dest
listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
listImportableContentsS3 hv r info =
listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> ParsedRemoteConfig -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
listImportableContentsS3 hv r info c =
withS3Handle hv $ \case
Nothing -> giveup $ needS3Creds (uuid r)
Just h -> Just <$> go h
@ -571,6 +571,8 @@ listImportableContentsS3 hv r info =
ic <- liftIO $ runResourceT $ extractFromResourceT =<< startlist h
return (ImportableContentsComplete ic)
fileprefix = T.pack <$> getRemoteConfigValue fileprefixField c
startlist h
| versioning info = do
rsp <- sendS3Handle h $
@ -578,7 +580,8 @@ listImportableContentsS3 hv r info =
continuelistversioned h [] rsp
| otherwise = do
rsp <- sendS3Handle h $
S3.getBucket (bucket info)
(S3.getBucket (bucket info))
{ S3.gbPrefix = fileprefix }
continuelistunversioned h [] rsp
continuelistunversioned h l rsp
@ -586,6 +589,7 @@ listImportableContentsS3 hv r info =
rsp' <- sendS3Handle h $
(S3.getBucket (bucket info))
{ S3.gbMarker = S3.gbrNextMarker rsp
, S3.gbPrefix = fileprefix
}
continuelistunversioned h (rsp:l) rsp'
| otherwise = return $
@ -597,6 +601,7 @@ listImportableContentsS3 hv r info =
(S3.getBucketObjectVersions (bucket info))
{ S3.gbovKeyMarker = S3.gbovrNextKeyMarker rsp
, S3.gbovVersionIdMarker = S3.gbovrNextVersionIdMarker rsp
, S3.gbovPrefix = fileprefix
}
continuelistversioned h (rsp:l) rsp'
| otherwise = return $


@ -130,9 +130,9 @@ tests n crippledfilesystem adjustedbranchok opts =
: concatMap mkrepotests testmodes
where
testmodes = catMaybes
[ canadjust ("v8 adjusted unlocked branch", (testMode opts (RepoVersion 8)) { adjustedUnlockedBranch = True })
, unlesscrippled ("v8 unlocked", (testMode opts (RepoVersion 8)) { unlockedFiles = True })
, unlesscrippled ("v8 locked", testMode opts (RepoVersion 8))
[ canadjust ("v10 adjusted unlocked branch", (testMode opts (RepoVersion 10)) { adjustedUnlockedBranch = True })
, unlesscrippled ("v10 unlocked", (testMode opts (RepoVersion 10)) { unlockedFiles = True })
, unlesscrippled ("v10 locked", testMode opts (RepoVersion 10))
]
remotetestmode = testMode opts (RepoVersion 8)
unlesscrippled v


@ -270,9 +270,10 @@ data ExportActions a = ExportActions
-- Can throw exception if unable to access remote, or if remote
-- refuses to remove the content.
, removeExport :: Key -> ExportLocation -> a ()
-- Set when the content of a Key stored in the remote to an
-- ExportLocation and then removed with removeExport remains
-- accessible to retrieveKeyFile and checkPresent.
-- Set when the remote is versioned, so once a Key is stored
-- to an ExportLocation, a subsequent deletion of that
-- ExportLocation leaves the key still accessible to retrieveKeyFile
-- and checkPresent.
, versionedExport :: Bool
-- Removes an exported directory. Typically the directory will be
-- empty, but it could possibly contain files or other directories,


@ -16,6 +16,7 @@ import Types.Upgrade
import Annex.CatFile
import qualified Database.Keys
import qualified Database.Keys.SQL
import Database.Keys.Tables
import qualified Git.LsFiles as LsFiles
import qualified Git
import Git.FilePath
@ -114,8 +115,9 @@ populateKeysDb = unlessM isBareRepo $ do
Nothing -> noop
Just k -> do
topf <- inRepo $ toTopFilePath $ toRawFilePath f
Database.Keys.runWriter $ \h -> liftIO $ do
Database.Keys.runWriter AssociatedTable $ \h -> liftIO $
Database.Keys.SQL.addAssociatedFile k topf h
Database.Keys.runWriter ContentTable $ \h -> liftIO $
Database.Keys.SQL.addInodeCaches k [ic] h
liftIO $ void cleanup
Database.Keys.closeDb


@ -57,9 +57,9 @@ describePasswordPrompt' :: Maybe SuCommand -> Maybe String
describePasswordPrompt' (Just (SuCommand p _ _)) = describePasswordPrompt p
describePasswordPrompt' Nothing = Nothing
runSuCommand :: (Maybe SuCommand) -> IO Bool
runSuCommand (Just (SuCommand _ cmd ps)) = boolSystem cmd ps
runSuCommand Nothing = return False
runSuCommand :: (Maybe SuCommand) -> Maybe [(String, String)] -> IO Bool
runSuCommand (Just (SuCommand _ cmd ps)) environ = boolSystemEnv cmd ps environ
runSuCommand Nothing _ = return False
-- Generates a SuCommand that runs a command as root, fairly portably.
--


@ -0,0 +1,51 @@
### Please describe the problem.
### What steps will reproduce the problem?
My system:

    Linux RPI-4B 5.15.74-2-MANJARO-ARM-RPI #1 SMP PREEMPT Thu Oct 20 16:43:17 UTC 2022 aarch64 GNU/Linux

git-annex is not available in the AUR or pacman, so I downloaded git-annex-standalone-arm64.tar.gz from the web and ran `./runshell`:

```
git-annex enable-tor
enable-tor
You may be prompted for a password
git-annex: Failed to run as root: /home/gyurmo/.local/git-annex.linux/bin/git-annex enable-tor 1000
failed
enable-tor: 1 failed
```

Running the suggested command with sudo instead:

```
sudo /home/gyurmo/.local/git-annex.linux/bin/git-annex enable-tor 1000
[sudo] password for gyurmo:
/home/gyurmo/.local/git-annex.linux/bin/git-annex: line 4: /exe/git-annex: No such file or directory
```
### What version of git-annex are you using? On what operating system?
git-annex version: 10.20220121-g0bcb94487
build flags: Assistant Webapp Pairing Inotify DBus DesktopNotify TorrentParser MagicMime Feeds Testsuite S3 WebDAV
dependency versions: aws-0.22 bloomfilter-2.0.1.0 cryptonite-0.26 DAV-1.3.4 feed-1.3.0.1 ghc-8.8.4 http-client-0.6.4.1 persistent-sqlite-2.10.6.2 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.1.0
key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL X*
remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs httpalso borg hook external
operating system: linux aarch64
supported repository versions: 8 9 10
upgrade supported from repository versions: 0 1 2 3 4 5 6 7 8 9 10
local repository version: 8
### Please provide any additional information below.
[[!format sh """
# If you can, paste a complete transcript of the problem occurring here.
# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log
# End of transcript or log.
"""]]
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
> [[fixed|done]] --[[Joey]]


@ -0,0 +1,9 @@
[[!comment format=mdwn
username="joey"
subject="""comment 1"""
date="2022-10-26T18:53:40Z"
content="""
Using this command instead should work:

    sudo /home/gyurmo/.local/git-annex.linux/git-annex enable-tor 1000
"""]]


@ -0,0 +1,7 @@
[[!comment format=mdwn
username="joey"
subject="""comment 2"""
date="2022-10-26T19:44:24Z"
content="""
And I've fixed this problem now.
"""]]


@ -0,0 +1,39 @@
(Sorry for the uninformative title, but I had to work within the character limit.)
### Please describe the problem.
`git-annex metadata` does nothing on Windows if invoked while `git-annex addurl` is in progress on other files.
### What steps will reproduce the problem?
On Windows (but not on Linux or macOS, where everything works fine):
- Start `git-annex addurl` in batch mode
- Feed it two or more URLs
- After reading the completion message for a URL from addurl's stdout, but before reading the remaining output, run `git-annex metadata` in batch mode and try to set the metadata for the file that was just downloaded.
- `git-annex metadata` will output an empty line (i.e., just CR LF), and if nothing further is fed to it, it will exit successfully without printing anything else on stdout or stderr.
- Querying the file's metadata normally after `git-annex addurl` exits will show that no metadata was set for the file.
The Python script at <https://github.com/jwodder/git-annex-bug-20221024/blob/master/mvce.py> (Python 3.8+ required) will run the above steps and show the output from `git-annex metadata`. A sample run under GitHub Actions can be seen at <https://github.com/jwodder/git-annex-bug-20221024/actions/runs/3322463020/jobs/5491516209>; note the following section of the output under "Run script":
```
16:04:04 [DEBUG ] __main__: Opening pipe to: git-annex metadata --batch --json --json-error-messages
16:04:04 [DEBUG ] __main__: Input to metadata: b'{"file": "programming/gameboy.pdf", "fields": {"title": ["GameBoy Programming Manual"]}}\n'
16:04:04 [DEBUG ] __main__: r.returncode=0
16:04:04 [DEBUG ] __main__: r.stdout=b'\r\n'
16:04:04 [DEBUG ] __main__: r.stderr=b''
```
This problem does not always occur, but it seems to occur most of the time. Using `git-annex registerurl` in place of `git-annex metadata` works fine.
### What version of git-annex are you using? On what operating system?
git-annex 10.20221003, provided by datalad/git-annex, on Microsoft Windows Server 2022
### Please provide any additional information below.
This affects a hobby project of mine, "gamdam", implemented in [Python](https://github.com/jwodder/gamdam) and [Rust](https://github.com/jwodder/gamdam-rust), that interacts with git-annex.
[[!meta author=jwodder]]
> [[fixed|done]], see my comments --[[Joey]]

View file

@ -0,0 +1,42 @@
[[!comment format=mdwn
username="joey"
subject="""comment 1"""
date="2022-10-26T16:44:21Z"
content="""
Windows is not needed; this will happen in any
repository where `git annex adjust --unlock` has been run.

A simpler example:

    joey@darkstar:~/tmp/t2#master(unlocked)>git-annex addurl --batch
    http://google.com/
    addurl http://google.com/
    (to google.com_) ok
    ^Z
    joey@darkstar:~/tmp/t2#master(unlocked)>git-annex metadata --batch --json
    {"file":"google.com_","fields":{"author":["bar"]}}
I'm not sure if this is a bug, because it's documented to output a blank
line when batch mode is provided a file that is not an annexed file, and
the file is not an annexed file yet due to the pointer not yet having been
staged in git. Which is needed, when in an adjusted unlocked branch, for
git-annex to know that this is an annexed file.
When the file is locked, it just stats the symlink, so the fact that the
symlink is not yet staged in git doesn't matter.
It does not seem to make sense to have addurl update the index
after each file it adds, because that would make addurl of a lot
of files unnecessarily slow.
So, I think if anything is changed, it would need to be a change to make
the behavior with unlocked files consistent with the behavior with locked
files. Eg, when the symlink is not yet staged in git, treat it as a
non-annexed file. Which is also consistent with other handling of such
files by git-annex when not in batch mode.
The solution for your program, though, seems like it will be to end the
git-annex addurl process before trying to set metadata on just-added files.
Or, alternatively, to use addurl with --json, extract the key, and set the
metadata of the key.
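For example, something like this (abbreviated; the exact JSON fields can
vary between git-annex versions):

    $ git-annex addurl --batch --json
    http://example.com/file.pdf
    {"command":"addurl","file":"file.pdf","key":"SHA256E-s1234--...","success":true}
    $ git-annex metadata --batch --json
    {"key":"SHA256E-s1234--...","fields":{"title":["Example"]}}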
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="joey"
subject="""comment 2"""
date="2022-10-26T18:13:42Z"
content="""
I've made --batch handling of unstaged locked files consistent with the
handling of unstaged unlocked files.
"""]]

View file

@ -0,0 +1,16 @@
### Please describe the problem.
Using the S3 special remote with version support enabled (`versioning=yes`) leads to a bit of a strange effect with multiple `annex import` calls.
The first import from an existing bucket is fine. But if a file in the bucket is changed and one runs `annex import` again, git-annex records that the old version of that file is gone from the remote, requiring a `git annex fsck` right after the import to fix it. That seems a bit strange given that this versioning support is "native" to the special remote.
I suppose that's probably a more general issue with how import/export works, since there's no way for a special remote to communicate to annex whether two different versions of the same file (same remote path, but different key) would overwrite each other. Neither an importtree nor an exporttree remote has a way to tell annex whether and how the availability of a previous key associated with the same remote path was affected.
### What version of git-annex are you using? On what operating system?
Observed with several versions from 8-10 on linux. As laid out above, I strongly suspect this is true for all versions.
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
Lots. I love git-annex.
> [[fixed|done]] --[[Joey]]

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="joey"
subject="""comment 1"""
date="2022-10-11T16:28:39Z"
content="""
This looks like a simple fix. After importing from a versioned remote,
it can just skip updating the location logs to remove the keys that are not
present in the current tree. The same as is already done when exporting
to a versioned remote. I've made that change.
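In outline, the change is a guard of this shape (a self-contained sketch
with stand-in types, not the real code):

```
import Control.Monad (unless)

-- Stand-ins for git-annex's real types.
data Remote = Remote { remoteName :: String, isVersioned :: Bool }
type Key = String

-- After an import, mark keys that are missing from the imported tree
-- as not present only when the remote is unversioned; on a versioned
-- remote, old versions of files remain retrievable.
updateLocationLogs :: Remote -> [Key] -> IO ()
updateLocationLogs remote stalekeys =
    unless (isVersioned remote) $
        mapM_ logmissing stalekeys
  where
    logmissing k = putStrLn (k ++ " missing from " ++ remoteName remote)
```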
"""]]

View file

@ -0,0 +1,9 @@
[[!comment format=mdwn
username="benjamin.poldrack@d09ccff6d42dd20277610b59867cf7462927b8e3"
nickname="benjamin.poldrack"
avatar="http://cdn.libravatar.org/avatar/5c1a901caa7c2cfeeb7e17e786c5230d"
subject="comment 2"
date="2022-10-12T06:12:17Z"
content="""
Thank you!
"""]]

View file

@ -19,4 +19,10 @@ an ever-growing amount of memory, and be slowed down by the write attempts.
Still, it does give it something better to do while the write is failing
than sleeping and retrying, eg to do the rest of the work it's been asked
to do.
(Update: Reads from a database first call flushDbQueue, and it would
not be safe for that to return without actually writing to the database,
since the read could then see stale information. It turns
out that `git-annex get` does do a database read per file (getAssociatedFiles).
So it seems this approach will not work.)
"""]]

View file

@ -0,0 +1,42 @@
[[!comment format=mdwn
username="joey"
subject="""comment 25"""
date="2022-10-11T17:17:13Z"
content="""
Revisiting this, I don't understand why the htop at the top of this page
lists so many `git-annex get` processes. They all seem to be children
of a single parent `git-annex get` process. But `git-annex get` does
not fork children like those. I wonder if perhaps those child
processes were forked() to do something else, but somehow hung before
they could exec?!
By comment #2, annex.stalldetection is enabled, so `git-annex get`
runs 5 `git-annex transferrer` processes. Each of those can write to the
database, so concurrent sqlite writes can happen. So, my "re: comment 16"
comment was off-base in thinking there was a single git-annex process.
And so, I don't think the debug info requested in that comment is needed.
Also, it turns out that the database queue is being flushed after every
file it gets, which is causing a sqlite write per file. So there are a
lot of sqlite writes happening, which probably makes this issue much more
likely to occur, on systems with slow enough disk IO that it does occur.
Especially if the files are relatively small.
The reason for the queue flush is partly that Annex.run forces a queue
flush after every action. That could, I think, be avoided. That was only
done to make sure the queue is flushed before the program exits, which
should be able to be handled in a different way. But also,
the queue has to be flushed before reading from the database in order
for the read to see current information. In the `git-annex get` case,
it queues a change to the inode cache, and then reads the associated
files. To avoid that, it would need to keep track of the two different
tables in the keys db, and flush the queue only when querying a table
that a write had been queued to. That would be worth doing
just to generally speed up `git-annex get`. A quick benchmark shows
a get of 1000 small files that takes 17s will only take 12s once that's
done. And that's on a fast SSD, probably much more on a hard drive!
So I don't have a full solution, but speeding git-annex up significantly,
and also making whatever the problem in this bug is much less likely
to occur, is a good next step.
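A minimal sketch of that per-table tracking (names invented for
illustration, not the actual git-annex internals):

```
import Control.Concurrent.MVar
import Control.Monad (when)
import qualified Data.Set as S

-- The keys db has more than one table; track them separately.
data DbTable = AssociatedTable | ContentTable
    deriving (Eq, Ord)

-- Tables with writes queued but not yet flushed to sqlite.
type Dirty = MVar (S.Set DbTable)

-- Queueing a write records which table it touches.
queueWrite :: Dirty -> DbTable -> IO ()
queueWrite dirty table = modifyMVar_ dirty (return . S.insert table)

-- A read flushes the queue only when the table being read has queued
-- writes, so eg a read of associated files does not force a flush of
-- a queued inode cache write to the other table.
readTable :: Dirty -> IO () -> DbTable -> IO a -> IO a
readTable dirty flushqueue table r = do
    needflush <- modifyMVar dirty $ \s ->
        return (if S.member table s then S.empty else s, S.member table s)
    when needflush flushqueue
    r
```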
"""]]

View file

@ -0,0 +1,16 @@
[[!comment format=mdwn
username="joey"
subject="""comment 26"""
date="2022-10-12T19:34:05Z"
content="""
I've avoided the excessive sqlite database writes. Which doubled the speed
of git-annex in some circumstances, wow!
@yoh see if it still happens once you upgrade to a git-annex
with [[!commit 6fbd337e34cd62881ed5e21616e9c00d7bf35378]].
It would be possible for git-annex to do its own locking around writes
to the sqlite database. That would surely avoid any problem that sqlite might
have that would cause ErrorBusy. I want to think some about
[[todo/withExclusiveLock_blocking_issue]] first.
"""]]

View file

@ -0,0 +1,13 @@
[[!comment format=mdwn
username="joey"
subject="""comment 27"""
date="2022-10-17T18:49:47Z"
content="""
I've made it retry as long as necessary on ErrorBusy, while also noticing
when another process is suspended and has the sqlite database locked,
and avoiding retrying forever in that situation.
This seems to be as far as I can take this bug report; I don't know
100% for sure if I've fixed it, but git-annex's behavior should certainly
be improved.
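For reference, the retry loop has roughly this shape (a sketch; the busy
predicate and the suspension check are stand-ins, not the real
implementation):

```
{-# LANGUAGE LambdaCase #-}
import Control.Concurrent (threadDelay)
import Control.Exception (SomeException, throwIO, try)

-- Retry a sqlite write on busy errors, but give up when the process
-- holding the database lock appears to be suspended.
retryBusy :: (SomeException -> Bool) -> IO Bool -> IO a -> IO a
retryBusy isbusy holdersuspended a = go
  where
    go = try a >>= \case
        Right r -> return r
        Left e
            | isbusy e -> holdersuspended >>= \case
                True -> throwIO e -- avoid retrying forever
                False -> threadDelay 100000 >> go
            | otherwise -> throwIO e
```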
"""]]

View file

@ -150,6 +150,13 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`.
Indicates that `IMPORTKEY` can be used.
* `IMPORTKEYSUPPORTED-FAILURE`
Indicates that `IMPORTKEY` cannot be used.
* `VERSIONED`
Used to check if the special remote is versioned.
Note that this request may be made before or after `PREPARE`.
* `ISVERSIONED`
Indicates that the remote is versioned.
* `NOTVERSIONED`
Indicates that the remote is not versioned.
* `LISTIMPORTABLECONTENTS`
Used to get a list of all the files that are stored in the special
remote. A block of responses
@ -170,6 +177,8 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`.
be nested multiple levels deep.
This should only be used when the remote supports using
"TRANSFER RECEIVE Key" to retrieve historical versions of files.
And, it should only be used when the remote replies `ISVERSIONED`
to the `VERSIONED` message.
* `END`
Indicates the end of a block of responses.
* `LOCATION Name`

View file

@ -0,0 +1,23 @@
Hi,
I've set up a remote on GitHub with LFS enabled. Running `git-annex sync --content` failed to push the main branch:
```
pull lfstest
From github.com:myuser/myremote
* branch HEAD -> FETCH_HEAD
ok
push lfstest
Everything up-to-date
To github.com:myuser/myremote.git
! [rejected] main -> main (non-fast-forward)
error: failed to push some refs to 'github.com:myuser/myremote.git'
hint: Updates were rejected because the tip of your current branch is behind
hint: its remote counterpart. Integrate the remote changes (e.g.
hint: 'git pull ...') before pushing again.
hint: See the 'Note about fast-forwards' in 'git push --help' for details.
ok
```
Is this expected behaviour? Is it possible to correct the push failure?

Thanks in advance

View file

@ -0,0 +1,13 @@
[[!comment format=mdwn
username="joey"
subject="""comment 1"""
date="2022-10-17T16:28:48Z"
content="""
This doesn't involve LFS at all, it's a regular git branch being pushed in
the regular way. So you can certainly solve the problem with some
combination of `git pull`, `git merge`, and `git push`.
That said, I don't know why `git-annex sync` didn't work in your situation.
I created some test git-lfs repos on github and never saw any difficulty
syncing with them.
"""]]

View file

@ -0,0 +1,9 @@
[[!comment format=mdwn
username="AlexPraga"
avatar="http://cdn.libravatar.org/avatar/7c4e10fd352b81279b405f9f5337cdb7"
subject="comment 2"
date="2022-10-22T15:12:26Z"
content="""
Thanks for answering. I managed to correct it by force-pushing: `git push lfs main -f`.

Not sure why it did not work before, but it seems to be working now.
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="xloem"
avatar="http://cdn.libravatar.org/avatar/b8c087f7c5e6a9358748f0727c077f3b"
subject="ipfs"
date="2022-10-31T13:36:51Z"
content="""
It would be nice if the rpm or repository included auxiliary scripts such as git-annex-remote-ipfs, so that these would get installed/uninstalled/upgraded alongside the main project.
"""]]

View file

@ -1,23 +0,0 @@
git-annex 10.20220624 released with [[!toggle text="these changes"]]
[[!toggleable text=""" * init: Added --no-autoenable option.
* info: Added --autoenable option.
* initremote: Improve handling of type=git special remotes.
The location value no longer needs to match the url of an existing
git remote, and locations not using ssh:// will work now, including
both paths and host:/path
* Fix retrieval of an empty file that is stored in a special remote with
chunking enabled.
(Fixes a reversion in 8.20201103)
* move: Improve resuming a move that succeeded in transferring the
content, but where dropping failed due to eg a network problem,
in cases where numcopies checks prevented the resumed
move from dropping the object from the source repository.
* add, fix, lock, rekey: When several files were being processed,
replacing an annex symlink of a file that was already processed
with a new large file could sometimes cause that large file to be
added to git. These races have been fixed.
* add: Also fix a similar race that could cause a large file be added
to git when a small file was modified or overwritten while it was
being added.
* add --batch: Fix handling of a file that is skipped due to being
gitignored."""]]

View file

@ -0,0 +1,22 @@
git-annex 10.20221103 released with [[!toggle text="these changes"]]
[[!toggleable text=""" * Doubled the speed of git-annex drop when operating on many files,
and of git-annex get when operating on many tiny files.
* trust, untrust, semitrust, dead: Fix behavior when provided with
multiple repositories to operate on.
* trust, untrust, semitrust, dead: When provided with no parameters,
do not operate on a repository that has an empty name.
* move: Fix openFile crash with -J
(Fixes a reversion in 8.20201103)
* S3: Speed up importing from a large bucket when fileprefix= is set,
by only asking for files under the prefix.
* When importing from versioned remotes, fix tracking of the content
of deleted files.
* More robust handling of ErrorBusy when writing to sqlite databases.
* Avoid hanging when a suspended git-annex process is keeping a sqlite
database locked.
* Make --batch mode handle unstaged annexed files consistently
whether the file is unlocked or not. Note that this changes the
behavior of --batch when it is provided with locked files that are
in the process of being added to the repository, but have not yet been
staged in git.
* Make git-annex enable-tor work when using the linux standalone build."""]]

View file

@ -3,7 +3,7 @@ The following steps are tested on Windows 10 21h1 with Ubuntu 20 and are designe
** Limitations **
* The repository must be created with `annex.tune.objecthashlower=true`.
* `git annex adjust --unlock` will not work. Unlocked files will work most of the time. Avoid `annex.addunlocked=true` because it is likely to not work.
* `git annex adjust --unlock` will not work. Avoid `annex.addunlocked=true` and do not add multiple unlocked files to the index.
**Setup**
@ -13,7 +13,7 @@ The following steps are tested on Windows 10 21h1 with Ubuntu 20 and are designe
* `git config annex.sshcaching false`
* `git annex init`
* git-annex should not detect the filesystem as crippled but now set `git config annex.crippledfilesystem true`
* Safety of locked files will require these settings and scripts and the patch below.
* Safety of locked files will require these settings and scripts.
* `git config annex.freezecontent-command 'wsl-freezecontent %path'`
* `git config annex.thawcontent-command 'wsl-thawcontent %path'`
@ -71,8 +71,123 @@ fi
** Patches **
These patches may introduce problems when there are multiple independent processes writing to the repository. Use at your own risk.
<details>
<summary>This patch allows `git annex fix` on a crippled file system.</summary>
<summary>Create symlink to annexed objects in-place. The add, addunused, lock, and rekey commands will create symlinks in-place instead of in a temporary directory.</summary>
```
From d871289d22d2e86cb62776841343baf6c0f83484 Mon Sep 17 00:00:00 2001
From: Reiko Asakura <asakurareiko@protonmail.ch>
Date: Wed, 12 Oct 2022 17:13:55 -0400
Subject: [PATCH 2/3] Create symlink to annexed objects in-place
---
Annex/Ingest.hs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs
index 89dc8acea..ec35fb15d 100644
--- a/Annex/Ingest.hs
+++ b/Annex/Ingest.hs
@@ -301,7 +301,7 @@ restoreFile file key e = do
makeLink :: RawFilePath -> Key -> Maybe InodeCache -> Annex LinkTarget
makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do
l <- calcRepo $ gitAnnexLink file key
- replaceWorkTreeFile file' $ makeAnnexLink l . toRawFilePath
+ makeAnnexLink l file
-- touch symlink to have same time as the original file,
-- as provided in the InodeCache
--
2.30.2
```
</details>
<details>
<summary>Recreate symlinks after remote transfer. The copy, move, get, sync commands will recreate the symlink after transferring the file from a remote.</summary>
```
From 82ea0ffb02fbc5e4003a466a216c8d1030b7d70a Mon Sep 17 00:00:00 2001
From: Reiko Asakura <asakurareiko@protonmail.ch>
Date: Wed, 12 Oct 2022 19:10:07 -0400
Subject: [PATCH 3/3] Recreate symlinks after remote transfer
---
Annex/Link.hs | 7 +++++++
Command/Get.hs | 3 ++-
Command/Move.hs | 3 ++-
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/Annex/Link.hs b/Annex/Link.hs
index 1a344d07e..e0f172d1d 100644
--- a/Annex/Link.hs
+++ b/Annex/Link.hs
@@ -96,6 +96,13 @@ getAnnexLinkTarget' file coresymlinks = if coresymlinks
then mempty
else s
+relinkAssociatedFile :: AssociatedFile -> Bool -> Annex ()
+relinkAssociatedFile (AssociatedFile (Just file)) True =
+ getAnnexLinkTarget file >>= \case
+ Just target -> makeAnnexLink target file
+ _ -> noop
+relinkAssociatedFile _ _ = noop
+
makeAnnexLink :: LinkTarget -> RawFilePath -> Annex ()
makeAnnexLink = makeGitLink
diff --git a/Command/Get.hs b/Command/Get.hs
index a25fd8bf1..e16362f79 100644
--- a/Command/Get.hs
+++ b/Command/Get.hs
@@ -12,6 +12,7 @@ import qualified Remote
import Annex.Transfer
import Annex.NumCopies
import Annex.Wanted
+import Annex.Link
import qualified Command.Move
cmd :: Command
@@ -95,7 +96,7 @@ getKey' key afile = dispatch
showNote "not available"
showlocs []
return False
- dispatch remotes = notifyTransfer Download afile $ \witness -> do
+ dispatch remotes = observe (relinkAssociatedFile afile) $ notifyTransfer Download afile $ \witness -> do
ok <- pickRemote remotes $ \r -> ifM (probablyPresent r)
( docopy r witness
, return False
diff --git a/Command/Move.hs b/Command/Move.hs
index 55fed5c37..d733a7cbb 100644
--- a/Command/Move.hs
+++ b/Command/Move.hs
@@ -20,6 +20,7 @@ import Logs.Presence
import Logs.Trust
import Logs.File
import Annex.NumCopies
+import Annex.Link
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as L
@@ -241,7 +242,7 @@ fromPerform src removewhen key afile = do
then dispatch removewhen deststartedwithcopy True
else dispatch removewhen deststartedwithcopy =<< get
where
- get = notifyTransfer Download afile $
+ get = observe (relinkAssociatedFile afile) $ notifyTransfer Download afile $
download src key afile stdRetry
dispatch _ deststartedwithcopy False = do
--
2.30.2
```
</details>
<details>
<summary>Allow git-annex fix on crippled filesystem</summary>
```
From 65fe6e362dfbf2f54c8da5ca17c59af26de5ff83 Mon Sep 17 00:00:00 2001
@ -105,7 +220,7 @@ index 39853c894..2d66c1461 100644
** Usage tips **
* WSL1 will not create symlinks that work in Windows if created before the target file exists, such as after `git annex add` or `git annex get`. This can be fixed by recreating them with any method, such as delete them and `git checkout`.
* WSL1 will not create symlinks that work in Windows if created before the target file exists. This can be fixed by recreating them with any method, such as delete them and `git checkout`. Also see the above patches to make git-annex automatically recreate symlinks.
<details>
<summary>Sample script to recreate all symlinks under the current directory</summary>
@ -129,10 +244,7 @@ do(pathlib.Path('.'))
```
</details>
* Sometimes there will be SQLite errors when using multiple jobs, but retrying will work most of the time.
** Related bugs **
* [[bugs/WSL_adjusted_braches__58___smudge_fails_with_sqlite_thread_crashed_-_locking_protocol]]
* [[bugs/WSL1__58___git-annex-add_fails_in_DrvFs_filesystem]]
* [[bugs/problems_with_SSH_and_relative_paths]]

View file

@ -49,9 +49,11 @@ This should display something like:
Once you are sure things went on okay, you can synchronise this with `marcos`:
git annex sync
git annex sync --allow-unrelated-histories
This will push the metadata information to marcos, so it knows which files are available on `angela`. From there on, you can freely get and move files between the two repos!
This will push the metadata information to marcos, so it knows which files
are available on `angela`. From there on, you can freely get and move files
between the two repos!
Importing files from a third directory
--------------------------------------
@ -61,7 +63,7 @@ Say that some files on `angela` are actually spread out outside of the `~/mp3` d
cd ~/mp3
git annex import ~/music/
(!) Be careful that `~/music` is not a git-annex repository, or this will [[destroy it!|bugs/git annex import destroys a fellow git annex repository]].
(!) Be careful that `~/music` is not a git-annex repository.
Deleting deleted files
----------------------
@ -73,7 +75,3 @@ It is quite possible some files were removed (or renamed!) on `marcos` but not o
This will show files that are on `angela` and not on `marcos`. They could be new files that were only added on `angela`, so be careful! A manual analysis is necessary, but let's say you are certain those files are not relevant anymore, you can delete them from `angela`:
git annex drop <file>
If the file is a renamed or modified version from the original, you may need to use `--force`, but be careful! If you delete the wrong file, it will be lost forever!
> (!) Maybe this wouldn't happen with [[direct mode]] and an fsck? --[[anarcat]]

View file

@ -0,0 +1,59 @@
[[!comment format=mdwn
username="Stefan"
avatar="http://cdn.libravatar.org/avatar/1474db4b030b82320e3bd5e899ef2bad"
subject="This guide fails with &quot;fatal: refusing to merge unrelated histories&quot;"
date="2022-10-29T10:28:18Z"
content="""
This no longer works; here is an MWE to copy-paste (uses /tmp/{A,B}):
```
mkdir /tmp/A && touch /tmp/A/bigfile
mkdir /tmp/B && touch /tmp/B/bigfile
cd /tmp/A
git init
git annex init
git annex add .
git commit -m \"git annex yay\"
cd /tmp/B
git init
git remote add A /tmp/A
git fetch A
git annex info # this should display the two repos
git annex add .
git annex whereis
git annex sync
```
This fails with
```
commit
[main (root-commit) e9435bf] git-annex in stefan@notebook:/tmp/B
1 file changed, 1 insertion(+)
create mode 120000 bigfile
ok
pull A
fatal: refusing to merge unrelated histories
failed
push A
Enumerating objects: 19, done.
Counting objects: 100% (19/19), done.
Delta compression using up to 8 threads
Compressing objects: 100% (11/11), done.
Writing objects: 100% (14/14), 1.37 KiB | 1.37 MiB/s, done.
Total 14 (delta 2), reused 0 (delta 0), pack-reused 0
To /tmp/A
* [new branch] main -> synced/main
* [new branch] git-annex -> synced/git-annex
To /tmp/A
! [rejected] main -> main (non-fast-forward)
error: failed to push some refs to '/tmp/A'
hint: Updates were rejected because the tip of your current branch is behind
hint: its remote counterpart. Integrate the remote changes (e.g.
hint: 'git pull ...') before pushing again.
hint: See the 'Note about fast-forwards' in 'git push --help' for details.
ok
sync: 1 failed
```
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="joey"
subject="""comment 2"""
date="2022-10-31T16:09:12Z"
content="""
Indeed, you will need to use `git-annex sync --allow-unrelated-histories`
now in that situation. I have updated the tip.
"""]]

View file

@ -0,0 +1,27 @@
[[!comment format=mdwn
username="joey"
subject="""comment 8"""
date="2022-10-10T21:04:49Z"
content="""
I've finished the work on aws, which is in
<https://github.com/aristidb/aws/pull/281> and I hope will be merged soon.
git-annex now has a branch `anons3` that implements this, when
the S3 remote is configured with signature=anonymous.
    $ git-annex initremote s3-origin type=S3 importtree=yes encryption=none bucket=dandiarchive fileprefix=zarr-checksums/2ac71edb-738c-40ac-bd8c-8ca985adaa12/ signature=anonymous
    initremote s3-origin (checking bucket...) ok
    (recording state in git...)
    $ git-annex import master --from s3-origin
    list s3-origin ok
    import s3-origin .checksum
    ok
    import s3-origin 0/.checksum
    ok
    import s3-origin 0/0/.checksum
    ok
    ^C
Also, I've fixed it to only list files in the fileprefix, which
sped up the listing a *lot* in this bucket with many other files.
"""]]

View file

@ -0,0 +1,83 @@
Some parts of git-annex wait for an exclusive lock, and once they take it,
hold it while performing an operation. Now consider what happens if the
git-annex process is suspended. Another git-annex process that is running
and that waits to take the same exclusive lock (or a shared lock of the
same file) will stall forever, until the git-annex process is resumed.
These time windows tend to be small, but may not always be.
Is there any better way git-annex could handle this? Is it a significant
problem at all? I don't think I've ever seen it happen, but I rarely ^Z
git-annex either. How do other programs handle this, if at all?
--[[Joey]]
----
Would it be better for the second git-annex process, rather than hanging
indefinitely, to time out after a few seconds?
But how many seconds? What if the system is under heavy load?
> What could be done is, update the lock's file's mtime after successfully
> taking the lock. Then, as long as the mtime is advancing, some other
> process is actively using it, and it's ok for our process to wait
> longer.
>
> (Updating the mtime would be a problem when locking annex object files
> in v9 and earlier. Luckily, that locking is not done with a blocking
> lock anyway.)
> If the lock file's mtime is being checked, the process that is
> blocking with the lock held could periodically update the mtime.
> A background thread could manage that. If that's done every ten seconds,
> then an mtime more than 20 seconds old indicates that the lock is
> held by a suspended process. So git-annex would stall for up to 20-30
> seconds before erroring out when a lock is held by a suspended process.
> That seems acceptable; it doesn't need to deal with this situation
> instantly, it just needs to not block indefinitely. And updating the
> mtime every 10 seconds should not be too much IO.
>
> When an old version of git-annex has the lock held, it won't be updating
> the mtime. So if it takes longer than 10 seconds to do the operation with
> the lock held, a new version may complain that it's suspended when it's
> really not. This could be avoided by checking what process holds the
> lock, and whether it's suspended. But probably 10 seconds is enough
> time for all the operations git-annex takes a blocking lock for
> currently to finish, and if so we don't need to worry about this situation?
>
> > Unfortunately not: importKeys takes an exclusive lock and holds it while
> > downloading all the content! This seems like a bug though, because it can
> > cause other git-annex processes that are eg storing content in a remote
> > to block for a long time.
> >
> > Another one is Database.Export.writeLockDbWhile, which takes an
> > exclusive lock while running eg, Command.Export.changeExport,
> > which may sometimes need to do a lot of work.
> >
> > Another one is Annex.Queue.flush, which probably mostly runs in under
> > 10 seconds, but maybe not always, and when annex.queuesize is changed,
> > could surely take longer.
>
> To avoid problems when old git-annex's are also being used, it could
> update and check the mtime of a different file than the lock file.
>
> Start by trying to take the lock for up to 10 seconds. If it takes the
> lock, create the mtime file and start a thread that updates the mtime
> every 10 seconds until the lock is closed, and delete the mtime file
> before closing the lock handle. (A sketch of this approach appears at
> the end of this page.)
>
> When it times out taking the lock, if the mtime file does not exist, an
> old git-annex has the lock; if the mtime file does exist, then check
> if its timestamp has advanced; if not then a new git-annex has the lock
> and is suspended and it can error out.
>
> Oops: There's a race in the method above; a timeout may occur
> right when the other process has taken the lock, but has not updated
> the mtime file yet. Then that process would incorrectly be treated
> as an old git-annex process.
>
> So: To support old git-annex, it seems it will need to check, when the
> lock is held, what process has the lock. And then check if that process
> is suspended or not. Which means looking in /proc. Ugh.
>
> Or: Change to checking lock mtimes only in git-annex v11.
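----

A minimal sketch of the mtime-heartbeat approach described above (helper
names are hypothetical, not git-annex's actual locking interface):

```
import Control.Concurrent (forkIO, killThread, threadDelay)
import Control.Exception (bracket)
import Control.Monad (forever)
import System.Directory (doesFileExist, removeFile)
import System.PosixCompat.Files (getFileStatus, modificationTime, touchFile)

heartbeatInterval :: Int
heartbeatInterval = 10 -- seconds

-- Run an action while holding the lock, touching the mtime file every
-- 10 seconds so waiters can tell the holder is alive; a suspended
-- process stops touching it.
withHeartbeat :: FilePath -> IO a -> IO a
withHeartbeat mtimefile a = do
    touchFile mtimefile
    bracket
        (forkIO $ forever $ do
            threadDelay (heartbeatInterval * 1000000)
            touchFile mtimefile)
        (\t -> killThread t >> removeFile mtimefile)
        (const a)

-- Called by a waiter after timing out on the lock. A missing mtime
-- file means an old git-annex holds the lock; an mtime that does not
-- advance means the holder is suspended.
holderSuspended :: FilePath -> IO Bool
holderSuspended mtimefile = do
    exists <- doesFileExist mtimefile
    if not exists
        then return False -- old git-annex, cannot tell
        else do
            t1 <- modificationTime <$> getFileStatus mtimefile
            threadDelay (2 * heartbeatInterval * 1000000)
            t2 <- modificationTime <$> getFileStatus mtimefile
            return (t2 == t1)
```

Note that this sketch does not handle the race described above: a waiter
can time out right as the holder takes the lock, before the first touch
of the mtime file.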

View file

@ -1,5 +1,5 @@
Name: git-annex
Version: 10.20221003
Version: 10.20221103
Cabal-Version: 1.12
License: AGPL-3
Maintainer: Joey Hess <id@joeyh.name>
@ -830,6 +830,7 @@ Executable git-annex
Database.Init
Database.Keys
Database.Keys.Handle
Database.Keys.Tables
Database.Keys.SQL
Database.Queue
Database.Types