diff --git a/Annex.hs b/Annex.hs index efb3cc6a4b..9eb4c5f391 100644 --- a/Annex.hs +++ b/Annex.hs @@ -114,7 +114,7 @@ data AnnexState = AnnexState , fast :: Bool , daemon :: Bool , branchstate :: BranchState - , repoqueue :: Maybe Git.Queue.Queue + , repoqueue :: Maybe (Git.Queue.Queue Annex) , catfilehandles :: M.Map FilePath CatFileHandle , hashobjecthandle :: Maybe HashObjectHandle , checkattrhandle :: Maybe CheckAttrHandle diff --git a/Annex/AdjustedBranch.hs b/Annex/AdjustedBranch.hs index 131f1dd708..a7b9d91a44 100644 --- a/Annex/AdjustedBranch.hs +++ b/Annex/AdjustedBranch.hs @@ -224,8 +224,9 @@ adjustToCrippledFileSystem :: Annex () adjustToCrippledFileSystem = do warning "Entering an adjusted branch where files are unlocked as this filesystem does not support locked files." checkVersionSupported - whenM (isNothing <$> inRepo Git.Branch.current) $ - void $ inRepo $ Git.Branch.commitCommand Git.Branch.AutomaticCommit + whenM (isNothing <$> inRepo Git.Branch.current) $ do + cmode <- annexCommitMode <$> Annex.getGitConfig + void $ inRepo $ Git.Branch.commitCommand cmode [ Param "--quiet" , Param "--allow-empty" , Param "-m" @@ -310,12 +311,16 @@ commitAdjustedTree' :: Sha -> BasisBranch -> [Ref] -> Annex Sha commitAdjustedTree' treesha (BasisBranch basis) parents = go =<< catCommit basis where - go Nothing = inRepo mkcommit - go (Just basiscommit) = inRepo $ commitWithMetaData - (commitAuthorMetaData basiscommit) - (commitCommitterMetaData basiscommit) - mkcommit - mkcommit = Git.Branch.commitTree Git.Branch.AutomaticCommit + go Nothing = do + cmode <- annexCommitMode <$> Annex.getGitConfig + inRepo $ mkcommit cmode + go (Just basiscommit) = do + cmode <- annexCommitMode <$> Annex.getGitConfig + inRepo $ commitWithMetaData + (commitAuthorMetaData basiscommit) + (commitCommitterMetaData basiscommit) + (mkcommit cmode) + mkcommit cmode = Git.Branch.commitTree cmode adjustedBranchCommitMessage parents treesha {- This message should never be changed. -} @@ -444,7 +449,8 @@ mergeToAdjustedBranch tomerge (origbranch, adj) mergeconfig canresolvemerge comm reparent adjtree adjmergecommit (Just currentcommit) = do if (commitTree currentcommit /= adjtree) then do - c <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit + cmode <- annexCommitMode <$> Annex.getGitConfig + c <- inRepo $ Git.Branch.commitTree cmode ("Merged " ++ fromRef tomerge) [adjmergecommit] (commitTree currentcommit) inRepo $ Git.Branch.update "updating adjusted branch" currbranch c @@ -534,12 +540,14 @@ reverseAdjustedCommit commitparent adj (csha, basiscommit) origbranch | length (commitParent basiscommit) > 1 = return $ Left $ "unable to propigate merge commit " ++ show csha ++ " back to " ++ show origbranch | otherwise = do + cmode <- annexCommitMode <$> Annex.getGitConfig treesha <- reverseAdjustedTree commitparent adj csha revadjcommit <- inRepo $ commitWithMetaData (commitAuthorMetaData basiscommit) (commitCommitterMetaData basiscommit) $ - Git.Branch.commitTree Git.Branch.AutomaticCommit - (commitMessage basiscommit) [commitparent] treesha + Git.Branch.commitTree cmode + (commitMessage basiscommit) + [commitparent] treesha return (Right revadjcommit) {- Adjusts the tree of the basis, changing only the files that the diff --git a/Annex/Branch.hs b/Annex/Branch.hs index b033c059cf..faf11ce05a 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -109,8 +109,9 @@ getBranch = maybe (hasOrigin >>= go >>= use) return =<< branchsha [Param "branch", Param $ fromRef name, Param $ fromRef originname] fromMaybe (error $ "failed to create " ++ fromRef name) <$> branchsha - go False = withIndex' True $ - inRepo $ Git.Branch.commitAlways Git.Branch.AutomaticCommit "branch created" fullname [] + go False = withIndex' True $ do + cmode <- annexCommitMode <$> Annex.getGitConfig + inRepo $ Git.Branch.commitAlways cmode "branch created" fullname [] use sha = do setIndexSha sha return sha @@ -317,7 +318,8 @@ commitIndex jl branchref message parents = do commitIndex' :: JournalLocked -> Git.Ref -> String -> String -> Integer -> [Git.Ref] -> Annex () commitIndex' jl branchref message basemessage retrynum parents = do updateIndex jl branchref - committedref <- inRepo $ Git.Branch.commitAlways Git.Branch.AutomaticCommit message fullname parents + cmode <- annexCommitMode <$> Annex.getGitConfig + committedref <- inRepo $ Git.Branch.commitAlways cmode message fullname parents setIndexSha committedref parentrefs <- commitparents <$> catObject committedref when (racedetected branchref parentrefs) $ @@ -551,7 +553,8 @@ performTransitionsLocked jl ts neednewlocalbranch transitionedrefs = do Annex.Queue.flush if neednewlocalbranch then do - committedref <- inRepo $ Git.Branch.commitAlways Git.Branch.AutomaticCommit message fullname transitionedrefs + cmode <- annexCommitMode <$> Annex.getGitConfig + committedref <- inRepo $ Git.Branch.commitAlways cmode message fullname transitionedrefs setIndexSha committedref else do ref <- getBranch @@ -657,9 +660,10 @@ rememberTreeish treeish graftpoint = lockJournal $ \jl -> do origtree <- fromMaybe (giveup "unable to determine git-annex branch tree") <$> inRepo (Git.Ref.tree branchref) addedt <- inRepo $ Git.Tree.graftTree treeish graftpoint origtree - c <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit + cmode <- annexCommitMode <$> Annex.getGitConfig + c <- inRepo $ Git.Branch.commitTree cmode "graft" [branchref] addedt - c' <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit + c' <- inRepo $ Git.Branch.commitTree cmode "graft cleanup" [c] origtree inRepo $ Git.Branch.update' fullname c' -- The tree in c' is the same as the tree in branchref, diff --git a/Annex/Concurrent.hs b/Annex/Concurrent.hs index 4626a9294f..1ff8e0c730 100644 --- a/Annex/Concurrent.hs +++ b/Annex/Concurrent.hs @@ -90,10 +90,20 @@ enteringStage newstage a = Annex.getState Annex.workers >>= \case Nothing -> a Just tv -> do mytid <- liftIO myThreadId - let set = changeStageTo mytid tv newstage - let restore = maybe noop (void . changeStageTo mytid tv) + let set = changeStageTo mytid tv (const newstage) + let restore = maybe noop (void . changeStageTo mytid tv . const) bracket set restore (const a) +{- Transition the current thread to the initial stage. + - This is done once the thread is ready to begin work. + -} +enteringInitialStage :: Annex () +enteringInitialStage = Annex.getState Annex.workers >>= \case + Nothing -> noop + Just tv -> do + mytid <- liftIO myThreadId + void $ changeStageTo mytid tv initialStage + {- This needs to leave the WorkerPool with the same number of - idle and active threads, and with the same number of threads for each - WorkerStage. So, all it can do is swap the WorkerStage of our thread's @@ -110,14 +120,15 @@ enteringStage newstage a = Annex.getState Annex.workers >>= \case - in the pool than spareVals. That does not prevent other threads that call - this from using them though, so it's fine. -} -changeStageTo :: ThreadId -> TMVar (WorkerPool AnnexState) -> WorkerStage -> Annex (Maybe WorkerStage) -changeStageTo mytid tv newstage = liftIO $ +changeStageTo :: ThreadId -> TMVar (WorkerPool AnnexState) -> (UsedStages -> WorkerStage) -> Annex (Maybe WorkerStage) +changeStageTo mytid tv getnewstage = liftIO $ replaceidle >>= maybe (return Nothing) (either waitidle (return . Just)) where replaceidle = atomically $ do pool <- takeTMVar tv + let newstage = getnewstage (usedStages pool) let notchanging = do putTMVar tv pool return Nothing @@ -128,7 +139,7 @@ changeStageTo mytid tv newstage = liftIO $ Nothing -> do putTMVar tv $ addWorkerPool (IdleWorker oldstage) pool' - return $ Just $ Left (myaid, oldstage) + return $ Just $ Left (myaid, newstage, oldstage) Just pool'' -> do -- optimisation putTMVar tv $ @@ -139,27 +150,26 @@ changeStageTo mytid tv newstage = liftIO $ _ -> notchanging else notchanging - waitidle (myaid, oldstage) = atomically $ do + waitidle (myaid, newstage, oldstage) = atomically $ do pool <- waitIdleWorkerSlot newstage =<< takeTMVar tv putTMVar tv $ addWorkerPool (ActiveWorker myaid newstage) pool return (Just oldstage) --- | Waits until there's an idle worker in the worker pool --- for its initial stage, removes it from the pool, and returns its state. +-- | Waits until there's an idle StartStage worker in the worker pool, +-- removes it from the pool, and returns its state. -- -- If the worker pool is not already allocated, returns Nothing. -waitInitialWorkerSlot :: TMVar (WorkerPool Annex.AnnexState) -> STM (Maybe (Annex.AnnexState, WorkerStage)) -waitInitialWorkerSlot tv = do +waitStartWorkerSlot :: TMVar (WorkerPool Annex.AnnexState) -> STM (Maybe (Annex.AnnexState, WorkerStage)) +waitStartWorkerSlot tv = do pool <- takeTMVar tv - let stage = initialStage (usedStages pool) - st <- go stage pool - return $ Just (st, stage) + st <- go pool + return $ Just (st, StartStage) where - go wantstage pool = case spareVals pool of + go pool = case spareVals pool of [] -> retry (v:vs) -> do let pool' = pool { spareVals = vs } - putTMVar tv =<< waitIdleWorkerSlot wantstage pool' + putTMVar tv =<< waitIdleWorkerSlot StartStage pool' return v waitIdleWorkerSlot :: WorkerStage -> WorkerPool Annex.AnnexState -> STM (WorkerPool Annex.AnnexState) diff --git a/Annex/Content.hs b/Annex/Content.hs index 3b41784a5e..43fc3238c6 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -776,7 +776,7 @@ downloadUrl k p urls file = -- download command is used. meteredFile file (Just p) k $ Url.withUrlOptions $ \uo -> - liftIO $ anyM (\u -> Url.download p u file uo) urls + anyM (\u -> Url.download p u file uo) urls {- Copies a key's content, when present, to a temp file. - This is used to speed up some rsyncs. -} diff --git a/Annex/Init.hs b/Annex/Init.hs index 1bc82710f4..a762bf690c 100644 --- a/Annex/Init.hs +++ b/Annex/Init.hs @@ -108,9 +108,9 @@ initialize' mversion = checkCanInitialize $ do unlessM (isJust <$> getVersion) $ setVersion (fromMaybe defaultVersion mversion) configureSmudgeFilter - showSideAction "scanning for unlocked files" - scanUnlockedFiles unlessM isBareRepo $ do + showSideAction "scanning for unlocked files" + scanUnlockedFiles hookWrite postCheckoutHook hookWrite postMergeHook AdjustedBranch.checkAdjustedClone >>= \case diff --git a/Annex/Link.hs b/Annex/Link.hs index 62a5635de4..00c2d68d9e 100644 --- a/Annex/Link.hs +++ b/Annex/Link.hs @@ -192,12 +192,13 @@ restagePointerFile (Restage True) f orig = withTSDelta $ \tsd -> do -- on all still-unmodified files, using a copy of the index file, -- to bypass the lock. Then replace the old index file with the new -- updated index file. + runner :: Git.Queue.InternalActionRunner Annex runner = Git.Queue.InternalActionRunner "restagePointerFile" $ \r l -> do - realindex <- Git.Index.currentIndexFile r + realindex <- liftIO $ Git.Index.currentIndexFile r let lock = Git.Index.indexFileLock realindex - lockindex = catchMaybeIO $ Git.LockFile.openLock' lock - unlockindex = maybe noop Git.LockFile.closeLock - showwarning = warningIO $ unableToRestage Nothing + lockindex = liftIO $ catchMaybeIO $ Git.LockFile.openLock' lock + unlockindex = liftIO . maybe noop Git.LockFile.closeLock + showwarning = warning $ unableToRestage Nothing go Nothing = showwarning go (Just _) = withTmpDirIn (Git.localGitDir r) "annexindex" $ \tmpdir -> do let tmpindex = tmpdir "index" @@ -216,7 +217,7 @@ restagePointerFile (Restage True) f orig = withTSDelta $ \tsd -> do let replaceindex = catchBoolIO $ do moveFile tmpindex realindex return True - ok <- createLinkOrCopy realindex tmpindex + ok <- liftIO $ createLinkOrCopy realindex tmpindex <&&> updatetmpindex <&&> replaceindex unless ok showwarning diff --git a/Annex/Queue.hs b/Annex/Queue.hs index 0e98897800..5bbe04dbc5 100644 --- a/Annex/Queue.hs +++ b/Annex/Queue.hs @@ -28,24 +28,24 @@ import qualified Git.UpdateIndex addCommand :: String -> [CommandParam] -> [FilePath] -> Annex () addCommand command params files = do q <- get - store <=< flushWhenFull <=< inRepo $ - Git.Queue.addCommand command params files q + store =<< flushWhenFull =<< + (Git.Queue.addCommand command params files q =<< gitRepo) -addInternalAction :: Git.Queue.InternalActionRunner -> [(FilePath, IO Bool)] -> Annex () +addInternalAction :: Git.Queue.InternalActionRunner Annex -> [(FilePath, IO Bool)] -> Annex () addInternalAction runner files = do q <- get - store <=< flushWhenFull <=< inRepo $ - Git.Queue.addInternalAction runner files q + store =<< flushWhenFull =<< + (Git.Queue.addInternalAction runner files q =<< gitRepo) {- Adds an update-index stream to the queue. -} addUpdateIndex :: Git.UpdateIndex.Streamer -> Annex () addUpdateIndex streamer = do q <- get - store <=< flushWhenFull <=< inRepo $ - Git.Queue.addUpdateIndex streamer q + store =<< flushWhenFull =<< + (Git.Queue.addUpdateIndex streamer q =<< gitRepo) {- Runs the queue if it is full. -} -flushWhenFull :: Git.Queue.Queue -> Annex Git.Queue.Queue +flushWhenFull :: Git.Queue.Queue Annex -> Annex (Git.Queue.Queue Annex) flushWhenFull q | Git.Queue.full q = flush' q | otherwise = return q @@ -64,25 +64,25 @@ flush = do - But, flushing two queues at the same time could lead to failures due to - git locking files. So, only one queue is allowed to flush at a time. -} -flush' :: Git.Queue.Queue -> Annex Git.Queue.Queue +flush' :: Git.Queue.Queue Annex -> Annex (Git.Queue.Queue Annex) flush' q = withExclusiveLock gitAnnexGitQueueLock $ do showStoringStateAction - inRepo $ Git.Queue.flush q + Git.Queue.flush q =<< gitRepo {- Gets the size of the queue. -} size :: Annex Int size = Git.Queue.size <$> get -get :: Annex Git.Queue.Queue +get :: Annex (Git.Queue.Queue Annex) get = maybe new return =<< getState repoqueue -new :: Annex Git.Queue.Queue +new :: Annex (Git.Queue.Queue Annex) new = do q <- Git.Queue.new . annexQueueSize <$> getGitConfig store q return q -store :: Git.Queue.Queue -> Annex () +store :: Git.Queue.Queue Annex -> Annex () store q = changeState $ \s -> s { repoqueue = Just q } mergeFrom :: AnnexState -> Annex () diff --git a/Annex/RemoteTrackingBranch.hs b/Annex/RemoteTrackingBranch.hs index 02f0bb01b4..ade303e02d 100644 --- a/Annex/RemoteTrackingBranch.hs +++ b/Annex/RemoteTrackingBranch.hs @@ -17,6 +17,7 @@ module Annex.RemoteTrackingBranch import Annex.Common import Annex.CatFile +import qualified Annex import Git.Types import qualified Git.Ref import qualified Git.Branch @@ -72,9 +73,10 @@ makeRemoteTrackingBranchMergeCommit tb commitsha = _ -> return commitsha makeRemoteTrackingBranchMergeCommit' :: Sha -> Sha -> Sha -> Annex Sha -makeRemoteTrackingBranchMergeCommit' commitsha importedhistory treesha = +makeRemoteTrackingBranchMergeCommit' commitsha importedhistory treesha = do + cmode <- annexCommitMode <$> Annex.getGitConfig inRepo $ Git.Branch.commitTree - Git.Branch.AutomaticCommit + cmode "remote tracking branch" [commitsha, importedhistory] treesha diff --git a/Annex/SpecialRemote.hs b/Annex/SpecialRemote.hs index 828eb6e775..37e0e2129a 100644 --- a/Annex/SpecialRemote.hs +++ b/Annex/SpecialRemote.hs @@ -34,21 +34,9 @@ findExisting name = do t <- trustMap headMaybe . sortBy (comparing $ \(u, _, _) -> Down $ M.lookup u t) - . findByName name + . findByRemoteConfig (\c -> lookupName c == Just name) <$> Logs.Remote.readRemoteLog -findByName :: RemoteName -> M.Map UUID RemoteConfig -> [(UUID, RemoteConfig, Maybe (ConfigFrom UUID))] -findByName n = map sameasuuid . filter (matching . snd) . M.toList - where - matching c = case lookupName c of - Nothing -> False - Just n' - | n' == n -> True - | otherwise -> False - sameasuuid (u, c) = case M.lookup sameasUUIDField c of - Nothing -> (u, c, Nothing) - Just u' -> (toUUID u', c, Just (ConfigFrom u)) - newConfig :: RemoteName -> Maybe (Sameas UUID) diff --git a/Annex/SpecialRemote/Config.hs b/Annex/SpecialRemote/Config.hs index 73688569c6..e09ae8ecc7 100644 --- a/Annex/SpecialRemote/Config.hs +++ b/Annex/SpecialRemote/Config.hs @@ -101,3 +101,11 @@ removeSameasInherited :: RemoteConfig -> RemoteConfig removeSameasInherited c = case M.lookup sameasUUIDField c of Nothing -> c Just _ -> M.withoutKeys c sameasInherits + +{- Finds remote uuids with matching RemoteConfig. -} +findByRemoteConfig :: (RemoteConfig -> Bool) -> M.Map UUID RemoteConfig -> [(UUID, RemoteConfig, Maybe (ConfigFrom UUID))] +findByRemoteConfig matching = map sameasuuid . filter (matching . snd) . M.toList + where + sameasuuid (u, c) = case M.lookup sameasUUIDField c of + Nothing -> (u, c, Nothing) + Just u' -> (toUUID u', c, Just (ConfigFrom u)) diff --git a/Annex/Url.hs b/Annex/Url.hs index b1f970a6a5..bcc6a747f5 100644 --- a/Annex/Url.hs +++ b/Annex/Url.hs @@ -1,24 +1,39 @@ {- Url downloading, with git-annex user agent and configured http - headers, security restrictions, etc. - - - Copyright 2013-2018 Joey Hess + - Copyright 2013-2019 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} module Annex.Url ( - module U, withUrlOptions, getUrlOptions, getUserAgent, ipAddressesUnlimited, + checkBoth, + download, + exists, + getUrlInfo, + U.downloadQuiet, + U.URLString, + U.UrlOptions(..), + U.UrlInfo(..), + U.sinkResponseFile, + U.matchStatusCodeException, + U.downloadConduit, + U.downloadPartial, + U.parseURIRelaxed, + U.allowedScheme, + U.assumeUrlExists, ) where import Annex.Common import qualified Annex -import Utility.Url as U +import qualified Utility.Url as U import Utility.IPAddress import Utility.HttpManagerRestricted +import Utility.Metered import qualified BuildInfo import Network.Socket @@ -43,7 +58,7 @@ getUrlOptions = Annex.getState Annex.urloptions >>= \case where mk = do (urldownloader, manager) <- checkallowedaddr - mkUrlOptions + U.mkUrlOptions <$> (Just <$> getUserAgent) <*> headers <*> pure urldownloader @@ -108,3 +123,27 @@ ipAddressesUnlimited = withUrlOptions :: (U.UrlOptions -> Annex a) -> Annex a withUrlOptions a = a =<< getUrlOptions + +checkBoth :: U.URLString -> Maybe Integer -> U.UrlOptions -> Annex Bool +checkBoth url expected_size uo = + liftIO (U.checkBoth url expected_size uo) >>= \case + Right r -> return r + Left err -> warning err >> return False + +download :: MeterUpdate -> U.URLString -> FilePath -> U.UrlOptions -> Annex Bool +download meterupdate url file uo = + liftIO (U.download meterupdate url file uo) >>= \case + Right () -> return True + Left err -> warning err >> return False + +exists :: U.URLString -> U.UrlOptions -> Annex Bool +exists url uo = liftIO (U.exists url uo) >>= \case + Right b -> return b + Left err -> warning err >> return False + +getUrlInfo :: U.URLString -> U.UrlOptions -> Annex U.UrlInfo +getUrlInfo url uo = liftIO (U.getUrlInfo url uo) >>= \case + Right i -> return i + Left err -> do + warning err + return $ U.UrlInfo False Nothing Nothing diff --git a/Annex/View.hs b/Annex/View.hs index a136b6b842..412cca8e0e 100644 --- a/Annex/View.hs +++ b/Annex/View.hs @@ -12,6 +12,7 @@ import Annex.View.ViewedFile import Types.View import Types.MetaData import Annex.MetaData +import qualified Annex import qualified Git import qualified Git.DiffTree as DiffTree import qualified Git.Branch @@ -418,7 +419,8 @@ withViewIndex a = do genViewBranch :: View -> Annex Git.Branch genViewBranch view = withViewIndex $ do let branch = branchView view - void $ inRepo $ Git.Branch.commit Git.Branch.AutomaticCommit True (fromRef branch) branch [] + cmode <- annexCommitMode <$> Annex.getGitConfig + void $ inRepo $ Git.Branch.commit cmode True (fromRef branch) branch [] return branch withCurrentView :: (View -> Annex a) -> Annex a diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs index 5371101b49..8d797ccfe6 100644 --- a/Annex/WorkTree.hs +++ b/Annex/WorkTree.hs @@ -21,6 +21,7 @@ import qualified Git.LsTree import qualified Git.Types import qualified Database.Keys import qualified Database.Keys.SQL +import Config {- Looks up the key corresponding to an annexed file in the work tree, - by examining what the file links to. @@ -74,7 +75,7 @@ ifAnnexed file yes no = maybe no yes =<< lookupFile file - as-is. -} scanUnlockedFiles :: Annex () -scanUnlockedFiles = whenM (inRepo Git.Ref.headExists) $ do +scanUnlockedFiles = whenM (inRepo Git.Ref.headExists <&&> not <$> isBareRepo) $ do Database.Keys.runWriter $ liftIO . Database.Keys.SQL.dropAllAssociatedFiles (l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive Git.Ref.headRef diff --git a/Annex/YoutubeDl.hs b/Annex/YoutubeDl.hs index 9855a391a1..64ca3fbf42 100644 --- a/Annex/YoutubeDl.hs +++ b/Annex/YoutubeDl.hs @@ -18,7 +18,6 @@ import Annex.Common import qualified Annex import Annex.Content import Annex.Url -import Utility.Url (URLString) import Utility.DiskFree import Utility.HtmlDetect import Utility.Process.Transcript diff --git a/Assistant/MakeRepo.hs b/Assistant/MakeRepo.hs index 372f216630..67e83ef5cd 100644 --- a/Assistant/MakeRepo.hs +++ b/Assistant/MakeRepo.hs @@ -53,8 +53,9 @@ initRepo True primary_assistant_repo dir desc mgroup = inDir dir $ do initRepo' desc mgroup {- Initialize the master branch, so things that expect - to have it will work, before any files are added. -} - unlessM (Git.Config.isBare <$> gitRepo) $ - void $ inRepo $ Git.Branch.commitCommand Git.Branch.AutomaticCommit + unlessM (Git.Config.isBare <$> gitRepo) $ do + cmode <- annexCommitMode <$> Annex.getGitConfig + void $ inRepo $ Git.Branch.commitCommand cmode [ Param "--quiet" , Param "--allow-empty" , Param "-m" diff --git a/Assistant/Restart.hs b/Assistant/Restart.hs index 1660c1317d..ef8477ead1 100644 --- a/Assistant/Restart.hs +++ b/Assistant/Restart.hs @@ -95,7 +95,9 @@ newAssistantUrl repo = do - warp-tls listens to http, in order to show an error page, so this works. -} assistantListening :: URLString -> IO Bool -assistantListening url = catchBoolIO $ exists url' =<< defUrlOptions +assistantListening url = catchBoolIO $ do + uo <- defUrlOptions + (== Right True) <$> exists url' uo where url' = case parseURI url of Nothing -> url diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index a81f21f65c..09fac0b311 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -36,7 +36,6 @@ import qualified Annex import Utility.InodeCache import qualified Database.Keys import qualified Command.Sync -import qualified Git.Branch import Utility.Tuple import Utility.Metered @@ -231,7 +230,8 @@ commitStaged msg = do case v of Left _ -> return False Right _ -> do - ok <- Command.Sync.commitStaged Git.Branch.AutomaticCommit msg + cmode <- annexCommitMode <$> Annex.getGitConfig + ok <- Command.Sync.commitStaged cmode msg when ok $ Command.Sync.updateBranches =<< getCurrentBranch return ok diff --git a/Assistant/Threads/Merger.hs b/Assistant/Threads/Merger.hs index d1f29e7845..f2284b6055 100644 --- a/Assistant/Threads/Merger.hs +++ b/Assistant/Threads/Merger.hs @@ -14,6 +14,7 @@ import Assistant.Sync import Utility.DirWatcher import Utility.DirWatcher.Types import Annex.CurrentBranch +import qualified Annex import qualified Annex.Branch import qualified Git import qualified Git.Branch @@ -80,11 +81,13 @@ onChange file [ "merging", Git.fromRef changedbranch , "into", Git.fromRef b ] - void $ liftAnnex $ Command.Sync.merge - currbranch Command.Sync.mergeConfig - def - Git.Branch.AutomaticCommit - changedbranch + void $ liftAnnex $ do + cmode <- annexCommitMode <$> Annex.getGitConfig + Command.Sync.merge + currbranch Command.Sync.mergeConfig + def + cmode + changedbranch mergecurrent' _ = noop {- Is the first branch a synced branch or remote tracking branch related diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs index cef02f0b20..67c986301b 100644 --- a/Assistant/Threads/Watcher.hs +++ b/Assistant/Threads/Watcher.hs @@ -183,7 +183,7 @@ runHandler :: Handler -> FilePath -> Maybe FileStatus -> Assistant () runHandler handler file filestatus = void $ do r <- tryIO <~> handler (normalize file) filestatus case r of - Left e -> liftIO $ warningIO $ show e + Left e -> liftAnnex $ warning $ show e Right Nothing -> noop Right (Just change) -> recordChange change where diff --git a/Assistant/Upgrade.hs b/Assistant/Upgrade.hs index a8920bb9c5..e46ac86ced 100644 --- a/Assistant/Upgrade.hs +++ b/Assistant/Upgrade.hs @@ -40,9 +40,10 @@ import Utility.Metered import qualified Utility.Lsof as Lsof import qualified BuildInfo import qualified Utility.Url as Url -import qualified Annex.Url as Url +import qualified Annex.Url as Url hiding (download) import Utility.Tuple +import Data.Either import qualified Data.Map as M {- Upgrade without interaction in the webapp. -} @@ -323,8 +324,8 @@ downloadDistributionInfo = do liftIO $ withTmpDir "git-annex.tmp" $ \tmpdir -> do let infof = tmpdir "info" let sigf = infof ++ ".sig" - ifM (Url.download nullMeterUpdate distributionInfoUrl infof uo - <&&> Url.download nullMeterUpdate distributionInfoSigUrl sigf uo + ifM (isRight <$> Url.download nullMeterUpdate distributionInfoUrl infof uo + <&&> (isRight <$> Url.download nullMeterUpdate distributionInfoSigUrl sigf uo) <&&> verifyDistributionSig gpgcmd sigf) ( parseInfoFile <$> readFileStrict infof , return Nothing diff --git a/Assistant/WebApp/Configurators/IA.hs b/Assistant/WebApp/Configurators/IA.hs index 84d609761e..04feb965b6 100644 --- a/Assistant/WebApp/Configurators/IA.hs +++ b/Assistant/WebApp/Configurators/IA.hs @@ -192,7 +192,7 @@ escapeHeader = escapeURIString (\c -> isUnescapedInURI c && c /= ' ') getRepoInfo :: RemoteConfig -> Widget getRepoInfo c = do uo <- liftAnnex Url.getUrlOptions - exists <- liftIO $ catchDefaultIO False $ Url.exists url uo + exists <- liftAnnex $ catchDefaultIO False $ Url.exists url uo [whamlet| Internet Archive item diff --git a/Build/DistributionUpdate.hs b/Build/DistributionUpdate.hs index 976477ce43..0efe02fb68 100644 --- a/Build/DistributionUpdate.hs +++ b/Build/DistributionUpdate.hs @@ -26,6 +26,7 @@ import Git.Command import Data.Time.Clock import Data.Char +import Data.Either import System.Posix.Directory -- git-annex distribution signing key (for Joey Hess) @@ -86,7 +87,7 @@ getbuild repodir (url, f) = do putStrLn $ "*** " ++ s return Nothing uo <- defUrlOptions - ifM (download nullMeterUpdate url tmp uo) + ifM (isRight <$> download nullMeterUpdate url tmp uo) ( ifM (liftIO $ virusFree tmp) ( do bv2 <- getbv diff --git a/Build/OSXMkLibs.hs b/Build/OSXMkLibs.hs index 17af6592b4..8a11c88ee5 100644 --- a/Build/OSXMkLibs.hs +++ b/Build/OSXMkLibs.hs @@ -50,8 +50,12 @@ installLibs appbase replacement_libs libmap = do let symdest = appbase shortlib -- This is a hack; libraries need to be in the same -- directory as the program, so also link them into the - -- extra directory. - let symdestextra = appbase "extra" shortlib + -- extra and git-core directories so programs in those will + -- find them. + let symdestextra = + [ appbase "extra" shortlib + , appbase "git-core" shortlib + ] ifM (doesFileExist dest) ( return Nothing , do @@ -59,9 +63,11 @@ installLibs appbase replacement_libs libmap = do putStrLn $ "installing " ++ pathlib ++ " as " ++ shortlib unlessM (boolSystem "cp" [File pathlib, File dest] <&&> boolSystem "chmod" [Param "644", File dest] - <&&> boolSystem "ln" [Param "-s", File fulllib, File symdest] - <&&> boolSystem "ln" [Param "-s", File (".." fulllib), File symdestextra]) $ + <&&> boolSystem "ln" [Param "-s", File fulllib, File symdest]) $ error "library install failed" + forM_ symdestextra $ \d -> + unlessM (boolSystem "ln" [Param "-s", File (".." fulllib), File d]) $ + error "library linking failed" return $ Just appbase ) return (catMaybes libs, replacement_libs', libmap') diff --git a/Build/Standalone.hs b/Build/Standalone.hs index 6f1dab3690..88cc69facb 100644 --- a/Build/Standalone.hs +++ b/Build/Standalone.hs @@ -56,16 +56,26 @@ installGitLibs topdir = do if issymlink then do -- many git-core files may symlink to eg - -- ../../git. The link targets are put - -- into a subdirectory so all links to - -- .../git get the same binary. + -- ../../bin/git, which is located outside + -- the git-core directory. The target of + -- such links is installed into the progDir + -- (if not already there), and the links + -- repointed to it. + -- + -- Other git-core files symlink to a file + -- beside them in the directory. Those + -- links can be copied as-is. linktarget <- readSymbolicLink f - let linktarget' = gitcoredestdir "bin" takeFileName linktarget - createDirectoryIfMissing True (takeDirectory linktarget') - L.readFile f >>= L.writeFile linktarget' - nukeFile destf - rellinktarget <- relPathDirToFile (takeDirectory destf) linktarget' - createSymbolicLink rellinktarget destf + if takeFileName linktarget == linktarget + then cp f destf + else do + let linktarget' = progDir topdir takeFileName linktarget + unlessM (doesFileExist linktarget') $ do + createDirectoryIfMissing True (takeDirectory linktarget') + L.readFile f >>= L.writeFile linktarget' + nukeFile destf + rellinktarget <- relPathDirToFile (takeDirectory destf) linktarget' + createSymbolicLink rellinktarget destf else cp f destf -- install git's template files diff --git a/CHANGELOG b/CHANGELOG index f0d80785b2..a43a77fec5 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -13,11 +13,42 @@ git-annex (8.20191107) UNRELEASED; urgency=medium Microbenchmarks show around 10-25% speedup of sqlite database operations. * Improved serialization of filenames and keys to the sqlite databases, avoiding encoding problems. - * Windows: Fix handling of changes to time zone. (Used to work but was - broken in version 7.20181031.) -- Joey Hess Tue, 29 Oct 2019 15:13:03 -0400 +git-annex (7.20191115) UNRELEASED; urgency=medium + + * Stop displaying rsync progress, and use git-annex's own progress display + for local-to-local repo transfers. + * git-lfs: The url provided to initremote/enableremote will now be + stored in the git-annex branch, allowing enableremote to be used without + an url. initremote --sameas can be used to add additional urls. + * git-lfs: When there's a git remote with an url that's known to be + used for git-lfs, automatically enable the special remote. + * sync, assistant: Pull and push from git-lfs remotes. + * Fix bug that made bare repos be treated as non-bare when --git-dir + was used. + * benchmark: Changed --databases to take a parameter specifiying the size + of the database to benchmark. + * benchmark --databases: Display size of the populated database. + * benchmark --databases: Improve the "addAssociatedFile to (new)" + benchmark to really add new values, not overwriting old values. + + -- Joey Hess Fri, 15 Nov 2019 11:57:19 -0400 + +git-annex (7.20191114) upstream; urgency=medium + + * Added annex.allowsign option. + * Make --json-error-messages capture more errors, + particularly url download errors. + * Fix a crash (STM deadlock) when -J is used with multiple files + that point to the same key. + * linuxstandalone: Fix a regression that broke git-remote-https. + * OSX git-annex.app: Fix a problem that prevented using the bundled + git-remote-https, git-remote-http, and git-shell. + + -- Joey Hess Thu, 14 Nov 2019 21:57:59 -0400 + git-annex (7.20191106) upstream; urgency=medium * init: Fix bug that lost modifications to unlocked files when init is diff --git a/CmdLine/Action.hs b/CmdLine/Action.hs index 87298a95ff..67a7618e4c 100644 --- a/CmdLine/Action.hs +++ b/CmdLine/Action.hs @@ -63,7 +63,7 @@ commandAction start = Annex.getState Annex.concurrency >>= \case runconcurrent = Annex.getState Annex.workers >>= \case Nothing -> runnonconcurrent Just tv -> - liftIO (atomically (waitInitialWorkerSlot tv)) >>= + liftIO (atomically (waitStartWorkerSlot tv)) >>= maybe runnonconcurrent (runconcurrent' tv) runconcurrent' tv (workerst, workerstage) = do aid <- liftIO $ async $ snd <$> Annex.run workerst @@ -99,12 +99,13 @@ commandAction start = Annex.getState Annex.concurrency >>= \case case mkActionItem startmsg' of OnlyActionOn k' _ | k' /= k -> concurrentjob' workerst startmsg' perform' - _ -> mkjob workerst startmsg' perform' + _ -> beginjob workerst startmsg' perform' Nothing -> noop - _ -> mkjob workerst startmsg perform + _ -> beginjob workerst startmsg perform - mkjob workerst startmsg perform = - inOwnConsoleRegion (Annex.output workerst) $ + beginjob workerst startmsg perform = + inOwnConsoleRegion (Annex.output workerst) $ do + enteringInitialStage void $ accountCommandAction startmsg $ performconcurrent startmsg perform diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index 2c363148ad..aafa764919 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -197,8 +197,7 @@ startWeb o urlstring = go $ fromMaybe bad $ parseURI urlstring pathmax <- liftIO $ fileNameLengthLimit "." urlinfo <- if relaxedOption (downloadOptions o) then pure Url.assumeUrlExists - else Url.withUrlOptions $ - liftIO . Url.getUrlInfo urlstring + else Url.withUrlOptions $ Url.getUrlInfo urlstring file <- adjustFile o <$> case fileOption (downloadOptions o) of Just f -> pure f Nothing -> case Url.urlSuggestedFile urlinfo of diff --git a/Command/Benchmark.hs b/Command/Benchmark.hs index 7ecbf338d4..0395383ff4 100644 --- a/Command/Benchmark.hs +++ b/Command/Benchmark.hs @@ -26,7 +26,7 @@ cmd generator = command "benchmark" SectionTesting data BenchmarkOptions = BenchmarkOptions CmdParams CriterionMode - | BenchmarkDatabases CriterionMode + | BenchmarkDatabases CriterionMode Integer optParser :: CmdParamsDesc -> Parser BenchmarkOptions optParser desc = benchmarkoptions <|> benchmarkdatabases @@ -36,10 +36,11 @@ optParser desc = benchmarkoptions <|> benchmarkdatabases <*> criterionopts benchmarkdatabases = BenchmarkDatabases <$> criterionopts - <* flag' () - ( long "databases" + <*> option auto + ( long "databases" + <> metavar paramNumber <> help "benchmark sqlite databases" - ) + ) #ifdef WITH_BENCHMARK criterionopts = parseWith defaultConfig #else @@ -51,7 +52,7 @@ seek :: BenchmarkGenerator -> BenchmarkOptions -> CommandSeek seek generator (BenchmarkOptions ps mode) = do runner <- generator ps liftIO $ runMode mode [ bench (unwords ps) $ nfIO runner ] -seek _ (BenchmarkDatabases mode) = benchmarkDbs mode +seek _ (BenchmarkDatabases mode n) = benchmarkDbs mode n #else seek _ _ = giveup "git-annex is not built with benchmarking support" #endif diff --git a/Command/Import.hs b/Command/Import.hs index 0a77642789..0488ef4cb7 100644 --- a/Command/Import.hs +++ b/Command/Import.hs @@ -30,7 +30,6 @@ import Utility.InodeCache import Logs.Location import Git.FilePath import Git.Types -import Git.Branch import Types.Import import Utility.Metered @@ -40,7 +39,7 @@ cmd :: Command cmd = notBareRepo $ withGlobalOptions [jobsOption, jsonOptions, fileMatchingOptions] $ command "import" SectionCommon - "import files from elsewhere into the repository" + "add a tree of files to the repository" (paramPaths ++ "|BRANCH[:SUBDIR]") (seek <$$> optParser) @@ -266,7 +265,8 @@ seekRemote remote branch msubdir = do Nothing -> giveup $ "Unable to find base tree for branch " ++ fromRef branch trackingcommit <- fromtrackingbranch Git.Ref.sha - let importcommitconfig = ImportCommitConfig trackingcommit AutomaticCommit importmessage + cmode <- annexCommitMode <$> Annex.getGitConfig + let importcommitconfig = ImportCommitConfig trackingcommit cmode importmessage let commitimport = commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig importabletvar <- liftIO $ newTVarIO Nothing diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs index 6eda7b84ba..2eca658649 100644 --- a/Command/ImportFeed.hs +++ b/Command/ImportFeed.hs @@ -146,13 +146,12 @@ findDownloads u f = catMaybes $ map mk (feedItems f) downloadFeed :: URLString -> Annex (Maybe String) downloadFeed url | Url.parseURIRelaxed url == Nothing = giveup "invalid feed url" - | otherwise = Url.withUrlOptions $ \uo -> - liftIO $ withTmpFile "feed" $ \f h -> do - hClose h - ifM (Url.download nullMeterUpdate url f uo) - ( Just <$> readFileStrict f - , return Nothing - ) + | otherwise = withTmpFile "feed" $ \f h -> do + liftIO $ hClose h + ifM (Url.withUrlOptions $ Url.download nullMeterUpdate url f) + ( Just <$> liftIO (readFileStrict f) + , return Nothing + ) performDownload :: ImportFeedOptions -> Cache -> ToDownload -> Annex Bool performDownload opts cache todownload = case location todownload of @@ -164,7 +163,7 @@ performDownload opts cache todownload = case location todownload of urlinfo <- if relaxedOption (downloadOptions opts) then pure Url.assumeUrlExists else Url.withUrlOptions $ - liftIO . Url.getUrlInfo url + Url.getUrlInfo url let dlopts = (downloadOptions opts) -- force using the filename -- chosen here diff --git a/Database/Benchmark.hs b/Database/Benchmark.hs index d102c3866e..25508f09f7 100644 --- a/Database/Benchmark.hs +++ b/Database/Benchmark.hs @@ -19,6 +19,7 @@ import Database.Init import Utility.Tmp.Dir import Git.FilePath import Types.Key +import Utility.DataUnits import Criterion.Main import Control.Monad.IO.Class (liftIO) @@ -26,17 +27,12 @@ import qualified Data.ByteString.Char8 as B8 import System.Random #endif -benchmarkDbs :: CriterionMode -> Annex () +benchmarkDbs :: CriterionMode -> Integer -> Annex () #ifdef WITH_BENCHMARK -benchmarkDbs mode = withTmpDirIn "." "benchmark" $ \tmpdir -> do - -- benchmark different sizes of databases - dbs <- mapM (benchDb tmpdir) - [ 1000 - , 10000 - -- , 100000 - ] +benchmarkDbs mode n = withTmpDirIn "." "benchmark" $ \tmpdir -> do + db <- benchDb tmpdir n liftIO $ runMode mode - [ bgroup "keys database" $ flip concatMap dbs $ \db -> + [ bgroup "keys database" [ getAssociatedFilesHitBench db , getAssociatedFilesMissBench db , getAssociatedKeyHitBench db @@ -78,22 +74,22 @@ addAssociatedFileOldBench (BenchDb h num) = bench ("addAssociatedFile to " ++ sh addAssociatedFileNewBench :: BenchDb -> Benchmark addAssociatedFileNewBench (BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (new)") $ nfIO $ do n <- getStdRandom (randomR (1,num)) - SQL.addAssociatedFile (keyN n) (fileN (n+1)) (SQL.WriteHandle h) + SQL.addAssociatedFile (keyN n) (fileN (num+n)) (SQL.WriteHandle h) H.flushDbQueue h -populateAssociatedFiles :: H.DbQueue -> Int -> IO () +populateAssociatedFiles :: H.DbQueue -> Integer -> IO () populateAssociatedFiles h num = do forM_ [1..num] $ \n -> SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h) H.flushDbQueue h -keyN :: Int -> Key +keyN :: Integer -> Key keyN n = stubKey { keyName = B8.pack $ "key" ++ show n , keyVariety = OtherKey "BENCH" } -fileN :: Int -> TopFilePath +fileN :: Integer -> TopFilePath fileN n = asTopFilePath ("file" ++ show n) keyMiss :: Key @@ -102,14 +98,17 @@ keyMiss = keyN 0 -- 0 is never stored fileMiss :: TopFilePath fileMiss = fileN 0 -- 0 is never stored -data BenchDb = BenchDb H.DbQueue Int +data BenchDb = BenchDb H.DbQueue Integer -benchDb :: FilePath -> Int -> Annex BenchDb +benchDb :: FilePath -> Integer -> Annex BenchDb benchDb tmpdir num = do - liftIO $ putStrLn $ "setting up database with " ++ show num + liftIO $ putStrLn $ "setting up database with " ++ show num ++ " items" initDb db SQL.createTables h <- liftIO $ H.openDbQueue H.MultiWriter db SQL.containedTable liftIO $ populateAssociatedFiles h num + sz <- liftIO $ getFileSize db + liftIO $ putStrLn $ "size of database on disk: " ++ + roughSize storageUnits False sz return (BenchDb h num) where db = tmpdir show num "db" diff --git a/Git/Config.hs b/Git/Config.hs index f250af73d6..9ebd4bd0f5 100644 --- a/Git/Config.hs +++ b/Git/Config.hs @@ -94,6 +94,14 @@ store s repo = do , fullconfig = M.unionWith (++) c (fullconfig repo) } +{- Stores a single config setting in a Repo, returning the new version of + - the Repo. Config settings can be updated incrementally. -} +store' :: String -> String -> Repo -> Repo +store' k v repo = repo + { config = M.singleton k v `M.union` config repo + , fullconfig = M.unionWith (++) (M.singleton k [v]) (fullconfig repo) + } + {- Updates the location of a repo, based on its configuration. - - Git.Construct makes LocalUknown repos, of which only a directory is diff --git a/Git/CurrentRepo.hs b/Git/CurrentRepo.hs index f4cdf28634..f8383326a5 100644 --- a/Git/CurrentRepo.hs +++ b/Git/CurrentRepo.hs @@ -67,8 +67,12 @@ get = do configure (Just d) _ = do absd <- absPath d curr <- getCurrentDirectory - Git.Config.read $ newFrom $ + r <- Git.Config.read $ newFrom $ Local { gitdir = absd, worktree = Just curr } + return $ if Git.Config.isBare r + then r { location = (location r) { worktree = Nothing } } + else r + configure Nothing Nothing = giveup "Not in a git repository." addworktree w r = changelocation r $ diff --git a/Git/Queue.hs b/Git/Queue.hs index 175cd3f58b..eb4bbb0694 100644 --- a/Git/Queue.hs +++ b/Git/Queue.hs @@ -1,6 +1,6 @@ {- git repository command queue - - - Copyright 2010-2018 Joey Hess + - Copyright 2010-2019 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -27,9 +27,10 @@ import Git.Command import qualified Git.UpdateIndex import qualified Data.Map.Strict as M +import Control.Monad.IO.Class {- Queable actions that can be performed in a git repository. -} -data Action +data Action m {- Updating the index file, using a list of streamers that can - be added to as the queue grows. -} = UpdateIndexAction [Git.UpdateIndex.Streamer] -- in reverse order @@ -43,21 +44,21 @@ data Action {- An internal action to run, on a list of files that can be added - to as the queue grows. -} | InternalAction - { getRunner :: InternalActionRunner + { getRunner :: InternalActionRunner m , getInternalFiles :: [(FilePath, IO Bool)] } {- The String must be unique for each internal action. -} -data InternalActionRunner = InternalActionRunner String (Repo -> [(FilePath, IO Bool)] -> IO ()) +data InternalActionRunner m = InternalActionRunner String (Repo -> [(FilePath, IO Bool)] -> m ()) -instance Eq InternalActionRunner where +instance Eq (InternalActionRunner m) where InternalActionRunner s1 _ == InternalActionRunner s2 _ = s1 == s2 {- A key that can uniquely represent an action in a Map. -} data ActionKey = UpdateIndexActionKey | CommandActionKey String | InternalActionKey String deriving (Eq, Ord) -actionKey :: Action -> ActionKey +actionKey :: Action m -> ActionKey actionKey (UpdateIndexAction _) = UpdateIndexActionKey actionKey CommandAction { getSubcommand = s } = CommandActionKey s actionKey InternalAction { getRunner = InternalActionRunner s _ } = InternalActionKey s @@ -65,10 +66,10 @@ actionKey InternalAction { getRunner = InternalActionRunner s _ } = InternalActi {- A queue of actions to perform (in any order) on a git repository, - with lists of files to perform them on. This allows coalescing - similar git commands. -} -data Queue = Queue +data Queue m = Queue { size :: Int , _limit :: Int - , items :: M.Map ActionKey Action + , items :: M.Map ActionKey (Action m) } {- A recommended maximum size for the queue, after which it should be @@ -84,7 +85,7 @@ defaultLimit :: Int defaultLimit = 10240 {- Constructor for empty queue. -} -new :: Maybe Int -> Queue +new :: Maybe Int -> Queue m new lim = Queue 0 (fromMaybe defaultLimit lim) M.empty {- Adds an git command to the queue. @@ -93,7 +94,7 @@ new lim = Queue 0 (fromMaybe defaultLimit lim) M.empty - assumed to be equivilant enough to perform in any order with the same - result. -} -addCommand :: String -> [CommandParam] -> [FilePath] -> Queue -> Repo -> IO Queue +addCommand :: MonadIO m => String -> [CommandParam] -> [FilePath] -> Queue m -> Repo -> m (Queue m) addCommand subcommand params files q repo = updateQueue action different (length files) q repo where @@ -107,7 +108,7 @@ addCommand subcommand params files q repo = different _ = True {- Adds an internal action to the queue. -} -addInternalAction :: InternalActionRunner -> [(FilePath, IO Bool)] -> Queue -> Repo -> IO Queue +addInternalAction :: MonadIO m => InternalActionRunner m -> [(FilePath, IO Bool)] -> Queue m -> Repo -> m (Queue m) addInternalAction runner files q repo = updateQueue action different (length files) q repo where @@ -120,7 +121,7 @@ addInternalAction runner files q repo = different _ = True {- Adds an update-index streamer to the queue. -} -addUpdateIndex :: Git.UpdateIndex.Streamer -> Queue -> Repo -> IO Queue +addUpdateIndex :: MonadIO m => Git.UpdateIndex.Streamer -> Queue m -> Repo -> m (Queue m) addUpdateIndex streamer q repo = updateQueue action different 1 q repo where @@ -133,7 +134,7 @@ addUpdateIndex streamer q repo = {- Updates or adds an action in the queue. If the queue already contains a - different action, it will be flushed; this is to ensure that conflicting - actions, like add and rm, are run in the right order.-} -updateQueue :: Action -> (Action -> Bool) -> Int -> Queue -> Repo -> IO Queue +updateQueue :: MonadIO m => Action m -> (Action m -> Bool) -> Int -> Queue m -> Repo -> m (Queue m) updateQueue !action different sizeincrease q repo | null (filter different (M.elems (items q))) = return $ go q | otherwise = go <$> flush q repo @@ -150,7 +151,7 @@ updateQueue !action different sizeincrease q repo {- The new value comes first. It probably has a smaller list of files than - the old value. So, the list append of the new value first is more - efficient. -} -combineNewOld :: Action -> Action -> Action +combineNewOld :: Action m -> Action m -> Action m combineNewOld (CommandAction _sc1 _ps1 fs1) (CommandAction sc2 ps2 fs2) = CommandAction sc2 ps2 (fs1++fs2) combineNewOld (UpdateIndexAction s1) (UpdateIndexAction s2) = @@ -162,18 +163,18 @@ combineNewOld anew _aold = anew {- Merges the contents of the second queue into the first. - This should only be used when the two queues are known to contain - non-conflicting actions. -} -merge :: Queue -> Queue -> Queue +merge :: Queue m -> Queue m -> Queue m merge origq newq = origq { size = size origq + size newq , items = M.unionWith combineNewOld (items newq) (items origq) } {- Is a queue large enough that it should be flushed? -} -full :: Queue -> Bool +full :: Queue m -> Bool full (Queue cur lim _) = cur >= lim {- Runs a queue on a git repository. -} -flush :: Queue -> Repo -> IO Queue +flush :: MonadIO m => Queue m -> Repo -> m (Queue m) flush (Queue _ lim m) repo = do forM_ (M.elems m) $ runAction repo return $ Queue 0 lim M.empty @@ -184,11 +185,11 @@ flush (Queue _ lim m) repo = do - - Intentionally runs the command even if the list of files is empty; - this allows queueing commands that do not need a list of files. -} -runAction :: Repo -> Action -> IO () +runAction :: MonadIO m => Repo -> Action m -> m () runAction repo (UpdateIndexAction streamers) = -- list is stored in reverse order - Git.UpdateIndex.streamUpdateIndex repo $ reverse streamers -runAction repo action@(CommandAction {}) = do + liftIO $ Git.UpdateIndex.streamUpdateIndex repo $ reverse streamers +runAction repo action@(CommandAction {}) = liftIO $ do #ifndef mingw32_HOST_OS let p = (proc "xargs" $ "-0":"git":toCommand gitparams) { env = gitEnv repo } withHandle StdinHandle createProcessSuccess p $ \h -> do diff --git a/Git/Remote.hs b/Git/Remote.hs index 9b05a86fb4..fa336013e7 100644 --- a/Git/Remote.hs +++ b/Git/Remote.hs @@ -51,6 +51,7 @@ makeLegalName s = case filter legal $ replace "/" "_" s of legal c = isAlphaNum c data RemoteLocation = RemoteUrl String | RemotePath FilePath + deriving (Eq) remoteLocationIsUrl :: RemoteLocation -> Bool remoteLocationIsUrl (RemoteUrl _) = True diff --git a/Remote/BitTorrent.hs b/Remote/BitTorrent.hs index 0e49cb1837..b18e0334a2 100644 --- a/Remote/BitTorrent.hs +++ b/Remote/BitTorrent.hs @@ -206,7 +206,7 @@ downloadTorrentFile u = do withTmpFileIn othertmp "torrent" $ \f h -> do liftIO $ hClose h ok <- Url.withUrlOptions $ - liftIO . Url.download nullMeterUpdate u f + Url.download nullMeterUpdate u f when ok $ liftIO $ renameFile f torrent return ok diff --git a/Remote/External.hs b/Remote/External.hs index f6444e678b..cbf3e57b7a 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -716,7 +716,7 @@ checkKeyUrl :: Git.Repo -> CheckPresent checkKeyUrl r k = do showChecking r us <- getWebUrls k - anyM (\u -> withUrlOptions $ liftIO . checkBoth u (keySize k)) us + anyM (\u -> withUrlOptions $ checkBoth u (keySize k)) us getWebUrls :: Key -> Annex [URLString] getWebUrls key = filter supported <$> getUrls key diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 0c4d42cf57..ff948ba0d6 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -286,7 +286,7 @@ setupRepo gcryptid r {- Ask git-annex-shell to configure the repository as a gcrypt - repository. May fail if it is too old. -} gitannexshellsetup = Ssh.onRemote NoConsumeStdin r - (boolSystem, return False) + (\f p -> liftIO (boolSystem f p), return False) "gcryptsetup" [ Param gcryptid ] [] denyNonFastForwards = "receive.denyNonFastForwards" @@ -451,7 +451,7 @@ getGCryptId fast r gc | Git.repoIsLocal r || Git.repoIsLocalUnknown r = extract <$> liftIO (catchMaybeIO $ Git.Config.read r) | not fast = extract . liftM fst <$> getM (eitherToMaybe <$>) - [ Ssh.onRemote NoConsumeStdin r (Git.Config.fromPipe r, return (Left $ error "configlist failed")) "configlist" [] [] + [ Ssh.onRemote NoConsumeStdin r (\f p -> liftIO (Git.Config.fromPipe r f p), return (Left $ error "configlist failed")) "configlist" [] [] , getConfigViaRsync r gc ] | otherwise = return (Nothing, r) diff --git a/Remote/Git.hs b/Remote/Git.hs index 7bdab21a1b..6e1b31f748 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -143,7 +143,9 @@ configRead autoinit r = do (True, _, _) | remoteAnnexCheckUUID gc -> tryGitConfigRead autoinit r | otherwise -> return r - (False, _, NoUUID) -> tryGitConfigRead autoinit r + (False, _, NoUUID) -> configSpecialGitRemotes r >>= \case + Nothing -> tryGitConfigRead autoinit r + Just r' -> return r' _ -> return r gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) @@ -231,7 +233,7 @@ repoAvail r tryGitConfigRead :: Bool -> Git.Repo -> Annex Git.Repo tryGitConfigRead autoinit r | haveconfig r = return r -- already read - | Git.repoIsSsh r = store $ do + | Git.repoIsSsh r = storeUpdatedRemote $ do v <- Ssh.onRemote NoConsumeStdin r (pipedconfig, return (Left $ giveup "configlist failed")) "configlist" [] configlistfields @@ -240,30 +242,30 @@ tryGitConfigRead autoinit r | haveconfig r' -> return r' | otherwise -> configlist_failed Left _ -> configlist_failed - | Git.repoIsHttp r = store geturlconfig + | Git.repoIsHttp r = storeUpdatedRemote geturlconfig | Git.GCrypt.isEncrypted r = handlegcrypt =<< getConfigMaybe (remoteConfig r "uuid") | Git.repoIsUrl r = return r - | otherwise = store $ liftIO $ + | otherwise = storeUpdatedRemote $ liftIO $ readlocalannexconfig `catchNonAsync` (const $ return r) where haveconfig = not . M.null . Git.config pipedconfig cmd params = do - v <- Git.Config.fromPipe r cmd params + v <- liftIO $ Git.Config.fromPipe r cmd params case v of Right (r', val) -> do unless (isUUIDConfigured r' || null val) $ do - warningIO $ "Failed to get annex.uuid configuration of repository " ++ Git.repoDescribe r - warningIO $ "Instead, got: " ++ show val - warningIO $ "This is unexpected; please check the network transport!" + warning $ "Failed to get annex.uuid configuration of repository " ++ Git.repoDescribe r + warning $ "Instead, got: " ++ show val + warning $ "This is unexpected; please check the network transport!" return $ Right r' Left l -> return $ Left l geturlconfig = Url.withUrlOptions $ \uo -> do - v <- liftIO $ withTmpFile "git-annex.tmp" $ \tmpfile h -> do - hClose h + v <- withTmpFile "git-annex.tmp" $ \tmpfile h -> do + liftIO $ hClose h let url = Git.repoLocation r ++ "/config" - ifM (Url.downloadQuiet nullMeterUpdate url tmpfile uo) + ifM (liftIO $ Url.downloadQuiet nullMeterUpdate url tmpfile uo) ( Just <$> pipedconfig "git" [Param "config", Param "--null", Param "--list", Param "--file", File tmpfile] , return Nothing ) @@ -278,18 +280,6 @@ tryGitConfigRead autoinit r set_ignore "not usable by git-annex" False return r - store = observe $ \r' -> do - l <- Annex.getGitRemotes - let rs = exchange l r' - Annex.changeState $ \s -> s { Annex.gitremotes = Just rs } - - exchange [] _ = [] - exchange (old:ls) new - | Git.remoteName old == Git.remoteName new = - new : exchange ls new - | otherwise = - old : exchange ls new - {- Is this remote just not available, or does - it not have git-annex-shell? - Find out by trying to fetch from the remote. -} @@ -319,7 +309,7 @@ tryGitConfigRead autoinit r g <- gitRepo case Git.GCrypt.remoteRepoId g (Git.remoteName r) of Nothing -> return r - Just v -> store $ liftIO $ setUUID r $ + Just v -> storeUpdatedRemote $ liftIO $ setUUID r $ genUUIDInNameSpace gCryptNameSpace v {- The local repo may not yet be initialized, so try to initialize @@ -337,6 +327,31 @@ tryGitConfigRead autoinit r then [(Fields.autoInit, "1")] else [] +{- Handles special remotes that can be enabled by the presence of + - regular git remotes. + - + - When a remote repo is found to be such a special remote, its + - UUID is cached in the git config, and the repo returned with + - the UUID set. + -} +configSpecialGitRemotes :: Git.Repo -> Annex (Maybe Git.Repo) +configSpecialGitRemotes r = Remote.GitLFS.configKnownUrl r >>= \case + Nothing -> return Nothing + Just r' -> Just <$> storeUpdatedRemote (return r') + +storeUpdatedRemote :: Annex Git.Repo -> Annex Git.Repo +storeUpdatedRemote = observe $ \r' -> do + l <- Annex.getGitRemotes + let rs = exchange l r' + Annex.changeState $ \s -> s { Annex.gitremotes = Just rs } + where + exchange [] _ = [] + exchange (old:ls) new + | Git.remoteName old == Git.remoteName new = + new : exchange ls new + | otherwise = + old : exchange ls new + {- Checks if a given remote has the content for a key in its annex. -} inAnnex :: Remote -> State -> Key -> Annex Bool inAnnex rmt st key = do @@ -352,11 +367,10 @@ inAnnex' repo rmt (State connpool duc _ _) key checkhttp = do showChecking repo gc <- Annex.getGitConfig - ifM (Url.withUrlOptions $ \uo -> liftIO $ - anyM (\u -> Url.checkBoth u (keySize key) uo) (keyUrls gc repo rmt key)) - ( return True - , giveup "not found" - ) + ifM (Url.withUrlOptions $ \uo -> anyM (\u -> Url.checkBoth u (keySize key) uo) (keyUrls gc repo rmt key)) + ( return True + , giveup "not found" + ) checkremote = let fallback = Ssh.inAnnex repo key in P2PHelper.checkpresent (Ssh.runProto rmt connpool (cantCheck rmt) fallback) key @@ -498,8 +512,9 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _) key file dest meter Just (object, checksuccess) -> do copier <- mkCopier hardlink st params runTransfer (Transfer Download u key) - file stdRetry - (\p -> copier object dest (combineMeterUpdate p meterupdate) checksuccess) + file stdRetry $ \p -> + metered (Just (combineMeterUpdate p meterupdate)) key $ \_ p' -> + copier object dest p' checksuccess | Git.repoIsSsh repo = if forcersync then fallback meterupdate else P2PHelper.retrieve @@ -632,15 +647,15 @@ copyToRemote' repo r st@(State connpool duc _ _) key file meterupdate -- run copy from perspective of remote onLocalFast repo r $ ifM (Annex.Content.inAnnex key) ( return True - , do + , runTransfer (Transfer Download u key) file stdRetry $ \p -> do copier <- mkCopier hardlink st params let verify = Annex.Content.RemoteVerify r let rsp = RetrievalAllKeysSecure - runTransfer (Transfer Download u key) file stdRetry $ \p -> - let p' = combineMeterUpdate meterupdate p - in Annex.Content.saveState True `after` - Annex.Content.getViaTmp rsp verify key - (\dest -> copier object dest p' (liftIO checksuccessio)) + res <- Annex.Content.getViaTmp rsp verify key $ \dest -> + metered (Just (combineMeterUpdate meterupdate p)) key $ \_ p' -> + copier object dest p' (liftIO checksuccessio) + Annex.Content.saveState True + return res ) copyremotefallback p = Annex.Content.sendAnnex key noop $ \object -> do -- This is too broad really, but recvkey normally @@ -750,7 +765,7 @@ rsyncOrCopyFile st rsyncparams src dest p = dorsync = do -- dest may already exist, so make sure rsync can write to it void $ liftIO $ tryIO $ allowWrite dest - oh <- mkOutputHandler + oh <- mkOutputHandlerQuiet Ssh.rsyncHelper oh (Just p) $ rsyncparams ++ [File src, File dest] docopycow = docopywith copyCoW diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index d629788199..01f76a5a8b 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -5,7 +5,7 @@ - Licensed under the GNU AGPL version 3 or higher. -} -module Remote.GitLFS (remote, gen) where +module Remote.GitLFS (remote, gen, configKnownUrl) where import Annex.Common import Types.Remote @@ -13,9 +13,11 @@ import Annex.Url import Types.Key import Types.Creds import qualified Annex +import qualified Annex.SpecialRemote.Config import qualified Git import qualified Git.Types as Git import qualified Git.Url +import qualified Git.Remote import qualified Git.GCrypt import qualified Git.Credential as Git import Config @@ -31,8 +33,10 @@ import Crypto import Backend.Hash import Utility.Hash import Utility.SshHost +import Logs.Remote import Logs.RemoteState import qualified Utility.GitLFS as LFS +import qualified Git.Config import Control.Concurrent.STM import Data.String @@ -145,21 +149,46 @@ mySetup _ mu _ c gc = do , "likely insecure configuration.)" ] - -- The url is not stored in the remote log, because the same - -- git-lfs repo can be accessed using different urls by different - -- people (eg over ssh or http). - -- - -- Instead, set up remote.name.url to point to the repo, + -- Set up remote.name.url to point to the repo, -- (so it's also usable by git as a non-special remote), - -- and set remote.name.git-lfs = true - let c'' = M.delete "url" c' - gitConfigSpecialRemote u c'' [("git-lfs", "true")] + -- and set remote.name.annex-git-lfs = true + gitConfigSpecialRemote u c' [("git-lfs", "true")] setConfig (ConfigKey ("remote." ++ getRemoteName c ++ ".url")) url - return (c'', u) + return (c', u) where url = fromMaybe (giveup "Specify url=") (M.lookup "url" c) remotename = fromJust (lookupName c) +{- Check if a remote's url is one known to belong to a git-lfs repository. + - If so, set the necessary configuration to enable using the remote + - with git-lfs. -} +configKnownUrl :: Git.Repo -> Annex (Maybe Git.Repo) +configKnownUrl r + | Git.repoIsUrl r = do + l <- readRemoteLog + g <- Annex.gitRepo + case Annex.SpecialRemote.Config.findByRemoteConfig (match g) l of + ((u, _, mcu):[]) -> Just <$> go u mcu + _ -> return Nothing + | otherwise = return Nothing + where + match g c = fromMaybe False $ do + t <- M.lookup Annex.SpecialRemote.Config.typeField c + u <- M.lookup "url" c + let u' = Git.Remote.parseRemoteLocation u g + return $ Git.Remote.RemoteUrl (Git.repoLocation r) == u' + && t == typename remote + go u mcu = do + r' <- set "uuid" (fromUUID u) =<< set "git-lfs" "true" r + case mcu of + Just (Annex.SpecialRemote.Config.ConfigFrom cu) -> + set "config-uuid" (fromUUID cu) r' + Nothing -> return r' + set k v r' = do + let ck@(ConfigKey k') = remoteConfig r' k + setConfig ck v + return $ Git.Config.store' k' v r' + data LFSHandle = LFSHandle { downloadEndpoint :: Maybe LFS.Endpoint , uploadEndpoint :: Maybe LFS.Endpoint diff --git a/Remote/Helper/Ssh.hs b/Remote/Helper/Ssh.hs index 47cf577218..cc17220f28 100644 --- a/Remote/Helper/Ssh.hs +++ b/Remote/Helper/Ssh.hs @@ -83,7 +83,7 @@ git_annex_shell cs r command params fields onRemote :: ConsumeStdin -> Git.Repo - -> (FilePath -> [CommandParam] -> IO a, Annex a) + -> (FilePath -> [CommandParam] -> Annex a, Annex a) -> String -> [CommandParam] -> [(Field, String)] @@ -91,7 +91,7 @@ onRemote onRemote cs r (with, errorval) command params fields = do s <- git_annex_shell cs r command params fields case s of - Just (c, ps) -> liftIO $ with c ps + Just (c, ps) -> with c ps Nothing -> errorval {- Checks if a remote contains a key. -} @@ -100,14 +100,14 @@ inAnnex r k = do showChecking r onRemote NoConsumeStdin r (runcheck, cantCheck r) "inannex" [Param $ serializeKey k] [] where - runcheck c p = dispatch =<< safeSystem c p + runcheck c p = liftIO $ dispatch =<< safeSystem c p dispatch ExitSuccess = return True dispatch (ExitFailure 1) = return False dispatch _ = cantCheck r {- Removes a key from a remote. -} dropKey :: Git.Repo -> Key -> Annex Bool -dropKey r key = onRemote NoConsumeStdin r (boolSystem, return False) "dropkey" +dropKey r key = onRemote NoConsumeStdin r (\f p -> liftIO (boolSystem f p), return False) "dropkey" [ Param "--quiet", Param "--force" , Param $ serializeKey key ] diff --git a/Remote/List.hs b/Remote/List.hs index 5f3016b257..3e7ca9fa73 100644 --- a/Remote/List.hs +++ b/Remote/List.hs @@ -128,4 +128,8 @@ updateRemote remote = do {- Checks if a remote is syncable using git. -} gitSyncableRemote :: Remote -> Bool gitSyncableRemote r = remotetype r `elem` - [ Remote.Git.remote, Remote.GCrypt.remote, Remote.P2P.remote ] + [ Remote.Git.remote + , Remote.GCrypt.remote + , Remote.P2P.remote + , Remote.GitLFS.remote + ] diff --git a/Remote/S3.hs b/Remote/S3.hs index 97a94dc853..2787e3f554 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -58,11 +58,10 @@ import Logs.Web import Logs.MetaData import Types.MetaData import Utility.Metered -import qualified Annex.Url as Url import Utility.DataUnits import Annex.Content -import Annex.Url (getUrlOptions, withUrlOptions) -import Utility.Url (checkBoth, UrlOptions(..)) +import qualified Annex.Url as Url +import Annex.Url (getUrlOptions, withUrlOptions, UrlOptions(..)) import Utility.Env type BucketName = String @@ -348,7 +347,7 @@ checkKey hv r rs c info k = withS3Handle hv $ \case Right us -> do showChecking r let check u = withUrlOptions $ - liftIO . checkBoth u (keySize k) + Url.checkBoth u (keySize k) anyM check us checkKeyHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> Annex Bool @@ -397,7 +396,7 @@ retrieveExportS3 hv r info _k loc f p = warning $ needS3Creds (uuid r) return False Just geturl -> Url.withUrlOptions $ - liftIO . Url.download p (geturl exportloc) f + Url.download p (geturl exportloc) f exportloc = bucketExportLocation info loc removeExportS3 :: S3HandleVar -> Remote -> RemoteStateHandle -> S3Info -> Key -> ExportLocation -> Annex Bool @@ -417,8 +416,8 @@ checkPresentExportS3 :: S3HandleVar -> Remote -> S3Info -> Key -> ExportLocation checkPresentExportS3 hv r info k loc = withS3Handle hv $ \case Just h -> checkKeyHelper info h (Left (T.pack $ bucketExportLocation info loc)) Nothing -> case getPublicUrlMaker info of - Just geturl -> withUrlOptions $ liftIO . - checkBoth (geturl $ bucketExportLocation info loc) (keySize k) + Just geturl -> withUrlOptions $ + Url.checkBoth (geturl $ bucketExportLocation info loc) (keySize k) Nothing -> do warning $ needS3Creds (uuid r) giveup "No S3 credentials configured" diff --git a/Remote/Web.hs b/Remote/Web.hs index b3dab374e7..645495d696 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -116,9 +116,8 @@ checkKey' key us = firsthit us (Right False) $ \u -> do showChecking u' case downloader of YoutubeDownloader -> youtubeDlCheck u' - _ -> do - Url.withUrlOptions $ liftIO . catchMsgIO . - Url.checkBoth u' (keySize key) + _ -> catchMsgIO $ + Url.withUrlOptions $ Url.checkBoth u' (keySize key) where firsthit [] miss _ = return miss firsthit (u:rest) _ a = do diff --git a/RemoteDaemon/Core.hs b/RemoteDaemon/Core.hs index 871d9e4630..d31792948c 100644 --- a/RemoteDaemon/Core.hs +++ b/RemoteDaemon/Core.hs @@ -170,5 +170,5 @@ updateTransportHandle :: TransportHandle -> IO TransportHandle updateTransportHandle h@(TransportHandle _g annexstate) = do g' <- liftAnnex h $ do reloadConfig - Annex.fromRepo id + Annex.gitRepo return (TransportHandle (LocalRepo g') annexstate) diff --git a/RemoteDaemon/Transport/Tor.hs b/RemoteDaemon/Transport/Tor.hs index 79bf7e05ef..977a29112e 100644 --- a/RemoteDaemon/Transport/Tor.hs +++ b/RemoteDaemon/Transport/Tor.hs @@ -69,7 +69,7 @@ server ichan th@(TransportHandle (LocalRepo r) _) = go ) unless ok $ do hClose conn - warningIO "dropped Tor connection, too busy" + liftAnnex th $ warning "dropped Tor connection, too busy" handlecontrol servicerunning = do msg <- atomically $ readTChan ichan diff --git a/Test.hs b/Test.hs index e6d4cc9b12..115ea3bb66 100644 --- a/Test.hs +++ b/Test.hs @@ -84,6 +84,7 @@ import qualified Utility.Base64 import qualified Utility.Tmp.Dir import qualified Utility.FileSystemEncoding import qualified Utility.Aeson +import qualified Utility.CopyFile #ifndef mingw32_HOST_OS import qualified Remote.Helper.Encryptable import qualified Types.Crypto @@ -248,6 +249,7 @@ unitTests note = testGroup ("Unit Tests " ++ note) , testCase "info" test_info , testCase "version" test_version , testCase "sync" test_sync + , testCase "concurrent get of dup key regression" test_concurrent_get_of_dup_key_regression , testCase "union merge regression" test_union_merge_regression , testCase "adjusted branch merge regression" test_adjusted_branch_merge_regression , testCase "adjusted branch subtree regression" test_adjusted_branch_subtree_regression @@ -951,6 +953,31 @@ test_sync = intmpclonerepo $ do git_annex "sync" ["--content"] @? "sync failed" git_annex_expectoutput "find" ["--in", "."] [] +{- Regression test for the concurrency bug fixed in + - 667d38a8f11c1ee8f256cdbd80e225c2bae06595 -} +test_concurrent_get_of_dup_key_regression :: Assertion +test_concurrent_get_of_dup_key_regression = intmpclonerepo $ do + makedup dupfile + -- This was sufficient currency to trigger the bug. + git_annex "get" ["-J1", annexedfile, dupfile] + @? "concurrent get -J1 with dup failed" + git_annex "drop" ["-J1"] + @? "drop with dup failed" + -- With -J2, one more dup file was needed to trigger the bug. + makedup dupfile2 + git_annex "get" ["-J2", annexedfile, dupfile, dupfile2] + @? "concurrent get -J2 with dup failed" + git_annex "drop" ["-J2"] + @? "drop with dup failed" + where + dupfile = annexedfile ++ "2" + dupfile2 = annexedfile ++ "3" + makedup f = do + Utility.CopyFile.copyFileExternal Utility.CopyFile.CopyAllMetaData annexedfile f + @? "copying annexed file failed" + boolSystem "git" [Param "add", File f] + @? "git add failed" + {- Regression test for union merge bug fixed in - 0214e0fb175a608a49b812d81b4632c081f63027 -} test_union_merge_regression :: Assertion diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index 36224b14c3..50aa6f2379 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -1,6 +1,6 @@ {- git-annex configuration - - - Copyright 2012-2015 Joey Hess + - Copyright 2012-2019 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -21,6 +21,7 @@ import qualified Git.Config import qualified Git.Construct import Git.Types import Git.ConfigTypes +import Git.Branch (CommitMode(..)) import Utility.DataUnits import Config.Cost import Types.UUID @@ -105,6 +106,7 @@ data GitConfig = GitConfig , annexJobs :: Concurrency , annexCacheCreds :: Bool , annexAutoUpgradeRepository :: Bool + , annexCommitMode :: CommitMode , coreSymlinks :: Bool , coreSharedRepository :: SharedRepository , receiveDenyCurrentBranch :: DenyCurrentBranch @@ -186,6 +188,9 @@ extractGitConfig r = GitConfig parseConcurrency =<< getmaybe (annex "jobs") , annexCacheCreds = getbool (annex "cachecreds") True , annexAutoUpgradeRepository = getbool (annex "autoupgraderepository") True + , annexCommitMode = if getbool (annex "allowsign") False + then ManualCommit + else AutomaticCommit , coreSymlinks = getbool "core.symlinks" True , coreSharedRepository = getSharedRepository r , receiveDenyCurrentBranch = getDenyCurrentBranch r diff --git a/Types/WorkerPool.hs b/Types/WorkerPool.hs index 23ef345574..8a68163138 100644 --- a/Types/WorkerPool.hs +++ b/Types/WorkerPool.hs @@ -20,7 +20,14 @@ data WorkerPool t = WorkerPool -- but there can temporarily be fewer values, when a thread is -- changing between stages. } - deriving (Show) + +instance Show (WorkerPool t) where + show p = unwords + [ "WorkerPool" + , show (usedStages p) + , show (workerList p) + , show (length (spareVals p)) + ] -- | A worker can either be idle or running an Async action. -- And it is used for some stage. @@ -33,7 +40,12 @@ instance Show (Worker t) where show (ActiveWorker _ s) = "ActiveWorker " ++ show s data WorkerStage - = PerformStage + = StartStage + -- ^ All threads start in this stage, and then transition away from + -- it to the initialStage when they begin doing work. This should + -- never be included in UsedStages, because transition from some + -- other stage back to this one could result in a deadlock. + | PerformStage -- ^ Running a CommandPerform action. | CleanupStage -- ^ Running a CommandCleanup action. @@ -95,12 +107,13 @@ workerAsync (ActiveWorker aid _) = Just aid allocateWorkerPool :: t -> Int -> UsedStages -> WorkerPool t allocateWorkerPool t n u = WorkerPool { usedStages = u - , workerList = take totalthreads $ map IdleWorker stages + , workerList = map IdleWorker $ + take totalthreads $ concat $ repeat stages , spareVals = replicate totalthreads t } where - stages = concat $ repeat $ S.toList $ stageSet u - totalthreads = n * S.size (stageSet u) + stages = StartStage : S.toList (stageSet u) + totalthreads = n * length stages addWorkerPool :: Worker t -> WorkerPool t -> WorkerPool t addWorkerPool w pool = pool { workerList = w : workerList pool } diff --git a/Utility/Android.hs b/Utility/Android.hs index f30e415d77..10ec35d37d 100644 --- a/Utility/Android.hs +++ b/Utility/Android.hs @@ -7,7 +7,9 @@ - License: BSD-2-clause -} -module Utility.Android where +module Utility.Android ( + osAndroid +) where #ifdef linux_HOST_OS import Common diff --git a/Utility/Applicative.hs b/Utility/Applicative.hs index fce3c04852..62d84bcf2b 100644 --- a/Utility/Applicative.hs +++ b/Utility/Applicative.hs @@ -5,7 +5,9 @@ - License: BSD-2-clause -} -module Utility.Applicative where +module Utility.Applicative ( + (<$$>), +) where {- Like <$> , but supports one level of currying. - diff --git a/Utility/Batch.hs b/Utility/Batch.hs index 0c6d9da755..1d66881d23 100644 --- a/Utility/Batch.hs +++ b/Utility/Batch.hs @@ -7,7 +7,14 @@ {-# LANGUAGE CPP #-} -module Utility.Batch where +module Utility.Batch ( + batch, + BatchCommandMaker, + getBatchCommandMaker, + toBatchCommand, + batchCommand, + batchCommandEnv, +) where import Common diff --git a/Utility/DBus.hs b/Utility/DBus.hs index 5b04703013..bcb093223d 100644 --- a/Utility/DBus.hs +++ b/Utility/DBus.hs @@ -7,7 +7,13 @@ {-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-} -module Utility.DBus where +module Utility.DBus ( + ServiceName, + listServiceNames, + callDBus, + runClient, + persistentClient, +) where import Utility.PartialPrelude import Utility.Exception diff --git a/Utility/Daemon.hs b/Utility/Daemon.hs index 398e6d9e7b..2bf857f7eb 100644 --- a/Utility/Daemon.hs +++ b/Utility/Daemon.hs @@ -7,7 +7,12 @@ {-# LANGUAGE CPP #-} -module Utility.Daemon where +module Utility.Daemon ( + daemonize, + foreground, + checkDaemon, + stopDaemon, +) where import Common import Utility.PID diff --git a/Utility/Data.hs b/Utility/Data.hs index 27c0a824c2..55108457d6 100644 --- a/Utility/Data.hs +++ b/Utility/Data.hs @@ -7,7 +7,10 @@ {-# OPTIONS_GHC -fno-warn-tabs #-} -module Utility.Data where +module Utility.Data ( + firstJust, + eitherToMaybe, +) where {- First item in the list that is not Nothing. -} firstJust :: Eq a => [Maybe a] -> Maybe a diff --git a/Utility/DebugLocks.hs b/Utility/DebugLocks.hs index fecacc9539..4593726ff9 100644 --- a/Utility/DebugLocks.hs +++ b/Utility/DebugLocks.hs @@ -8,7 +8,7 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE ScopedTypeVariables #-} -module Utility.DebugLocks where +module Utility.DebugLocks (debugLocks) where import Control.Monad.Catch import Control.Monad.IO.Class diff --git a/Utility/DirWatcher.hs b/Utility/DirWatcher.hs index 892841f93f..99eede4173 100644 --- a/Utility/DirWatcher.hs +++ b/Utility/DirWatcher.hs @@ -11,7 +11,15 @@ {-# LANGUAGE CPP #-} -module Utility.DirWatcher where +module Utility.DirWatcher ( + canWatch, + eventsCoalesce, + closingTracked, + modifyTracked, + DirWatcherHandle, + watchDir, + stopWatchDir, +) where import Utility.DirWatcher.Types diff --git a/Utility/DirWatcher/FSEvents.hs b/Utility/DirWatcher/FSEvents.hs index d7472d490a..a2e3b4ae2d 100644 --- a/Utility/DirWatcher/FSEvents.hs +++ b/Utility/DirWatcher/FSEvents.hs @@ -5,7 +5,7 @@ - License: BSD-2-clause -} -module Utility.DirWatcher.FSEvents where +module Utility.DirWatcher.FSEvents (watchDir) where import Common hiding (isDirectory) import Utility.DirWatcher.Types diff --git a/Utility/DirWatcher/INotify.hs b/Utility/DirWatcher/INotify.hs index 341cd3073c..c33b02fa6b 100644 --- a/Utility/DirWatcher/INotify.hs +++ b/Utility/DirWatcher/INotify.hs @@ -5,7 +5,7 @@ - License: BSD-2-clause -} -module Utility.DirWatcher.INotify where +module Utility.DirWatcher.INotify (watchDir) where import Common hiding (isDirectory) import Utility.ThreadLock diff --git a/Utility/DirWatcher/Types.hs b/Utility/DirWatcher/Types.hs index 75ef69f83b..9abd5f36a1 100644 --- a/Utility/DirWatcher/Types.hs +++ b/Utility/DirWatcher/Types.hs @@ -5,7 +5,11 @@ - License: BSD-2-clause -} -module Utility.DirWatcher.Types where +module Utility.DirWatcher.Types ( + Hook, + WatchHooks(..), + mkWatchHooks, +) where import Common diff --git a/Utility/DirWatcher/Win32Notify.hs b/Utility/DirWatcher/Win32Notify.hs index a2f40128fb..7a76f618a7 100644 --- a/Utility/DirWatcher/Win32Notify.hs +++ b/Utility/DirWatcher/Win32Notify.hs @@ -5,7 +5,7 @@ - License: BSD-2-clause -} -module Utility.DirWatcher.Win32Notify where +module Utility.DirWatcher.Win32Notify (watchDir) where import Common hiding (isDirectory) import Utility.DirWatcher.Types diff --git a/Utility/Directory/Stream.hs b/Utility/Directory/Stream.hs index e827ef21a2..3a6222c561 100644 --- a/Utility/Directory/Stream.hs +++ b/Utility/Directory/Stream.hs @@ -9,11 +9,16 @@ {-# LANGUAGE LambdaCase #-} {-# OPTIONS_GHC -fno-warn-tabs #-} -module Utility.Directory.Stream where +module Utility.Directory.Stream ( + DirectoryHandle, + openDirectory, + closeDirectory, + readDirectory, + isDirectoryEmpty, +) where import Control.Monad import System.FilePath -import System.IO.Unsafe (unsafeInterleaveIO) import Control.Concurrent import Data.Maybe import Prelude @@ -100,22 +105,6 @@ readDirectory hdl@(DirectoryHandle _ h fdat mv) = do return (Just filename) #endif --- | Like getDirectoryContents, but rather than buffering the whole --- directory content in memory, lazily streams. --- --- This is like lazy readFile in that the handle to the directory remains --- open until the whole list is consumed, or until the list is garbage --- collected. So use with caution particularly when traversing directory --- trees. -streamDirectoryContents :: FilePath -> IO [FilePath] -streamDirectoryContents d = openDirectory d >>= collect - where - collect hdl = readDirectory hdl >>= \case - Nothing -> return [] - Just f -> do - rest <- unsafeInterleaveIO (collect hdl) - return (f:rest) - -- | True only when directory exists and contains nothing. -- Throws exception if directory does not exist. isDirectoryEmpty :: FilePath -> IO Bool diff --git a/Utility/Dot.hs b/Utility/Dot.hs index e21915d327..95aff93c02 100644 --- a/Utility/Dot.hs +++ b/Utility/Dot.hs @@ -1,11 +1,23 @@ {- a simple graphviz / dot(1) digraph description generator library + - + - import qualified - - Copyright 2010 Joey Hess - - License: BSD-2-clause -} -module Utility.Dot where -- import qualified +module Utility.Dot ( + graph, + graphNode, + graphEdge, + label, + attr, + fillColor, + subGraph, + indent, + quote, +) where {- generates a graph description from a list of lines -} graph :: [String] -> String diff --git a/Utility/DottedVersion.hs b/Utility/DottedVersion.hs index c2a849b4a2..dff37176ba 100644 --- a/Utility/DottedVersion.hs +++ b/Utility/DottedVersion.hs @@ -7,7 +7,11 @@ {-# OPTIONS_GHC -fno-warn-tabs #-} -module Utility.DottedVersion where +module Utility.DottedVersion ( + DottedVersion, + fromDottedVersion, + normalize, +) where import Common diff --git a/Utility/Env.hs b/Utility/Env.hs index dfebd98680..9847326940 100644 --- a/Utility/Env.hs +++ b/Utility/Env.hs @@ -8,7 +8,14 @@ {-# LANGUAGE CPP #-} {-# OPTIONS_GHC -fno-warn-tabs #-} -module Utility.Env where +module Utility.Env ( + getEnv, + getEnvDefault, + getEnvironment, + addEntry, + addEntries, + delEntry, +) where #ifdef mingw32_HOST_OS import Utility.Exception diff --git a/Utility/Env/Basic.hs b/Utility/Env/Basic.hs index 38295bea04..db738270f7 100644 --- a/Utility/Env/Basic.hs +++ b/Utility/Env/Basic.hs @@ -7,7 +7,10 @@ {-# OPTIONS_GHC -fno-warn-tabs #-} -module Utility.Env.Basic where +module Utility.Env.Basic ( + getEnv, + getEnvDefault, +) where import Utility.Exception import Control.Applicative diff --git a/Utility/Env/Set.hs b/Utility/Env/Set.hs index 9d17090b9b..f14674ca67 100644 --- a/Utility/Env/Set.hs +++ b/Utility/Env/Set.hs @@ -7,7 +7,10 @@ {-# LANGUAGE CPP #-} -module Utility.Env.Set where +module Utility.Env.Set ( + setEnv, + unsetEnv, +) where #ifdef mingw32_HOST_OS import qualified System.SetEnv diff --git a/Utility/FileSize.hs b/Utility/FileSize.hs index 5f89cff2c0..c86528369a 100644 --- a/Utility/FileSize.hs +++ b/Utility/FileSize.hs @@ -5,7 +5,11 @@ {-# LANGUAGE CPP #-} -module Utility.FileSize where +module Utility.FileSize ( + FileSize, + getFileSize, + getFileSize', +) where import System.PosixCompat.Files #ifdef mingw32_HOST_OS diff --git a/Utility/Gpg.hs b/Utility/Gpg.hs index 5be1ff528d..4c04f8265f 100644 --- a/Utility/Gpg.hs +++ b/Utility/Gpg.hs @@ -7,7 +7,32 @@ {-# LANGUAGE CPP #-} -module Utility.Gpg where +module Utility.Gpg ( + KeyId, + KeyIds(..), + GpgCmd(..), + mkGpgCmd, + boolGpgCmd, + pkEncTo, + stdEncryptionParams, + pipeStrict, + feedRead, + pipeLazy, + findPubKeys, + UserId, + secretKeys, + KeyType(..), + maxRecommendedKeySize, + genSecretKey, + genRandom, + testKeyId, +#ifndef mingw32_HOST_OS + testHarness, + testTestHarness, + checkEncryptionFile, + checkEncryptionStream, +#endif +) where import Common import qualified BuildInfo @@ -279,6 +304,7 @@ genRandom cmd highQuality size = checksize <$> readStrict cmd params - It has an empty passphrase. -} testKeyId :: String testKeyId = "129D6E0AC537B9C7" + testKey :: String testKey = keyBlock True [ "mI0ETvFAZgEEAKnqwWgZqznMhi1RQExem2H8t3OyKDxaNN3rBN8T6LWGGqAYV4wT" @@ -299,6 +325,7 @@ testKey = keyBlock True , "+gQkDF9/" , "=1k11" ] + testSecretKey :: String testSecretKey = keyBlock False [ "lQHYBE7xQGYBBACp6sFoGas5zIYtUUBMXpth/Ldzsig8WjTd6wTfE+i1hhqgGFeM" @@ -332,6 +359,7 @@ testSecretKey = keyBlock False , "IJf+/dFjxEmflWpbxw/36pEd/EReLX8b8qDIYadK6BpiWN9xgEiBv/oEJAxffw==" , "=LDsg" ] + keyBlock :: Bool -> [String] -> String keyBlock public ls = unlines [ "-----BEGIN PGP "++t++" KEY BLOCK-----" @@ -381,9 +409,7 @@ testTestHarness :: FilePath -> GpgCmd -> IO Bool testTestHarness tmpdir cmd = do keys <- testHarness tmpdir cmd $ findPubKeys cmd testKeyId return $ KeyIds [testKeyId] == keys -#endif -#ifndef mingw32_HOST_OS checkEncryptionFile :: GpgCmd -> FilePath -> Maybe KeyIds -> IO Bool checkEncryptionFile cmd filename keys = checkGpgPackets cmd keys =<< readStrict cmd params diff --git a/Utility/HtmlDetect.hs b/Utility/HtmlDetect.hs index bf0839e9ec..9096cd4022 100644 --- a/Utility/HtmlDetect.hs +++ b/Utility/HtmlDetect.hs @@ -5,7 +5,11 @@ - License: BSD-2-clause -} -module Utility.HtmlDetect where +module Utility.HtmlDetect ( + isHtml, + isHtmlBs, + htmlPrefixLength, +) where import Text.HTML.TagSoup import Data.Char diff --git a/Utility/HumanNumber.hs b/Utility/HumanNumber.hs index c3fede95f6..6143cef105 100644 --- a/Utility/HumanNumber.hs +++ b/Utility/HumanNumber.hs @@ -5,7 +5,7 @@ - License: BSD-2-clause -} -module Utility.HumanNumber where +module Utility.HumanNumber (showImprecise) where {- Displays a fractional value as a string with a limited number - of decimal digits. -} diff --git a/Utility/IPAddress.hs b/Utility/IPAddress.hs index cfe9873ca3..52d9dc0f7c 100644 --- a/Utility/IPAddress.hs +++ b/Utility/IPAddress.hs @@ -5,7 +5,12 @@ - License: BSD-2-clause -} -module Utility.IPAddress where +module Utility.IPAddress ( + extractIPAddress, + isLoopbackAddress, + isPrivateAddress, + makeAddressMatcher, +) where import Utility.Exception diff --git a/Utility/LinuxMkLibs.hs b/Utility/LinuxMkLibs.hs index 15f82fd18e..9f042dc2eb 100644 --- a/Utility/LinuxMkLibs.hs +++ b/Utility/LinuxMkLibs.hs @@ -5,7 +5,11 @@ - License: BSD-2-clause -} -module Utility.LinuxMkLibs where +module Utility.LinuxMkLibs ( + installLib, + parseLdd, + glibcLibs, +) where import Utility.PartialPrelude import Utility.Directory diff --git a/Utility/LockFile/LockStatus.hs b/Utility/LockFile/LockStatus.hs index 3f466c1255..6690882b76 100644 --- a/Utility/LockFile/LockStatus.hs +++ b/Utility/LockFile/LockStatus.hs @@ -5,7 +5,7 @@ - License: BSD-2-clause -} -module Utility.LockFile.LockStatus where +module Utility.LockFile.LockStatus (LockStatus(..)) where import System.Posix diff --git a/Utility/LogFile.hs b/Utility/LogFile.hs index 4e08e9b9f4..cb02047351 100644 --- a/Utility/LogFile.hs +++ b/Utility/LogFile.hs @@ -7,7 +7,15 @@ {-# LANGUAGE CPP #-} -module Utility.LogFile where +module Utility.LogFile ( + openLog, + listLogs, + maxLogs, +#ifndef mingw32_HOST_OS + redirLog, + redir, +#endif +) where import Common diff --git a/Utility/Lsof.hs b/Utility/Lsof.hs index 90b22cc7bc..22d4a0ebfb 100644 --- a/Utility/Lsof.hs +++ b/Utility/Lsof.hs @@ -5,7 +5,12 @@ - License: BSD-2-clause -} -module Utility.Lsof where +module Utility.Lsof ( + LsofOpenMode(..), + setup, + queryDir, + query, +) where import Common import BuildInfo diff --git a/Utility/Metered.hs b/Utility/Metered.hs index 0fec7749b6..53e253eccb 100644 --- a/Utility/Metered.hs +++ b/Utility/Metered.hs @@ -7,7 +7,40 @@ {-# LANGUAGE TypeSynonymInstances, BangPatterns #-} -module Utility.Metered where +module Utility.Metered ( + MeterUpdate, + nullMeterUpdate, + combineMeterUpdate, + BytesProcessed(..), + toBytesProcessed, + fromBytesProcessed, + addBytesProcessed, + zeroBytesProcessed, + withMeteredFile, + meteredWrite, + meteredWrite', + meteredWriteFile, + offsetMeterUpdate, + hGetContentsMetered, + hGetMetered, + defaultChunkSize, + watchFileSize, + OutputHandler(..), + ProgressParser, + commandMeter, + commandMeter', + demeterCommand, + demeterCommandEnv, + avoidProgress, + rateLimitMeterUpdate, + Meter, + mkMeter, + setMeterTotalSize, + updateMeter, + displayMeterHandle, + clearMeterHandle, + bandwidthMeter, +) where import Common import Utility.Percentage @@ -80,11 +113,6 @@ withMeteredFile :: FilePath -> MeterUpdate -> (L.ByteString -> IO a) -> IO a withMeteredFile f meterupdate a = withBinaryFile f ReadMode $ \h -> hGetContentsMetered h meterupdate >>= a -{- Sends the content of a file to a Handle, updating the meter as it's - - written. -} -streamMeteredFile :: FilePath -> MeterUpdate -> Handle -> IO () -streamMeteredFile f meterupdate h = withMeteredFile f meterupdate $ L.hPut h - {- Writes a ByteString to a Handle, updating a meter as it's written. -} meteredWrite :: MeterUpdate -> Handle -> L.ByteString -> IO () meteredWrite meterupdate h = void . meteredWrite' meterupdate h diff --git a/Utility/Misc.hs b/Utility/Misc.hs index 48fcceb7e2..de77c949a0 100644 --- a/Utility/Misc.hs +++ b/Utility/Misc.hs @@ -7,7 +7,19 @@ {-# OPTIONS_GHC -fno-warn-tabs #-} -module Utility.Misc where +module Utility.Misc ( + hGetContentsStrict, + readFileStrict, + separate, + firstLine, + segment, + segmentDelim, + massReplace, + hGetSomeString, + exitBool, + + prop_segment_regressionTest, +) where import System.IO import Control.Monad diff --git a/Utility/Monad.hs b/Utility/Monad.hs index ac751043cd..abe06f335c 100644 --- a/Utility/Monad.hs +++ b/Utility/Monad.hs @@ -7,7 +7,19 @@ {-# OPTIONS_GHC -fno-warn-tabs #-} -module Utility.Monad where +module Utility.Monad ( + firstM, + getM, + anyM, + allM, + untilTrue, + ifM, + (<||>), + (<&&>), + observe, + after, + noop, +) where import Data.Maybe import Control.Monad diff --git a/Utility/Network.hs b/Utility/Network.hs index 4def3c5c55..23487b1bcb 100644 --- a/Utility/Network.hs +++ b/Utility/Network.hs @@ -7,7 +7,7 @@ {-# OPTIONS_GHC -fno-warn-tabs #-} -module Utility.Network where +module Utility.Network (getHostname) where import Utility.Process import Utility.Exception diff --git a/Utility/OSX.hs b/Utility/OSX.hs index f6aba50960..f5820a78d6 100644 --- a/Utility/OSX.hs +++ b/Utility/OSX.hs @@ -7,7 +7,12 @@ {-# OPTIONS_GHC -fno-warn-tabs #-} -module Utility.OSX where +module Utility.OSX ( + autoStartBase, + systemAutoStart, + userAutoStart, + genOSXAutoStartFile, +) where import Utility.UserInfo diff --git a/Utility/OptParse.hs b/Utility/OptParse.hs index c65a18c249..c81517edfb 100644 --- a/Utility/OptParse.hs +++ b/Utility/OptParse.hs @@ -5,7 +5,10 @@ - License: BSD-2-clause -} -module Utility.OptParse where +module Utility.OptParse ( + invertableSwitch, + invertableSwitch', +) where import Options.Applicative import Data.Monoid diff --git a/Utility/PID.hs b/Utility/PID.hs index f5f8aa873e..1c617006b3 100644 --- a/Utility/PID.hs +++ b/Utility/PID.hs @@ -7,7 +7,7 @@ {-# LANGUAGE CPP #-} -module Utility.PID where +module Utility.PID (PID, getPID) where #ifndef mingw32_HOST_OS import System.Posix.Types (ProcessID) diff --git a/Utility/Parallel.hs b/Utility/Parallel.hs index 2352ba7068..2a778b9958 100644 --- a/Utility/Parallel.hs +++ b/Utility/Parallel.hs @@ -5,7 +5,7 @@ - License: BSD-2-clause -} -module Utility.Parallel where +module Utility.Parallel (inParallel) where import Common diff --git a/Utility/PartialPrelude.hs b/Utility/PartialPrelude.hs index 85f80534ca..90c67ffa2d 100644 --- a/Utility/PartialPrelude.hs +++ b/Utility/PartialPrelude.hs @@ -7,7 +7,18 @@ {-# OPTIONS_GHC -fno-warn-tabs #-} -module Utility.PartialPrelude where +module Utility.PartialPrelude ( + Utility.PartialPrelude.read, + Utility.PartialPrelude.head, + Utility.PartialPrelude.tail, + Utility.PartialPrelude.init, + Utility.PartialPrelude.last, + Utility.PartialPrelude.readish, + Utility.PartialPrelude.headMaybe, + Utility.PartialPrelude.lastMaybe, + Utility.PartialPrelude.beginning, + Utility.PartialPrelude.end, +) where import qualified Data.Maybe diff --git a/Utility/Path.hs b/Utility/Path.hs index f1302ae8ca..26d66066ad 100644 --- a/Utility/Path.hs +++ b/Utility/Path.hs @@ -8,7 +8,29 @@ {-# LANGUAGE CPP #-} {-# OPTIONS_GHC -fno-warn-tabs #-} -module Utility.Path where +module Utility.Path ( + simplifyPath, + absPathFrom, + parentDir, + upFrom, + dirContains, + absPath, + relPathCwdToFile, + relPathDirToFile, + relPathDirToFileAbs, + segmentPaths, + runSegmentPaths, + relHome, + inPath, + searchPath, + dotfile, + sanitizeFilePath, + splitShortExtensions, + + prop_upFrom_basics, + prop_relPathDirToFile_basics, + prop_relPathDirToFile_regressionTest, +) where import System.FilePath import Data.List diff --git a/Utility/Path/Max.hs b/Utility/Path/Max.hs index 49e65d38b0..11c0ea2d75 100644 --- a/Utility/Path/Max.hs +++ b/Utility/Path/Max.hs @@ -8,7 +8,7 @@ {-# LANGUAGE CPP #-} {-# OPTIONS_GHC -fno-warn-tabs #-} -module Utility.Path.Max where +module Utility.Path.Max (fileNameLengthLimit) where #ifndef mingw32_HOST_OS import Utility.Exception diff --git a/Utility/Process/Transcript.hs b/Utility/Process/Transcript.hs index 68fb2223eb..c0ee0b1a16 100644 --- a/Utility/Process/Transcript.hs +++ b/Utility/Process/Transcript.hs @@ -8,7 +8,11 @@ {-# LANGUAGE CPP #-} {-# OPTIONS_GHC -fno-warn-tabs #-} -module Utility.Process.Transcript where +module Utility.Process.Transcript ( + processTranscript, + processTranscript', + processTranscript'', +) where import Utility.Process import Utility.Misc diff --git a/Utility/Rsync.hs b/Utility/Rsync.hs index be890ca076..c6881b7ab9 100644 --- a/Utility/Rsync.hs +++ b/Utility/Rsync.hs @@ -7,7 +7,17 @@ {-# LANGUAGE CPP #-} -module Utility.Rsync where +module Utility.Rsync ( + rsyncShell, + rsyncServerSend, + rsyncServerReceive, + rsyncUseDestinationPermissions, + rsync, + rsyncUrlIsShell, + rsyncUrlIsPath, + rsyncProgress, + filterRsyncSafeOptions, +) where import Common import Utility.Metered @@ -161,10 +171,8 @@ filterRsyncSafeOptions = fst3 . getOpt Permute - The virtual filesystem contains: - /c, /d, ... mount points for Windows drives -} +#ifdef mingw32_HOST_OS toMSYS2Path :: FilePath -> FilePath -#ifndef mingw32_HOST_OS -toMSYS2Path = id -#else toMSYS2Path p | null drive = recombine parts | otherwise = recombine $ "/" : driveletter drive : parts diff --git a/Utility/Su.hs b/Utility/Su.hs index 03355991d1..cba88199e5 100644 --- a/Utility/Su.hs +++ b/Utility/Su.hs @@ -7,7 +7,15 @@ {-# LANGUAGE CPP #-} -module Utility.Su where +module Utility.Su ( + WhosePassword(..), + PasswordPrompt(..), + describePasswordPrompt, + describePasswordPrompt', + SuCommand, + runSuCommand, + mkSuCommand, +) where import Common diff --git a/Utility/Url.hs b/Utility/Url.hs index b10aba1c93..2aa4e6a589 100644 --- a/Utility/Url.hs +++ b/Utility/Url.hs @@ -138,22 +138,14 @@ curlParams uo ps = ps ++ uaparams ++ headerparams ++ addedparams ++ schemeparams ] schemelist = map fromScheme $ S.toList $ allowedSchemes uo -checkPolicy :: UrlOptions -> URI -> a -> (String -> IO b) -> IO a -> IO a -checkPolicy uo u onerr displayerror a +checkPolicy :: UrlOptions -> URI -> IO (Either String a) -> IO (Either String a) +checkPolicy uo u a | allowedScheme uo u = a - | otherwise = do - void $ displayerror $ - "Configuration does not allow accessing " ++ show u - return onerr + | otherwise = return $ Left $ + "Configuration does not allow accessing " ++ show u -unsupportedUrlScheme :: URI -> (String -> IO a) -> IO a -unsupportedUrlScheme u displayerror = - displayerror $ "Unsupported url scheme " ++ show u - -warnError :: String -> IO () -warnError msg = do - hPutStrLn stderr msg - hFlush stderr +unsupportedUrlScheme :: URI -> String +unsupportedUrlScheme u = "Unsupported url scheme " ++ show u allowedScheme :: UrlOptions -> URI -> Bool allowedScheme uo u = uscheme `S.member` allowedSchemes uo @@ -161,14 +153,18 @@ allowedScheme uo u = uscheme `S.member` allowedSchemes uo uscheme = mkScheme $ takeWhile (/=':') (uriScheme u) {- Checks that an url exists and could be successfully downloaded, - - also checking that its size, if available, matches a specified size. -} -checkBoth :: URLString -> Maybe Integer -> UrlOptions -> IO Bool -checkBoth url expected_size uo = do - v <- check url expected_size uo - return (fst v && snd v) + - also checking that its size, if available, matches a specified size. + - + - The Left error is returned if policy does not allow accessing the url + - or the url scheme is not supported. + -} +checkBoth :: URLString -> Maybe Integer -> UrlOptions -> IO (Either String Bool) +checkBoth url expected_size uo = fmap go <$> check url expected_size uo + where + go v = fst v && snd v -check :: URLString -> Maybe Integer -> UrlOptions -> IO (Bool, Bool) -check url expected_size uo = go <$> getUrlInfo url uo +check :: URLString -> Maybe Integer -> UrlOptions -> IO (Either String (Bool, Bool)) +check url expected_size uo = fmap go <$> getUrlInfo url uo where go (UrlInfo False _ _) = (False, False) go (UrlInfo True Nothing _) = (True, True) @@ -176,8 +172,8 @@ check url expected_size uo = go <$> getUrlInfo url uo Just _ -> (True, expected_size == s) Nothing -> (True, True) -exists :: URLString -> UrlOptions -> IO Bool -exists url uo = urlExists <$> getUrlInfo url uo +exists :: URLString -> UrlOptions -> IO (Either String Bool) +exists url uo = fmap urlExists <$> getUrlInfo url uo data UrlInfo = UrlInfo { urlExists :: Bool @@ -190,32 +186,36 @@ assumeUrlExists :: UrlInfo assumeUrlExists = UrlInfo True Nothing Nothing {- Checks that an url exists and could be successfully downloaded, - - also returning its size and suggested filename if available. -} -getUrlInfo :: URLString -> UrlOptions -> IO UrlInfo + - also returning its size and suggested filename if available. + - + - The Left error is returned if policy does not allow accessing the url + - or the url scheme is not supported. + -} +getUrlInfo :: URLString -> UrlOptions -> IO (Either String UrlInfo) getUrlInfo url uo = case parseURIRelaxed url of - Just u -> checkPolicy uo u dne warnError $ - case (urlDownloader uo, parseUrlRequest (show u)) of - (DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> catchJust - -- When http redirects to a protocol which - -- conduit does not support, it will throw - -- a StatusCodeException with found302 - -- and a Response with the redir Location. - (matchStatusCodeException (== found302)) - (existsconduit req) - (followredir r) - `catchNonAsync` (const $ return dne) - (DownloadWithConduit (DownloadWithCurlRestricted r), Nothing) - | isfileurl u -> existsfile u - | isftpurl u -> existscurlrestricted r u url ftpport - `catchNonAsync` (const $ return dne) - | otherwise -> do - unsupportedUrlScheme u warnError - return dne - (DownloadWithCurl _, _) - | isfileurl u -> existsfile u - | otherwise -> existscurl u (basecurlparams url) - Nothing -> return dne - where + Just u -> checkPolicy uo u (go u) + Nothing -> return (Right dne) + where + go :: URI -> IO (Either String UrlInfo) + go u = case (urlDownloader uo, parseUrlRequest (show u)) of + (DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> catchJust + -- When http redirects to a protocol which + -- conduit does not support, it will throw + -- a StatusCodeException with found302 + -- and a Response with the redir Location. + (matchStatusCodeException (== found302)) + (Right <$> existsconduit req) + (followredir r) + `catchNonAsync` (const $ return $ Right dne) + (DownloadWithConduit (DownloadWithCurlRestricted r), Nothing) + | isfileurl u -> Right <$> existsfile u + | isftpurl u -> (Right <$> existscurlrestricted r u url ftpport) + `catchNonAsync` (const $ return $ Right dne) + | otherwise -> return $ Left $ unsupportedUrlScheme u + (DownloadWithCurl _, _) + | isfileurl u -> Right <$> existsfile u + | otherwise -> Right <$> existscurl u (basecurlparams url) + dne = UrlInfo False Nothing Nothing found sz f = return $ UrlInfo True sz f @@ -291,11 +291,11 @@ getUrlInfo url uo = case parseURIRelaxed url of -- http to file redirect would not be secure, -- and http-conduit follows http to http. Just u' | isftpurl u' -> - checkPolicy uo u' dne warnError $ + checkPolicy uo u' $ Right <$> existscurlrestricted r u' url' ftpport - _ -> return dne - Nothing -> return dne - followredir _ _ = return dne + _ -> return (Right dne) + Nothing -> return (Right dne) + followredir _ _ = return (Right dne) -- Parse eg: attachment; filename="fname.ext" -- per RFC 2616 @@ -317,31 +317,32 @@ headRequest r = r {- Download a perhaps large file, with auto-resume of incomplete downloads. - - - Displays error message on stderr when download failed. + - When the download fails, returns an error message. -} -download :: MeterUpdate -> URLString -> FilePath -> UrlOptions -> IO Bool +download :: MeterUpdate -> URLString -> FilePath -> UrlOptions -> IO (Either String ()) download = download' False -{- Avoids displaying any error message. -} +{- Avoids displaying any error message, including silencing curl errors. -} downloadQuiet :: MeterUpdate -> URLString -> FilePath -> UrlOptions -> IO Bool -downloadQuiet = download' True +downloadQuiet meterupdate url file uo = isRight + <$> download' True meterupdate url file uo -download' :: Bool -> MeterUpdate -> URLString -> FilePath -> UrlOptions -> IO Bool -download' noerror meterupdate url file uo = +download' :: Bool -> MeterUpdate -> URLString -> FilePath -> UrlOptions -> IO (Either String ()) +download' nocurlerror meterupdate url file uo = catchJust matchHttpException go showhttpexception `catchNonAsync` (dlfailed . show) where go = case parseURIRelaxed url of - Just u -> checkPolicy uo u False dlfailed $ + Just u -> checkPolicy uo u $ case (urlDownloader uo, parseUrlRequest (show u)) of (DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> catchJust (matchStatusCodeException (== found302)) - (downloadConduit meterupdate req file uo >> return True) + (downloadConduit meterupdate req file uo >> return (Right ())) (followredir r) (DownloadWithConduit (DownloadWithCurlRestricted r), Nothing) | isfileurl u -> downloadfile u | isftpurl u -> downloadcurlrestricted r u url ftpport - | otherwise -> unsupportedUrlScheme u dlfailed + | otherwise -> dlfailed $ unsupportedUrlScheme u (DownloadWithCurl _, _) | isfileurl u -> downloadfile u | otherwise -> downloadcurl url basecurlparams @@ -354,27 +355,20 @@ download' noerror meterupdate url file uo = ftpport = 21 - showhttpexception he = do - let msg = case he of - HttpExceptionRequest _ (StatusCodeException r _) -> - B8.toString $ statusMessage $ responseStatus r - HttpExceptionRequest _ (InternalException ie) -> - case fromException ie of - Nothing -> show ie - Just (ConnectionRestricted why) -> why - HttpExceptionRequest _ other -> show other - _ -> show he - dlfailed msg - - dlfailed msg - | noerror = return False - | otherwise = do - hPutStrLn stderr $ "download failed: " ++ msg - hFlush stderr - return False + showhttpexception he = dlfailed $ case he of + HttpExceptionRequest _ (StatusCodeException r _) -> + B8.toString $ statusMessage $ responseStatus r + HttpExceptionRequest _ (InternalException ie) -> + case fromException ie of + Nothing -> show ie + Just (ConnectionRestricted why) -> why + HttpExceptionRequest _ other -> show other + _ -> show he + dlfailed msg = return $ Left $ "download failed: " ++ msg + basecurlparams = curlParams uo - [ if noerror + [ if nocurlerror then Param "-S" else Param "-sS" , Param "-f" @@ -387,7 +381,10 @@ download' noerror meterupdate url file uo = -- if the url happens to be empty, so pre-create. unlessM (doesFileExist file) $ writeFile file "" - boolSystem "curl" (curlparams ++ [Param "-o", File file, File rawurl]) + ifM (boolSystem "curl" (curlparams ++ [Param "-o", File file, File rawurl])) + ( return $ Right () + , return $ Left "download failed" + ) downloadcurlrestricted r u rawurl defport = downloadcurl rawurl =<< curlRestrictedParams r u defport basecurlparams @@ -396,7 +393,7 @@ download' noerror meterupdate url file uo = let src = unEscapeString (uriPath u) withMeteredFile src meterupdate $ L.writeFile file - return True + return $ Right () -- Conduit does not support ftp, so will throw an exception on a -- redirect to a ftp url; fall back to curl. @@ -404,7 +401,7 @@ download' noerror meterupdate url file uo = case headMaybe $ map decodeBS $ getResponseHeader hLocation resp of Just url' -> case parseURIRelaxed url' of Just u' | isftpurl u' -> - checkPolicy uo u' False dlfailed $ + checkPolicy uo u' $ downloadcurlrestricted r u' url' ftpport _ -> throwIO ex Nothing -> throwIO ex @@ -448,7 +445,7 @@ downloadConduit meterupdate req file uo = liftIO $ debugM "url" (show req'') resp <- http req'' (httpManager uo) if responseStatus resp == partialContent206 - then store (BytesProcessed sz) AppendMode resp + then store (toBytesProcessed sz) AppendMode resp else if responseStatus resp == ok200 then store zeroBytesProcessed WriteMode resp else respfailure resp diff --git a/doc/bugs/OSX_dmg_git-core_binaries_do_not_link.mdwn b/doc/bugs/OSX_dmg_git-core_binaries_do_not_link.mdwn new file mode 100644 index 0000000000..d976be1fd6 --- /dev/null +++ b/doc/bugs/OSX_dmg_git-core_binaries_do_not_link.mdwn @@ -0,0 +1,12 @@ +The OSX .dmg contains a few binaries in git-core like git-remote-http. +They have been adjusted by otool to link to libraries in the same directory +as the binary. However, the libraries are not located in the git-core +directory, but in its parent directory, and so the git-core binaries don't +link. + +I don't think this is a new regression, but not entirely sure. + +Seems that OSXMkLibs could symlink ../lib into git-core. +--[[Joey]] + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/Packfile_does_not_match_digest__58___gcrypt_with_assistant/comment_14_2274479a1eef9ffc6a53ffb65e2c3511._comment b/doc/bugs/Packfile_does_not_match_digest__58___gcrypt_with_assistant/comment_14_2274479a1eef9ffc6a53ffb65e2c3511._comment new file mode 100644 index 0000000000..7249adc933 --- /dev/null +++ b/doc/bugs/Packfile_does_not_match_digest__58___gcrypt_with_assistant/comment_14_2274479a1eef9ffc6a53ffb65e2c3511._comment @@ -0,0 +1,52 @@ +[[!comment format=mdwn + username="xwvvvvwx" + avatar="http://cdn.libravatar.org/avatar/7198160b33539b5b1b2d56ca85c562d9" + subject="comment 14" + date="2019-11-21T17:32:31Z" + content=""" +I just reproduced this when pushing to a gcrypt remote on rsync.net using the assistant. There is only one client pushing to the gcrypt remote. + +It was during the initial sync of a moderately large amount of data (~22G), perhaps this has something to do with it? + +I could reproduce the issue by cloning with gcrypt directly (`git clone gcrypt::ssh://....`). + +I was able to recover by following the steps outlined in Schnouki's comment (#12), but this is obviously quite an unsatisfactory fix. + +I am using annex to replicate important personal data, and I find this issue highly concerning. + +Foolishly, I did not keep a copy of the bad repo before I forced pushed over it on the remote, so I do not have a copy available to experiment with :( + +--- + +## logs + +`daemon.log` excerpt: [https://ipfs.io/ipfs/QmcoPuTLY2v5FWPABQLVwgyqW5WdsvkBbVS33cJh6zjzi4](https://ipfs.io/ipfs/QmcoPuTLY2v5FWPABQLVwgyqW5WdsvkBbVS33cJh6zjzi4) + + +`git clone` output: + +``` +[annex@xwvvvvwx:~]$ git clone gcrypt::ssh:// remote +Cloning into 'remote'... +gcrypt: Decrypting manifest +gpg: Signature made Thu 21 Nov 2019 04:02:40 PM CET +gpg: using RSA key 92E9F58E9F8C6845423C251AACD9A98951774194 +gpg: Good signature from \"git-annex \" [ultimate] +gcrypt: Remote ID is :id:tWrcOFKu2yX7y+jLDLxm +gcrypt: Packfile e7b619864585f3c921b491fd041127cf0ae33c4480810610dcb2e37ec46a82be does not match digest! +fatal: early EOF +``` + +`git annex version`: + +``` +git-annex version: 7.20191114 +build flags: Assistant Webapp Pairing S3 WebDAV Inotify DBus DesktopNotify TorrentParser MagicMime Feeds Testsuite +dependency versions: aws-0.21.1 bloomfilter-2.0.1.0 cryptonite-0.25 DAV-1.3.3 feed-1.2.0.1 ghc-8.6.5 http-client-0.6.4 persistent-sqlite-2.9.3 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.0 +key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL +remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs hook external +operating system: linux x86_64 +supported repository versions: 7 +upgrade supported from repository versions: 0 1 2 3 4 5 6 +``` +"""]] diff --git a/doc/bugs/concurrent_git-annex-copy_to_s3_special_remote_fails/comment_3_f244825dcd89cd6bc74deec7ac4bdd99._comment b/doc/bugs/concurrent_git-annex-copy_to_s3_special_remote_fails/comment_3_f244825dcd89cd6bc74deec7ac4bdd99._comment new file mode 100644 index 0000000000..f2b950348e --- /dev/null +++ b/doc/bugs/concurrent_git-annex-copy_to_s3_special_remote_fails/comment_3_f244825dcd89cd6bc74deec7ac4bdd99._comment @@ -0,0 +1,24 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2019-11-13T19:29:34Z" + content=""" +--debug might provide some clue in its http dump. + +The ParseError comes from attoparsec. Seems likely that aeson/aws is what's +using it there, and that it is failing to parse something from S3. + +Of course, the malloc error suggests a low-level memory problem, probably +from C code. I don't think git-annex contains anything like that, so it +must be from a dependency. + +The S3 signature being wrong again points to the aws library, or something +lower level. And then the following double free is another low-level memory +problem. + +So there's a pattern, and it seems to extend across linux and OSX. + +Kind of wondering if something in the library stack is somehow failing to +be concurrency safe. If two http requests end up using the same memory, +it would kind of explain all of this. +"""]] diff --git a/doc/bugs/cygwin.mdwn b/doc/bugs/cygwin.mdwn new file mode 100644 index 0000000000..4d0bf07a62 --- /dev/null +++ b/doc/bugs/cygwin.mdwn @@ -0,0 +1,406 @@ +Cygwin do not work with git-annex windows installed version + + +### What steps will reproduce the problem? +* Install git-annex windows version +* Try run git annex test under cygwin, and got 65 test failed out of 101. +* Try run git annex test under git bash windows and got 101 test passed. +* NOTE: git-lfs windows installed version working fine under cygwin and git bash windows. + + +### What version of git-annex are you using? On what operating system? +git-annex version: 7.20191106-ge486fd5e0 +build flags: Assistant Webapp Pairing S3 WebDAV TorrentParser Feeds Testsuite +dependency versions: aws-0.21.1 bloomfilter-2.0.1.0 cryptonite-0.25 DAV-1.3.3 feed-1.0.1.0 ghc-8.6.5 http-client-0.5.14 persistent-sqlite-2.9.3 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.0 +key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL +remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs hook external +operating system: mingw32 x86_64 +supported repository versions: 7 +upgrade supported from repository versions: 2 3 4 5 6 + +### Please provide any additional information below. + +[[!format sh """ +# If you can, paste a complete transcript of the problem occurring here. +# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log +Cygwin ~/git..t_tools/wget/cache (test) +(506)$ git-annex test +Tests + QuickCheck + prop_encode_decode_roundtrip: OK (0.04s) + +++ OK, passed 1000 tests. + prop_encode_c_decode_c_roundtrip: OK (0.03s) + +++ OK, passed 1000 tests. + prop_isomorphic_key_encode: OK + +++ OK, passed 1000 tests. + prop_isomorphic_shellEscape: OK (0.02s) + +++ OK, passed 1000 tests. + prop_isomorphic_shellEscape_multiword: OK (0.70s) + +++ OK, passed 1000 tests. + prop_isomorphic_configEscape: OK (0.02s) + +++ OK, passed 1000 tests. + prop_parse_show_Config: OK (0.04s) + +++ OK, passed 1000 tests. + prop_upFrom_basics: OK (0.02s) + +++ OK, passed 1000 tests. + prop_relPathDirToFile_basics: OK (0.03s) + +++ OK, passed 1000 tests. + prop_relPathDirToFile_regressionTest: OK + +++ OK, passed 1 test. + prop_cost_sane: OK + +++ OK, passed 1 test. + prop_matcher_sane: OK + +++ OK, passed 1 test. + prop_HmacSha1WithCipher_sane: OK + +++ OK, passed 1 test. + prop_VectorClock_sane: OK + +++ OK, passed 1 test. + prop_addMapLog_sane: OK + +++ OK, passed 1 test. + prop_verifiable_sane: OK (0.07s) + +++ OK, passed 1000 tests. + prop_segment_regressionTest: OK + +++ OK, passed 1 test. + prop_read_write_transferinfo: OK (0.04s) + +++ OK, passed 1000 tests. + prop_read_show_inodecache: OK (0.02s) + +++ OK, passed 1000 tests. + prop_parse_build_presence_log: OK (1.27s) + +++ OK, passed 1000 tests. + prop_parse_build_contentidentifier_log: OK (1.23s) + +++ OK, passed 1000 tests. + prop_read_show_TrustLevel: OK + +++ OK, passed 1 test. + prop_parse_build_TrustLevelLog: OK + +++ OK, passed 1 test. + prop_hashes_stable: OK + +++ OK, passed 1 test. + prop_mac_stable: OK + +++ OK, passed 1 test. + prop_schedule_roundtrips: OK (0.01s) + +++ OK, passed 1000 tests. + prop_past_sane: OK + +++ OK, passed 1 test. + prop_duration_roundtrips: OK + +++ OK, passed 1000 tests. + prop_metadata_sane: OK (0.86s) + +++ OK, passed 1000 tests. + prop_metadata_serialize: OK (0.84s) + +++ OK, passed 1000 tests. + prop_branchView_legal: OK (0.77s) + +++ OK, passed 1000 tests. + prop_viewPath_roundtrips: OK (0.03s) + +++ OK, passed 1000 tests. + prop_view_roundtrips: OK (0.52s) + +++ OK, passed 1000 tests. + prop_viewedFile_rountrips: OK (0.02s) + +++ OK, passed 1000 tests. + prop_b64_roundtrips: OK + +++ OK, passed 1000 tests. + prop_standardGroups_parse: OK + +++ OK, passed 1 test. + Unit Tests v7 adjusted unlocked branch + add dup: Init Tests + init: init test repo + Detected a filesystem without fifo support. + + Disabling ssh connection caching. + + Detected a crippled filesystem. + + Disabling core.symlinks. +(scanning for unlocked files...) + + Entering an adjusted branch where files are unlocked as this filesystem does not support locked files. + not found . +git-annex.exe: pre-commit: 1 failed + + Failed to enter adjusted branch! +ok +(recording state in git...) + not found . +git-annex.exe: pre-commit: 1 failed +FAIL (6.92s) + .\\Test\\Framework.hs:469: + git commit failed + add: add foo +ok +(recording state in git...) +add sha1foo +ok +(recording state in git...) + not found . +git-annex.exe: pre-commit: 1 failed +FAIL (8.10s) + Test.hs:303: + git commit failed + +2 out of 2 tests failed (15.02s) +FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + add extras: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + export_import: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + export_import_subdir: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + shared clone: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + log: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + import: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + reinject: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + unannex (no copy): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + unannex (with copy): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + drop (no remote): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + drop (with remote): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + drop (untrusted remote): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + get: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + get (ssh remote): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + move: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + move (ssh remote): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + copy: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + lock: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + lock --force: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + edit (no pre-commit): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + edit (pre-commit): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + partial commit: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + fix: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + trust: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + fsck (basics): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + fsck (bare): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + fsck (local untrusted): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + fsck (remote untrusted): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + fsck --from remote: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + migrate: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + migrate (via gitattributes): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + unused: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + describe: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + find: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + merge: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + info: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + version: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + sync: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + union merge regression: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + adjusted branch merge regression: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + adjusted branch subtree regression: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + conflict resolution: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + conflict resolution (adjusted branch): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + conflict resolution movein regression: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + conflict resolution (mixed directory and file): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + conflict resolution symlink bit: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + conflict resolution (uncommitted local file): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + conflict resolution (removed file): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + conflict resolution (nonannexed file): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + conflict resolution (nonannexed symlink): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + conflict resolution (mixed locked and unlocked file): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + map: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + uninit: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + uninit (in git-annex branch): FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + upgrade: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + whereis: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + hook remote: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + directory remote: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + rsync remote: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + bup remote: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + crypto: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + preferred content: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + add subdirs: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + addurl: FAIL + Exception: init tests failed! cannot continue + CallStack (from HasCallStack): + error, called at .\\Test\\Framework.hs:427:33 in main:Test.Framework + +65 out of 101 tests failed (21.64s) + (Failures above could be due to a bug in git-annex, or an incompatibility + with utilities, such as git, installed on this system.) +# End of transcript or log. +"""]] + + diff --git a/doc/bugs/error_message_in_addurl_is_not_channeled_into_json_record_with_--json-error-messages.mdwn b/doc/bugs/error_message_in_addurl_is_not_channeled_into_json_record_with_--json-error-messages.mdwn index 82c8bae21c..68d052d656 100644 --- a/doc/bugs/error_message_in_addurl_is_not_channeled_into_json_record_with_--json-error-messages.mdwn +++ b/doc/bugs/error_message_in_addurl_is_not_channeled_into_json_record_with_--json-error-messages.mdwn @@ -22,3 +22,8 @@ If user convenience was something to strive for here, it should technically be p [[!meta author=yoh]] [[!tag projects/datalad]] +> [[fixed|done]], and I also converted a number of other places +> where an error could leak through to stderr, although there are still +> some places where direct writes to stderr happen -- I'll probably never +> be able to guarantee --json-error-messages catches every possible stderr +> output. --[[Joey]] diff --git a/doc/bugs/impossible__40____63____41___to_continuously_re-import_a_directory_while_keeping_original_files_in_place/comment_1_d385d0fffdd6ac18f38828f805e4daff._comment b/doc/bugs/impossible__40____63____41___to_continuously_re-import_a_directory_while_keeping_original_files_in_place/comment_1_d385d0fffdd6ac18f38828f805e4daff._comment new file mode 100644 index 0000000000..a34534e6f5 --- /dev/null +++ b/doc/bugs/impossible__40____63____41___to_continuously_re-import_a_directory_while_keeping_original_files_in_place/comment_1_d385d0fffdd6ac18f38828f805e4daff._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2019-11-19T17:12:41Z" + content=""" +I think that you can accomplish what you want by making the directory +you're importing from be a directory special remote with exporttree=yes +importtree=yes and use the new `git annex import master --from remote` + +If that does not do what you want, I'd prefer to look at making it be able +to do so. I hope to eventually remove the legacy git-annex import from +directory, since we have this new more general interface. +"""]] diff --git a/doc/bugs/parallel_copy_fails/comment_2_6a0e3514a111d48662bb50ca6a15b01f._comment b/doc/bugs/parallel_copy_fails/comment_2_6a0e3514a111d48662bb50ca6a15b01f._comment new file mode 100644 index 0000000000..140dc91de0 --- /dev/null +++ b/doc/bugs/parallel_copy_fails/comment_2_6a0e3514a111d48662bb50ca6a15b01f._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2019-11-13T19:37:16Z" + content=""" +The signal 11 is very significant. It points to a problem in a lower-level +library (or ghc runtime), or perhaps a bad memory problem. git-annex does +not itself contain any code that can segfault, afaik. + +Almost certianly the same as the other bug. +"""]] diff --git a/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction.mdwn b/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction.mdwn new file mode 100644 index 0000000000..1aa191ed88 --- /dev/null +++ b/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction.mdwn @@ -0,0 +1,73 @@ +Originally was trying to reproduce [datalad/issues/3653](https://github.com/datalad/datalad/issues/3653) assuming that multiple files pointed to the same key. +It was not the case, and my attempt revealed another bug - annex inability to "obtain" files in parallel when multiple of them point to the same key: + +
+setup of original repo(click to expand) + +[[!format sh """ +/tmp > mkdir src; (cd src; git init; git annex init; dd if=/dev/zero of=1 count=1024 bs=1024; for f in {2..10}; do cp 1 $f; done ; git annex add *; git commit -m added; ) +Initialized empty Git repository in /tmp/src/.git/ +init (scanning for unlocked files...) +ok +(recording state in git...) +1024+0 records in +1024+0 records out +1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.00106651 s, 983 MB/s +add 1 +ok +add 10 +ok +add 2 +ok +add 3 +ok +add 4 +ok +add 5 +ok +add 6 +ok +add 7 +ok +add 8 +ok +add 9 +ok +(recording state in git...) +[master (root-commit) 63b1163] added + 10 files changed, 10 insertions(+) + create mode 120000 1 + create mode 120000 10 + create mode 120000 2 + create mode 120000 3 + create mode 120000 4 + create mode 120000 5 + create mode 120000 6 + create mode 120000 7 + create mode 120000 8 + create mode 120000 9 +"""]] +
+ +And that is what happens then when we try to get the same key in parallel: +[[!format sh """ + +/tmp > git clone src dst; (cd dst; git annex get -J 5 *; ) +Cloning into 'dst'... +done. +(merging origin/git-annex into git-annex...) +(recording state in git...) +(scanning for unlocked files...) +get 2 (from origin...) (checksum...) +git-annex: thread blocked indefinitely in an STM transaction +failed +git-annex: thread blocked indefinitely in an MVar operation + +"""]] + +I felt like it is an old issue but failed to find a trace of it upon a quick lookup + +[[!meta author=yoh]] +[[!tag projects/datalad]] + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_1_46420a92dbd8655af9b16349da24d0fc._comment b/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_1_46420a92dbd8655af9b16349da24d0fc._comment new file mode 100644 index 0000000000..77b039cc67 --- /dev/null +++ b/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_1_46420a92dbd8655af9b16349da24d0fc._comment @@ -0,0 +1,20 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2019-11-13T16:34:34Z" + content=""" +Reproduced. + +After building git-annex with the DebugLocks flag, I got this: + + debugLocks, called at ./Annex/Transfer.hs:248:18 in main:Annex.Transfer + debugLocks, called at ./CmdLine/Action.hs:263:26 in main:CmdLine.Action + +Which points to pickRemote and ensureOnlyActionOn. But pickRemote +does no STM actions when there's only 1 remote, so it must really be +the latter. + +Also, I notice that when 5 files to get are provided, it crashes, but with +less than 5, it succeeds. +Even this trivial case crashes: `git annex get -J1 1 2` +"""]] diff --git a/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_2_b1f4dc12ad00a4aa73e5bdc7c0a8f489._comment b/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_2_b1f4dc12ad00a4aa73e5bdc7c0a8f489._comment new file mode 100644 index 0000000000..0721cf378a --- /dev/null +++ b/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_2_b1f4dc12ad00a4aa73e5bdc7c0a8f489._comment @@ -0,0 +1,83 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2019-11-13T17:07:29Z" + content=""" +Ok, I see the bug. ensureOnlyActionOn does a STM +retry if it finds in the activekeys map some other thread +is operating on the same key. +But, there is no running STM transaction what will update +the map. So, STM detects that the retry would deadlock. + +It's not really a deadlock, because once the other thread finishes, +it will update the map to remove itself. But STM can't know that. +The solution will be to not use STM for waiting on the other thread. + +Hmm, I tried the obvious approach, using a MVar semaphore to wait for the +thread, but that just resulted in more STM and MVar deadlocks. + +I don't understand why after puzzling over it for two hours. I did +instrument all calls to atomically, and it looks, unfortunately, like +the one in finishCommandActions is deadlocking. If the problem extends +beyond ensureOnlyActionOn it may be much more complicated. + +Patch that does not work and I don't know why. + + diff --git a/CmdLine/Action.hs b/CmdLine/Action.hs + index 87298a95f..bf4bdd589 100644 + --- a/CmdLine/Action.hs + +++ b/CmdLine/Action.hs + @@ -268,16 +268,30 @@ ensureOnlyActionOn k a = debugLocks $ + go ConcurrentPerCpu = goconcurrent + goconcurrent = do + tv <- Annex.getState Annex.activekeys + - bracket (setup tv) id (const a) + - setup tv = liftIO $ do + + bracketIO (setup tv) id (const a) + + setup tv = do + + mysem <- newEmptyMVar + mytid <- myThreadId + - atomically $ do + + finishsetup <- atomically $ do + m <- readTVar tv + case M.lookup k m of + - Just tid + - | tid /= mytid -> retry + - | otherwise -> return $ return () + + Just (tid, theirsem) + + | tid /= mytid -> return $ do + + -- wait for the other + + -- thread to finish, and + + -- retry (STM retry would + + -- deadlock) + + readMVar theirsem + + setup tv + + | otherwise -> return $ + + -- same thread, so no + + -- blocking + + return $ return () + Nothing -> do + - writeTVar tv $! M.insert k mytid m + - return $ liftIO $ atomically $ + - modifyTVar tv $ M.delete k + + writeTVar tv $! M.insert k (mytid, mysem) m + + return $ return $ do + + atomically $ modifyTVar tv $ + + M.delete k + + -- indicate finished + + putMVar mysem () + + finishsetup + diff --git a/Annex.hs b/Annex.hs + index 9eb4c5f39..936399ae7 100644 + --- a/Annex.hs + +++ b/Annex.hs + @@ -143,7 +143,7 @@ data AnnexState = AnnexState + , existinghooks :: M.Map Git.Hook.Hook Bool + , desktopnotify :: DesktopNotify + , workers :: Maybe (TMVar (WorkerPool AnnexState)) + - , activekeys :: TVar (M.Map Key ThreadId) + + , activekeys :: TVar (M.Map Key (ThreadId, MVar ())) + , activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer) + , keysdbhandle :: Maybe Keys.DbHandle + , cachedcurrentbranch :: (Maybe (Maybe Git.Branch, Maybe Adjustment)) +"""]] diff --git a/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_3_d8ba7cc5a860e9ccaab32c637cc2a7cd._comment b/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_3_d8ba7cc5a860e9ccaab32c637cc2a7cd._comment new file mode 100644 index 0000000000..2880a4632a --- /dev/null +++ b/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_3_d8ba7cc5a860e9ccaab32c637cc2a7cd._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2019-11-13T19:07:49Z" + content=""" +Tried going back to c04b2af3e1a8316e7cf640046ad0aa68826650ed, +which is before the separation of perform and cleanup stages. +The same code was in onlyActionOn back then. And the test case does not +crash. + +So, that gives a good commit to start a bisection. Which will probably +find the bug was introduced in the separation of perform and cleanup stages, +because that added a lot of STM complexity. + +(Have to cherry-pick 018b5b81736a321f3eb9762a2afb7124e19dbdf9 +onto those old commits to make them build with current libraries.) +"""]] diff --git a/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_4_eecaa7f7b0279c56902c90ed58d1444f._comment b/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_4_eecaa7f7b0279c56902c90ed58d1444f._comment new file mode 100644 index 0000000000..4c1ad5521e --- /dev/null +++ b/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_4_eecaa7f7b0279c56902c90ed58d1444f._comment @@ -0,0 +1,83 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2019-11-13T21:22:07Z" + content=""" +Simplified version of patch above, that converts ensureOnlyActionOn to not use +STM at all, and is significantly simpler. + +With this patch, the test case still STM deadlocks. So this seems to be +proof that the actual problem is not in ensureOnlyActionOn. + + diff --git a/Annex.hs b/Annex.hs + index 9eb4c5f39..9baf7755a 100644 + --- a/Annex.hs + +++ b/Annex.hs + @@ -143,7 +143,7 @@ data AnnexState = AnnexState + , existinghooks :: M.Map Git.Hook.Hook Bool + , desktopnotify :: DesktopNotify + , workers :: Maybe (TMVar (WorkerPool AnnexState)) + - , activekeys :: TVar (M.Map Key ThreadId) + + , activekeys :: MVar (M.Map Key (ThreadId, MVar ())) + , activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer) + , keysdbhandle :: Maybe Keys.DbHandle + , cachedcurrentbranch :: (Maybe (Maybe Git.Branch, Maybe Adjustment)) + @@ -154,7 +154,7 @@ data AnnexState = AnnexState + newState :: GitConfig -> Git.Repo -> IO AnnexState + newState c r = do + emptyactiveremotes <- newMVar M.empty + - emptyactivekeys <- newTVarIO M.empty + + emptyactivekeys <- newMVar M.empty + o <- newMessageState + sc <- newTMVarIO False + return $ AnnexState + diff --git a/CmdLine/Action.hs b/CmdLine/Action.hs + index 87298a95f..a8c2bd205 100644 + --- a/CmdLine/Action.hs + +++ b/CmdLine/Action.hs + @@ -22,7 +22,7 @@ import Remote.List + import Control.Concurrent + import Control.Concurrent.Async + import Control.Concurrent.STM + -import GHC.Conc + +import GHC.Conc (getNumProcessors) + import qualified Data.Map.Strict as M + import qualified System.Console.Regions as Regions + + @@ -267,17 +267,22 @@ ensureOnlyActionOn k a = debugLocks $ + go (Concurrent _) = goconcurrent + go ConcurrentPerCpu = goconcurrent + goconcurrent = do + - tv <- Annex.getState Annex.activekeys + - bracket (setup tv) id (const a) + - setup tv = liftIO $ do + + mv <- Annex.getState Annex.activekeys + + bracketIO (setup mv) id (const a) + + setup mv = do + mytid <- myThreadId + - atomically $ do + - m <- readTVar tv + - case M.lookup k m of + - Just tid + - | tid /= mytid -> retry + - | otherwise -> return $ return () + - Nothing -> do + - writeTVar tv $! M.insert k mytid m + - return $ liftIO $ atomically $ + - modifyTVar tv $ M.delete k + + m <- takeMVar mv + + let ready sem = do + + putMVar mv $! M.insert k (mytid, sem) m + + return $ do + + modifyMVar_ mv $ pure . M.delete k + + putMVar sem () + + case M.lookup k m of + + Nothing -> ready =<< newEmptyMVar + + Just (tid, sem) + + | tid /= mytid -> do + + takeMVar sem + + ready sem + + | otherwise -> do + + putMVar mv m + + return noop +"""]] diff --git a/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_5_052acf169f9c2b8b0233adddabb02559._comment b/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_5_052acf169f9c2b8b0233adddabb02559._comment new file mode 100644 index 0000000000..4d3cc32da8 --- /dev/null +++ b/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_5_052acf169f9c2b8b0233adddabb02559._comment @@ -0,0 +1,25 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 5""" + date="2019-11-13T21:42:58Z" + content=""" +finishCommandActions is reaching the retry case, and STM deadlocks there. +The WorkerPool is getting into a state where allIdle is False, and is not +leaving it, perhaps due to an earlier STM deadlock. (There seem to be two +different ones.) + +Also, I notice with --json-error-messages: + + {"command":"get","note":"from origin...\nchecksum...","success":false,"key":"SHA256E-s524288--07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541","error-messages":["git-annex: thread blocked indefinitely in an STM transaction"],"file":"1"} + +So the thread that actually gets to run on the key is somehow reaching a +STM deadlock. + +Which made me wonder if that thread deadlocks on enteringStage. +And it seems so. If Command.Get is changed to use commandStages +rather than transferStages, the test case succeeds. + +Like finishCommandActions, enteringStage has a STM retry if it needs to +wait for something to happen to the WorkerPool. So again it looks like +the WorkerPool is getting screwed up. +"""]] diff --git a/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_6_880c962d5ca77c494c984e7f74725265._comment b/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_6_880c962d5ca77c494c984e7f74725265._comment new file mode 100644 index 0000000000..f8d18b1cf6 --- /dev/null +++ b/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_6_880c962d5ca77c494c984e7f74725265._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="catch-all deadlock breaker" + date="2019-11-13T22:33:59Z" + content=""" +Not sure if feasible, but maybe a [[catch-all deadlock breaker|todo/more_extensive_retries_to_mask_transient_failures]] could be implemented to mask this and other deadlocks? + +The moon landings software [[had something|https://www.ibiblio.org/apollo/hrst/archive/1033.pdf]] [[like this|https://history.nasa.gov/computers/Ch2-6.html]], and it worked [[pretty well|https://www.wsj.com/articles/apollo-11-had-a-hidden-hero-software-11563153001]]... +"""]] diff --git a/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_6_e33df0ec76069deb069b94c944d62c76._comment b/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_6_e33df0ec76069deb069b94c944d62c76._comment new file mode 100644 index 0000000000..7f348fc9f5 --- /dev/null +++ b/doc/bugs/parallel_get_to_the_files_for_the_same_key_would_fail_with__thread_blocked_indefinitely_in_an_STM_transaction/comment_6_e33df0ec76069deb069b94c944d62c76._comment @@ -0,0 +1,69 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 6""" + date="2019-11-14T15:20:13Z" + content=""" +Added tracing of changes to the WorkerPool. + + joey@darkstar:/tmp/dst>git annex get -J1 1 2 --json + ("initial pool",WorkerPool UsedStages {initialStage = TransferStage, stageSet = fromList [TransferStage,VerifyStage]} [IdleWorker TransferStage,IdleWorker VerifyStage] 2) + ("starting worker",WorkerPool UsedStages {initialStage = TransferStage, stageSet = fromList [TransferStage,VerifyStage]} [ActiveWorker TransferStage,IdleWorker VerifyStage] 1) + +Transfer starts for file 1 + + (("change stage from",TransferStage,"to",VerifyStage),WorkerPool UsedStages {initialStage = TransferStage, stageSet = fromList [TransferStage,VerifyStage]} [IdleWorker TransferStage,ActiveWorker VerifyStage] 1) + +Transfer complete, verifying starts. + + ("starting worker",WorkerPool UsedStages {initialStage = TransferStage, stageSet = fromList [TransferStage,VerifyStage]} [ActiveWorker TransferStage,ActiveWorker VerifyStage] 0) + +This second thread is being started to process file 2. +It starts in TransferStage, but it will be blocked from doing anything +by ensureOnlyActionOn. + + ("finishCommandActions starts with",WorkerPool UsedStages {initialStage = TransferStage, stageSet = fromList [TransferStage,VerifyStage]} [ActiveWorker TransferStage,ActiveWorker VerifyStage] 0) + ("finishCommandActions observes",WorkerPool UsedStages {initialStage = TransferStage, stageSet = fromList [TransferStage,VerifyStage]} [ActiveWorker TransferStage,ActiveWorker VerifyStage] 0) + +All files have threads to process them started, so finishCommandActions starts up. +It will retry since the threads are still running. + + (("change stage from",VerifyStage,"to",TransferStage),WorkerPool UsedStages {initialStage = TransferStage, stageSet = fromList [TransferStage,VerifyStage]} [IdleWorker VerifyStage,ActiveWorker TransferStage] 0) + +The first thread is done with verification, and +the stage is being restored to transfer. + +The 0 means that there are 0 spareVals. Normally, the number of spareVals +should be the same as the number of IdleWorkers, so it should be 1. +It's 0 because the thread is in the process of changing between stages. + +The thread should at this point be waiting for an idle TransferStage +slot to become available. The second thread still has that active. +It seems that wait never completes, because a trace I had after that wait +never got printed. + + ("finishCommandActions observes",WorkerPool UsedStages {initialStage = TransferStage, stageSet = fromList [TransferStage,VerifyStage]} [IdleWorker VerifyStage,ActiveWorker TransferStage] 0) + +It retries again, because of the active worker and also because spareVals +is not the same as IdleWorkers. + + git-annex: thread blocked indefinitely in an STM transaction + +Deadlock. + +Looks like that second thread that got into transfer stage +never leaves it, and then the first thread, which wants to +restore back to transfer stage, is left waiting forever for it. And so is +finishCommandActions. + +Aha! The second thread is in fact still in ensureOnlyActionOn. +So it's waiting on the first thread to finish. But the first thread can't +transition back to TransferStage because the second thread has stolen it. + +Now it makes sense. + +So.. One way to fix this would be to add a new stage, which is used for +threads that are just starting. Then the second thread would be in +StartStage, and the first thread would not be prevented from transitioning +back to TransferStage. Would need to make sure that, once a thread leaves +StartStage, it does not ever transition back to it. +"""]] diff --git a/doc/bugs/regression__58___standalone_build_is_deficient_on_linux_after_7.20190819+git2-g908476a9b-1__126__ndall+1_some_time.mdwn b/doc/bugs/regression__58___standalone_build_is_deficient_on_linux_after_7.20190819+git2-g908476a9b-1__126__ndall+1_some_time.mdwn new file mode 100644 index 0000000000..9ec6964e7b --- /dev/null +++ b/doc/bugs/regression__58___standalone_build_is_deficient_on_linux_after_7.20190819+git2-g908476a9b-1__126__ndall+1_some_time.mdwn @@ -0,0 +1,79 @@ +### Please describe the problem. + +There were a few changes introduced since then to Makefile (I will not guess which one broke it) which resulted in git within git-annex-standalone of neurodebian to be unable to clone from https://: + + +[[!format sh """ +$> /usr/lib/git-annex.linux/git clone https://github.com/afni/afni_ci_test_data.git +Cloning into 'afni_ci_test_data'... +fatal: unable to find remote helper for 'https' + +"""]] + +
+diff between list of files in 7.20190819+git60-gcdb679818 and 7.20191017+git2-g7b13db551 package builds shows many git-* missing +[[!format sh """ +lena:/tmp +$> ls 7.2019*/usr/lib/git-annex.linux/exe/ +7.20190819/usr/lib/git-annex.linux/exe/: +cp@ git-diff-index@ git-mktag@ git-sh-i18n--envsubst@ +curl@ git-diff-tree@ git-mktree@ git-shell@ +git@ git-difftool@ git-multi-pack-index@ git-shortlog@ +git-add@ git-fast-export@ git-mv@ git-show@ +git-am@ git-fast-import@ git-name-rev@ git-show-branch@ +git-annex@ git-fetch@ git-notes@ git-show-index@ +git-annex-shell@ git-fetch-pack@ git-pack-objects@ git-show-ref@ +git-annotate@ git-fmt-merge-msg@ git-pack-redundant@ git-stage@ +git-apply@ git-for-each-ref@ git-pack-refs@ git-status@ +git-archive@ git-format-patch@ git-patch-id@ git-stripspace@ +git-bisect--helper@ git-fsck@ git-prune@ git-submodule--helper@ +git-blame@ git-fsck-objects@ git-prune-packed@ git-symbolic-ref@ +git-branch@ git-gc@ git-pull@ git-tag@ +git-bundle@ git-get-tar-commit-id@ git-push@ git-unpack-file@ +git-cat-file@ git-grep@ git-range-diff@ git-unpack-objects@ +git-check-attr@ git-hash-object@ git-read-tree@ git-update-index@ +git-check-ignore@ git-help@ git-rebase@ git-update-ref@ +git-check-mailmap@ git-http-backend@ git-rebase--interactive@ git-update-server-info@ +git-check-ref-format@ git-http-fetch@ git-receive-pack@ git-upload-archive@ +git-checkout@ git-http-push@ git-reflog@ git-upload-pack@ +git-checkout-index@ git-imap-send@ git-remote@ git-var@ +git-cherry@ git-index-pack@ git-remote-ext@ git-verify-commit@ +git-cherry-pick@ git-init@ git-remote-fd@ git-verify-pack@ +git-clean@ git-init-db@ git-remote-ftp@ git-verify-tag@ +git-clone@ git-interpret-trailers@ git-remote-ftps@ git-whatchanged@ +git-column@ git-log@ git-remote-http@ git-worktree@ +git-commit@ git-ls-files@ git-remote-https@ git-write-tree@ +git-commit-graph@ git-ls-remote@ git-remote-testsvn@ localedef@ +git-commit-tree@ git-ls-tree@ git-remote-tor-annex@ lsof@ +git-config@ git-mailinfo@ git-repack@ rsync@ +git-count-objects@ git-mailsplit@ git-replace@ sh@ +git-credential@ git-merge@ git-rerere@ ssh@ +git-credential-cache@ git-merge-base@ git-reset@ ssh-keygen@ +git-credential-cache--daemon@ git-merge-file@ git-rev-list@ tar@ +git-credential-store@ git-merge-index@ git-rev-parse@ uname@ +git-daemon@ git-merge-ours@ git-revert@ xargs@ +git-describe@ git-merge-recursive@ git-rm@ +git-diff@ git-merge-subtree@ git-send-pack@ +git-diff-files@ git-merge-tree@ git-serve@ + +7.20191017/usr/lib/git-annex.linux/exe/: +cp@ git-credential-cache--daemon@ git-http-push@ git-sh-i18n--envsubst@ sh@ +curl@ git-credential-store@ git-imap-send@ git-shell@ ssh@ +git@ git-daemon@ git-receive-pack@ git-upload-pack@ ssh-keygen@ +git-annex@ git-fast-import@ git-remote-http@ localedef@ tar@ +git-annex-shell@ git-http-backend@ git-remote-testsvn@ lsof@ uname@ +git-credential-cache@ git-http-fetch@ git-remote-tor-annex@ rsync@ xargs@ + + +"""]] + +
+ +so may be that is related. + +Unfortunately in datalad we had no test testing cloning over https, so I added such integration test in https://github.com/datalad/datalad/pull/3867 to at least detect such regressions in the future before hitting the userland + +[[!meta author=yoh]] +[[!tag projects/datalad]] + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/regression__58___standalone_build_is_deficient_on_linux_after_7.20190819+git2-g908476a9b-1__126__ndall+1_some_time/comment_1_1735409e62ce82f7ba7258b0167fda06._comment b/doc/bugs/regression__58___standalone_build_is_deficient_on_linux_after_7.20190819+git2-g908476a9b-1__126__ndall+1_some_time/comment_1_1735409e62ce82f7ba7258b0167fda06._comment new file mode 100644 index 0000000000..1c80ed5ecb --- /dev/null +++ b/doc/bugs/regression__58___standalone_build_is_deficient_on_linux_after_7.20190819+git2-g908476a9b-1__126__ndall+1_some_time/comment_1_1735409e62ce82f7ba7258b0167fda06._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2019-11-14T18:14:35Z" + content=""" +It will either be caused by 5463f97ca216cd261f7a1da08aa8a62cef415a71 or by +a new version of git reorging files (or both). +"""]] diff --git a/doc/design/external_special_remote_protocol/export_and_import_appendix.mdwn b/doc/design/external_special_remote_protocol/export_and_import_appendix.mdwn index 2223087969..b841f079d6 100644 --- a/doc/design/external_special_remote_protocol/export_and_import_appendix.mdwn +++ b/doc/design/external_special_remote_protocol/export_and_import_appendix.mdwn @@ -118,7 +118,7 @@ file. This is called a ContentIdentifier. A good ContentIdentifier needs to: It's up to the implementor of a external special remote program what to use for their ContentIdentifier, but not meeting those criteria -will leas to unhappy users, and it's better not to implement this interface if +will lead to unhappy users, and it's better not to implement this interface if you can't do it well. ### protocol messages @@ -148,7 +148,7 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`. remote. A block of responses can be made to this, which must always end with `END`. * `CONTENT Size Name` - An file stored in the special remote. The Size is its size + A file stored in the special remote. The Size is its size in bytes. The Name is the name of the file on the remote, in the form of a relative path, and may contain path separators, whitespace, and other special characters. @@ -164,13 +164,15 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`. * `END` Indicates the end of a block of responses. * `LOCATION Name` - Comes before each of the following requests, + Comes before each of the following requests (except for + REMOVEEXPORTDIRECTORYWHENEMPTY), specifying the name of the file on the remote. It will be in the form of a relative path, and may contain path separators, whitespace, and other special characters. No response is made to this message. * `EXPECTED ContentIdentifier` - Comes before each of the following requests, specifying the + Comes before each of the following requests (except + for REMOVEEXPORTDIRECTORYWHENEMPTY), specifying the ContentIdentifier that is expected to be present on the remote. * `NOTHINGEXPECTED` If no ContentIdentifier is expected to be present, this is sent @@ -179,7 +181,7 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`. Retrieves the content of a file from the special remote to the File on local disk. Must take care to only retrieve content that has the ContentIdentifier specified by - by `EXPECTED`. + `EXPECTED`. While the transfer is running, the remote can send any number of `PROGRESS` messages. Once the transfer is complete, it finishes by sending one of these replies: @@ -224,7 +226,9 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`. Indicates the content has been removed from the remote. May be returned when the content was already not present. * `REMOVE-FAILURE Key ErrorMsg` - Indicates that the content was unable to be removed from the remote. + Indicates that the content was unable to be removed from the remote, + either because of an access problem, or because it did not match + the ContentIdentifier. * `REMOVEEXPORTDIRECTORYWHENEMPTY Directory` Requests the remote remove an exported directory, so long as it's empty. If the remote does not use directories, or `REMOVEEXPORTEXPECTED` @@ -294,11 +298,9 @@ of the older versions.) Next git-annex asks for the content of a file to be retrieved. LOCATION foo - EXPECTED 100 48511528411921470 + EXPECTED 100 48511528411921470 RETRIEVEEXPORTEXPECTED tmpfile -But, the file "foo" has been modified and so the -ContentIdentifier no longer matches. +If the requested version no longer exists, the response should be: RETRIEVE-FAILURE content has changed - diff --git a/doc/devblog/day_607__v8_is_done.mdwn b/doc/devblog/day_607__v8_is_done.mdwn new file mode 100644 index 0000000000..eead30e0ce --- /dev/null +++ b/doc/devblog/day_607__v8_is_done.mdwn @@ -0,0 +1,40 @@ +Spent the past two weeks on the [[todo/sqlite_database_improvements]] +which will be git-annex v8. + +That cleaned up a significant amount of technical debt. I had made some bad +choices about encoding sqlite data early on, and the persistent library +turns out to make a dubious choice about how String is stored, that +prevents some unicode surrigate code points from roundtripping sometimes. +On top of those problems, there were some missing indexes. And then to +resolve the `git add` mess, I had to write a raw SQL query that used LIKE, +which was super ugly, slow, and not indexed. + +Really good to get all that resolved. And I have microbenchmarks that are +good too; 10-25% speedup across the board for database operations. + +The tricky thing was that, due to the encoding problem, both filenames and +keys stored in the old sqlite databases can't be trusted to be valid. This +ruled out a database migration because it could leave a repo with bad old +data in it. Instead, the old databases have to be thrown away, and the +upgrade has to somehow build new databases that contain all the necessary +data. Seems a tall order, but luckily git-annex is a distributed system and +so the databases are used as a local fast cache for information that can be +looked up more slowly from git. Well, mostly. Sometimes the databases are +used for data that has not yet been committed to git, or that is local to a +single repo. + +So I had to find solutions to a lot of hairly problems. In a couple cases, +the solutions involve git-annex doing more work after the upgrade for a +while, until it is able to fully regenerate the data that was stored in the +old databases. + +One nice thing about this approach is that, if I ever need to change the +sqlite databases again, I can reuse the same code to delete the old and +regnerate the new, rather than writing migration code specific to a +given database change. + +Anyway, v8 is all ready to merge, but I'm inclined to sit on it for a month or +two, to avoid upgrade fatigue. Also I find more ways to improve the +database schema. Perhaps it would be worth it to do some normalization, +and/or move everything into a single large database rather than the current +smattering of unnormalized databases? diff --git a/doc/devblog/day_607__v8_is_done/comment_1_7326382d8ff23873c2fe0d6acd984454._comment b/doc/devblog/day_607__v8_is_done/comment_1_7326382d8ff23873c2fe0d6acd984454._comment new file mode 100644 index 0000000000..97e6a5cf81 --- /dev/null +++ b/doc/devblog/day_607__v8_is_done/comment_1_7326382d8ff23873c2fe0d6acd984454._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="pigmonkey" + avatar="http://cdn.libravatar.org/avatar/560cedfcec1b75e8ac2e98a10615d770" + subject="Consider deprecation warnings in the web app" + date="2019-11-07T18:38:28Z" + content=""" +If rebuilding the database is an operation that will take some time, it might be nice to have deprecation warnings in the web app before the assistant eventually autoupgrades repos. + +I use the assistant/web app to manage about 10 repos, some of which are on the larger size both in terms of disk space and number of files. I suspect that if the assistant kicked off an upgrade with a database rebuild on all of these at once, it would have a noticeable performance impact on my machine. If, after v8 is merged but before the assistant autoupgrades, the web app displayed a message like \"This repository is using a version that will soon be upgraded, click here to learn more about v8 and consider upgrading\", it would give folks (who don't read the devblog or release notes) a heads up and give them a chance to manually upgrade repositories one by one. +"""]] diff --git a/doc/devblog/day_607__v8_is_done/comment_2_802311f91808a13473aa99578a429497._comment b/doc/devblog/day_607__v8_is_done/comment_2_802311f91808a13473aa99578a429497._comment new file mode 100644 index 0000000000..6da2674bac --- /dev/null +++ b/doc/devblog/day_607__v8_is_done/comment_2_802311f91808a13473aa99578a429497._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="gb@4a49bb1afcf3d183bba8f07297b0395808768c6c" + nickname="gb" + avatar="http://cdn.libravatar.org/avatar/51ed40932fbf8748c70b31fd7446d40e" + subject="Symlinks for not-present unlocked files in v8?" + date="2019-11-10T14:51:59Z" + content=""" +Hi Joey, do you think it would be possible to squeeze a solution for the missing symlinks for not-present unlocked files in v8? + +Symlinks for missing unlocked files are the last thing missing from V5 for those of us simulating direct mode with adjusted branches and `annex.thin`. For reference and + +"""]] diff --git a/doc/devblog/day_607__v8_is_done/comment_3_7d3cad862be54dc5a39059f8b82834a6._comment b/doc/devblog/day_607__v8_is_done/comment_3_7d3cad862be54dc5a39059f8b82834a6._comment new file mode 100644 index 0000000000..0231d92ef0 --- /dev/null +++ b/doc/devblog/day_607__v8_is_done/comment_3_7d3cad862be54dc5a39059f8b82834a6._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="Re: Symlinks for not-present unlocked files in v8" + date="2019-11-10T23:03:42Z" + content=""" +Adding symlinks for not-present unlocked files in v8 would be great, but if it would hold up the release with SQL db fixes, making that release first would be better. Some query operations -- like finding all unlocked files, or all files present in one remote but not the other -- currently take a while on large repos; if the SQL refactor could solve that, that by itself would be a major step forward. +"""]] diff --git a/doc/devblog/day_608__easier_git-lfs_setup.mdwn b/doc/devblog/day_608__easier_git-lfs_setup.mdwn new file mode 100644 index 0000000000..4c1f6da546 --- /dev/null +++ b/doc/devblog/day_608__easier_git-lfs_setup.mdwn @@ -0,0 +1,25 @@ +The git-lfs support I added to git-annex had one small problem: People +expect to be able to clone a git repo and get right to using it, but after +cloning a git-annex repo that's on a server that uses git-lfs, there +was an extra `git annex enableremote` step to be able to use it as a git-lfs +special remote. And, you ended up with a "origin" git remote and a git-lfs +special remote with some other name. + +Now, it's this simple to set up a git-lfs repo on eg, github: + + git annex initremote github type=git-lfs encryption=none url=https://github.com/joeyh/lfstest + git annex sync github + git annex copy --to github ... + +And then for others to clone and use it is even simpler: + + git clone https://github.com/joeyh/lfstest + cd lfstest + git annex get + +The only gotcha is that git-annex has to know the url that's used for +the remote. Cloning any other url any other way (eg http instead of https) +will result in git-annex not using it. This is a consequence of git-lfs +not having any equivilant of a git-annex repository UUID, so git-annex +can't probe for the UUID and has to compare urls. This can be worked +around using `initremote --sameas` to tell git-annex about other urls. diff --git a/doc/forum/How_to_prevent_copies_on_a_single_device_and_use_only_hardlinks.mdwn b/doc/forum/How_to_prevent_copies_on_a_single_device_and_use_only_hardlinks.mdwn new file mode 100644 index 0000000000..403e2cafa5 --- /dev/null +++ b/doc/forum/How_to_prevent_copies_on_a_single_device_and_use_only_hardlinks.mdwn @@ -0,0 +1,13 @@ +Hi, + +Thank you very much for this software. I'm working in a research institute and we are very interested into using git-annex with DataLad to manage our datasets. + +We aim to provide a datasets repository accessible through the local network on a single file system. Some of our datasets are multi TB with a few millions of files. It will be managed by a few people but the primary users, the researchers, will only have read access. We would like to use hardlinks everywhere to avoid infrequent reading errors related to symlinks and save space when we want to propose different versions of the datasets with slight changes. The file system will be backed-up so we don't really need multi copies of the same files on a single file system. + +We seam to be able to achieve this using the `direct` mode in git-annex version 5 but it seams that the `unlock` mode in version 7 does copies instead of hardlinks. I'm wondering how we could achieve the same behaviour as in version 5. I believe I've read in the doc that there's a maximum of 2 hardlinks for a single file but I can't remember where or see if that is still the case. If that is still the case, I couldn't find if there is a setting to set or remove this maximum. + +We've tested with git-annex local version 5 / build 7.20190819, local version 7 / build 7.20190819 and local version 7 / build 7.20191106. [Here is a gist](https://gist.github.com/satyaog/b08a6e5d1eee75217ba823d38b84fb8b) containing test scripts for each setup. The `.annex-cache` part can be ignored for this topic. I've used Miniconda3-4.3.30 on Ubuntu 18.04.2 LTS to setup the environments. + +Thank you, + +Satya diff --git a/doc/forum/How_to_prevent_copies_on_a_single_device_and_use_only_hardlinks/comment_1_b59a72142e8f752d7b68a63a6ac1bbfe._comment b/doc/forum/How_to_prevent_copies_on_a_single_device_and_use_only_hardlinks/comment_1_b59a72142e8f752d7b68a63a6ac1bbfe._comment new file mode 100644 index 0000000000..d341a02d36 --- /dev/null +++ b/doc/forum/How_to_prevent_copies_on_a_single_device_and_use_only_hardlinks/comment_1_b59a72142e8f752d7b68a63a6ac1bbfe._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="using hardlinks" + date="2019-11-18T19:21:18Z" + content=""" +I don't have a full answer, but [[tips/local_caching_of_annexed_files]] might have relevant info. + +There is also the `annex.thin` setting; but check some [caveats](https://git-annex.branchable.com/bugs/annex.thin_can_cause_corrupt___40__not_just_missing__41___data/) related to it. +"""]] diff --git a/doc/forum/How_to_prevent_copies_on_a_single_device_and_use_only_hardlinks/comment_2_e4bfea21f664a0cb1aa8ec18ee88fc50._comment b/doc/forum/How_to_prevent_copies_on_a_single_device_and_use_only_hardlinks/comment_2_e4bfea21f664a0cb1aa8ec18ee88fc50._comment new file mode 100644 index 0000000000..359626b214 --- /dev/null +++ b/doc/forum/How_to_prevent_copies_on_a_single_device_and_use_only_hardlinks/comment_2_e4bfea21f664a0cb1aa8ec18ee88fc50._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="satya.ortiz-gagne@a4c92de91eb4fd5ae8fc9893bb4fd674a19f2e59" + nickname="satya.ortiz-gagne" + avatar="http://cdn.libravatar.org/avatar/79c93025f174cd2aff98fbb952702c09" + subject="Re: using hardlinks" + date="2019-11-18T20:46:22Z" + content=""" +Thanks for your comment. I've looked into [local caching of annexed files](https://git-annex.branchable.com/tips/local_caching_of_annexed_files) and most of it can be found in the scenario [described in the test gist](https://gist.github.com/satyaog/b08a6e5d1eee75217ba823d38b84fb8b). + +The two settings `annex.thin` and `annex.hardlink` are also set in the two git-annex repositories of the test. Thanks for letting me know about the caveats. Based on the tests that I've executed, it would seam that [`git-annex unlock`](https://git-annex.branchable.com/git-annex-unlock/) now copies the file to avoid the mentioned issue as I noticed different inodes? I understand that this prevents unwanted lost of data while using git-annex but I would actually like to have a hardlink instead of a copy. I'm wondering if it's possible. +"""]] diff --git a/doc/forum/How_to_prevent_copies_on_a_single_device_and_use_only_hardlinks/comment_3_ee247d9316df963fd584df2199b80fe2._comment b/doc/forum/How_to_prevent_copies_on_a_single_device_and_use_only_hardlinks/comment_3_ee247d9316df963fd584df2199b80fe2._comment new file mode 100644 index 0000000000..d272eb88a1 --- /dev/null +++ b/doc/forum/How_to_prevent_copies_on_a_single_device_and_use_only_hardlinks/comment_3_ee247d9316df963fd584df2199b80fe2._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="using hardlinks" + date="2019-11-19T17:51:08Z" + content=""" +I again don't have a full answer, but maybe you could customize git's [post-checkout hook](https://git-scm.com/docs/githooks#_post_checkout)? (You'd need to still call the hook that git-annex installs.) + +Also, I've thrown together a [[FUSE filesystem|https://github.com/broadinstitute/viral-ngs/blob/is-dx-benchmarks/tools/git-annex-remotes/git-annex-on-demand.py]] that fetches git-annexed files on demand, maybe that could be adapted. It only works with locked files and symlinks though. + +What is the reason you don't want to use locked files? You can have different [[worktrees|tips/Using_git-worktree_with_annex]] with symlinks of locked files pointing into the same annex. +"""]] diff --git a/doc/forum/Issues_with_webapp___47___assistant_and_gitlab_as_a_metadata_only_repo.mdwn b/doc/forum/Issues_with_webapp___47___assistant_and_gitlab_as_a_metadata_only_repo.mdwn new file mode 100644 index 0000000000..c7f821b2b0 --- /dev/null +++ b/doc/forum/Issues_with_webapp___47___assistant_and_gitlab_as_a_metadata_only_repo.mdwn @@ -0,0 +1,63 @@ +Greetings, + +Two more issues have come up. + +1) webapp (and assistant) don't always seem to inherit ssh-agent keys (I've setup a passwordless key for gitlab). It usually takes multiple cycles of killing off ssh and annex-related daemons before a new instance will not fail auth with gitlab - is there a more solid way of doing this? + +2) When I do get auth, the assistant / webui will notice file changes (i.e moving a small file that doesn't match annex.largefiles, but the changes don't get committed to origin (gitlab - metadata only). The log file is below - thougts? + +[2019-11-13 20:24:33.378362] main: starting assistant version 7.20191106 +[2019-11-13 20:24:33.439501] TransferScanner: Syncing with origin +(scanning...) [2019-11-13 20:24:33.529954] Watcher: Performing startup scan +fatal: Pathspec 'workflow/cc-archive-exif/LICENSE' is in submodule 'workflow/cc-archive-exif' +fatal: Pathspec 'workflow/cc-archive-exif/LICENSE' is in submodule 'workflow/cc-archive-exif' +fatal: Pathspec 'workflow/cc-archive-exif/LICENSE' is in submodule 'workflow/cc-archive-exif' +fatal: Pathspec 'workflow/cc-archive-exif/LICENSE' is in submodule 'workflow/cc-archive-exif' +fatal: Pathspec 'workflow/cc-archive-exif/LICENSE' is in submodule 'workflow/cc-archive-exif' +fatal: Pathspec 'workflow/cc-archive-exif/LICENSE' is in submodule 'workflow/cc-archive-exif' +fatal: Pathspec 'workflow/cc-archive-exif/LICENSE' is in submodule 'workflow/cc-archive-exif' +fatal: Pathspec 'workflow/cc-archive-exif/LICENSE' is in submodule 'workflow/cc-archive-exif' +fatal: Pathspec 'workflow/cc-archive-exif/LICENSE' is in submodule 'workflow/cc-archive-exif' +fatal: Pathspec 'workflow/cc-archive-exif/LICENSE' is in submodule 'workflow/cc-archive-exif' +fatal: Pathspec 'workflow/cc-archive-exif/LICENSE' is in submodule 'workflow/cc-archive-exif' + +git cat-file EOF: user error + +fd:39: hFlush: resource vanished (Broken pipe) + +fd:39: hFlush: resource vanished (Broken pipe) +(started...) +[2019-11-13 20:24:34.481957] Committer: Committing changes to git +(recording state in git...) +> GitLab: Disallowed command +ControlSocket .git/annex/ssh/git@gitlab.com already exists, disabling multiplexing +[2019-11-13 20:24:35.42995] Pusher: Syncing with origin +Everything up-to-date +> GitLab: Disallowed command +Everything up-to-date +> GitLab: Disallowed command +> GitLab: Disallowed command +> GitLab: Disallowed command +> GitLab: Disallowed command +> GitLab: Disallowed command + +fd:39: hFlush: resource vanished (Broken pipe) + +fd:39: hFlush: resource vanished (Broken pipe) +[2019-11-13 20:26:09.256217] main: Syncing with origin +Everything up-to-date +> GitLab: Disallowed command + +fd:39: hFlush: resource vanished (Broken pipe) +[2019-11-13 20:27:31.922901] Committer: Committing changes to git +(recording state in git...) +[2019-11-13 20:27:31.956249] Pusher: Syncing with origin +Everything up-to-date + +fd:39: hFlush: resource vanished (Broken pipe) +[2019-11-13 20:28:35.846741] Committer: Committing changes to git +(recording state in git...) +[2019-11-13 20:28:35.87987] Pusher: Syncing with origin +Everything up-to-date +> GitLab: Disallowed command +> GitLab: Disallowed command diff --git a/doc/forum/Issues_with_webapp___47___assistant_and_gitlab_as_a_metadata_only_repo/comment_1_b61139816f55dbc4f7eac22207f243c0._comment b/doc/forum/Issues_with_webapp___47___assistant_and_gitlab_as_a_metadata_only_repo/comment_1_b61139816f55dbc4f7eac22207f243c0._comment new file mode 100644 index 0000000000..71e8b6fe65 --- /dev/null +++ b/doc/forum/Issues_with_webapp___47___assistant_and_gitlab_as_a_metadata_only_repo/comment_1_b61139816f55dbc4f7eac22207f243c0._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="git-annex@17927e6dc041ab425c14217a97a685adf3ecf44f" + nickname="git-annex" + avatar="http://cdn.libravatar.org/avatar/66e5c6e044d726597ce5a0ad68f86fe4" + subject="Sometimes it sorta works" + date="2019-11-14T17:24:11Z" + content=""" +So, sometimes (often), it says it synched in the webapp, but no changes show up in any branch on gitlab. However, sometimes it works. For example, sometimes after killing everything off (including any ssh sessions) and restarting the webapp, a file I added to the repo locally as a test will show up in the gitlab repo. When I then move that file, the webapp/assistant wakes up, syncs (says it was successful), and the \"delete\" is processed in the gitlab repo (the old file is deleted), but the \"add\" (the file showing up with a new name) doesn't. + + +"""]] diff --git a/doc/forum/Issues_with_webapp___47___assistant_and_gitlab_as_a_metadata_only_repo/comment_2_f6196f814ed388d276947ffcc92ff37c._comment b/doc/forum/Issues_with_webapp___47___assistant_and_gitlab_as_a_metadata_only_repo/comment_2_f6196f814ed388d276947ffcc92ff37c._comment new file mode 100644 index 0000000000..456146b110 --- /dev/null +++ b/doc/forum/Issues_with_webapp___47___assistant_and_gitlab_as_a_metadata_only_repo/comment_2_f6196f814ed388d276947ffcc92ff37c._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="git-annex@17927e6dc041ab425c14217a97a685adf3ecf44f" + nickname="git-annex" + avatar="http://cdn.libravatar.org/avatar/66e5c6e044d726597ce5a0ad68f86fe4" + subject="comment 2" + date="2019-11-19T17:18:18Z" + content=""" +bump? Any thoughts? +"""]] diff --git a/doc/forum/Issues_with_webapp___47___assistant_and_gitlab_as_a_metadata_only_repo/comment_3_1556ebbb2a8db4aa859e3852160df044._comment b/doc/forum/Issues_with_webapp___47___assistant_and_gitlab_as_a_metadata_only_repo/comment_3_1556ebbb2a8db4aa859e3852160df044._comment new file mode 100644 index 0000000000..65d4bf4858 --- /dev/null +++ b/doc/forum/Issues_with_webapp___47___assistant_and_gitlab_as_a_metadata_only_repo/comment_3_1556ebbb2a8db4aa859e3852160df044._comment @@ -0,0 +1,24 @@ +[[!comment format=mdwn + username="git-annex@17927e6dc041ab425c14217a97a685adf3ecf44f" + nickname="git-annex" + avatar="http://cdn.libravatar.org/avatar/66e5c6e044d726597ce5a0ad68f86fe4" + subject="git add or git commit does not trigger assistant, but git rm does" + date="2019-11-20T01:37:15Z" + content=""" +More diagnostics on this one.. + +I create a file that is below the size threshold for annex, but I do have a gitlab upstream. + +1) echo \"foo\" > foo - Assistant (or webapp) does not fire +2) git add foo - Assistant/webapp does not fire +3) git commit -am \"random testing\" - assistant does not fire +4) git annex sync - foo ends up in gitlab + +I remove a file + +1) git rm foo - Assistant fires and file is removed from gitlab + +Is this the expected behavior? If so, why does a git rm trigger annex assistant/webapp, but a git add does not? + + +"""]] diff --git a/doc/forum/git-annex_clients_with_only_a_gitLab_repo_and_GDrive_special_remote_in_common.mdwn b/doc/forum/git-annex_clients_with_only_a_gitLab_repo_and_GDrive_special_remote_in_common.mdwn new file mode 100644 index 0000000000..a1177cbf1d --- /dev/null +++ b/doc/forum/git-annex_clients_with_only_a_gitLab_repo_and_GDrive_special_remote_in_common.mdwn @@ -0,0 +1,20 @@ +Greetings, + +I'm setting up git-annex as the store for a darktable environment. I have to assume that all of the backup and client machines can not necessarily see each-other. Let's call them: +client1 +client2 +backup +usb (incr. backup - in case a client can't reach the gdrive) +gitlab (upstream - do I need to group this as well - if so, what) +gdrive (again, labeled as a backup so it will take everything) + +They all can reach both a gitlab repo (I know they don't support annex anymore) and a special remote for the large files (gdrive via the rclone remote to allow me to use a team drive) + +So either client1 or client2 will create or update content. git-annex assistant on the client will sync that to gitlab and gdrive (I assume). + +The question is, will git-annex assistant on backup and the other client check to see if there have been updates to the gitlab upstream? If so, how often? If not, should I git-annex schedule a git-annex-sync on each client and backup to force it? + +Thx +Christopher + + diff --git a/doc/forum/git-annex_clients_with_only_a_gitLab_repo_and_GDrive_special_remote_in_common/comment_1_10fa70e1e5e08e12ab7c1570881b38f8._comment b/doc/forum/git-annex_clients_with_only_a_gitLab_repo_and_GDrive_special_remote_in_common/comment_1_10fa70e1e5e08e12ab7c1570881b38f8._comment new file mode 100644 index 0000000000..d9d17fd266 --- /dev/null +++ b/doc/forum/git-annex_clients_with_only_a_gitLab_repo_and_GDrive_special_remote_in_common/comment_1_10fa70e1e5e08e12ab7c1570881b38f8._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2019-11-13T19:58:20Z" + content=""" +Recent versions of git-annex have a git-lfs special +remote, and gitlab does support git-lfs, so git-annex +can store data on it. That does not answer your question, just wanted to +mention it. + +For the assistant to quickly notice changes, it needs to be able to talk to +a git-annex-shell on the remote's server. As a fallback, it does poll +remotes periodically that don't have git-annex on them. That's done once +per hour, and also when network connection changes are detected. + +If you need to poll more frequently, you can install a cron job that does a +git pull. +"""]] diff --git a/doc/forum/git-annex_clients_with_only_a_gitLab_repo_and_GDrive_special_remote_in_common/comment_2_ff0dab8dc544f7aa8e4ebfbec8f2a081._comment b/doc/forum/git-annex_clients_with_only_a_gitLab_repo_and_GDrive_special_remote_in_common/comment_2_ff0dab8dc544f7aa8e4ebfbec8f2a081._comment new file mode 100644 index 0000000000..0e4a37c0ec --- /dev/null +++ b/doc/forum/git-annex_clients_with_only_a_gitLab_repo_and_GDrive_special_remote_in_common/comment_2_ff0dab8dc544f7aa8e4ebfbec8f2a081._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="git-annex@17927e6dc041ab425c14217a97a685adf3ecf44f" + nickname="git-annex" + avatar="http://cdn.libravatar.org/avatar/66e5c6e044d726597ce5a0ad68f86fe4" + subject="comment 2" + date="2019-11-14T00:38:56Z" + content=""" +Thank's Joey - that's what I needed. +"""]] diff --git a/doc/forum/lets_discuss_git_add_behavior/comment_35_0423806de951f97f156b5edea85522cc._comment b/doc/forum/lets_discuss_git_add_behavior/comment_35_0423806de951f97f156b5edea85522cc._comment new file mode 100644 index 0000000000..5b0c18a5e9 --- /dev/null +++ b/doc/forum/lets_discuss_git_add_behavior/comment_35_0423806de951f97f156b5edea85522cc._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="http://templeofcrom.duckdns.org/" + nickname="Karl" + avatar="http://cdn.libravatar.org/avatar/336975995d2c8652aa98284987d5987e90e1b4d137da415af18a8e04c29edbc3" + subject="A warning in the docs about earlier v7 revisions would be nice" + date="2019-11-08T04:59:43Z" + content=""" +I started using git-annex for the first time today and ran head-first into this bug, so I'm glad to see a course correction here. I just wish the update had been done a month earlier, as the version packaged with Fedora 30 has the 'git add' override behavior which cost me a few hours in figuring out how to get files out of the annex and into a git object. The git-annex-add wiki page really could use a couple warnings to say that 'git add' may be overridden and that in earlier v7 revisions you are basically required to use annex.largefiles with a strict filter in order to make normal use of 'git add'. +"""]] diff --git a/doc/git-annex-addurl.mdwn b/doc/git-annex-addurl.mdwn index ce7ad04eb9..224d23078a 100644 --- a/doc/git-annex-addurl.mdwn +++ b/doc/git-annex-addurl.mdwn @@ -16,8 +16,10 @@ embedded in a web page at the url, and that is added to the annex instead. See the documentation of annex.security.allowed-ip-addresses in [[git-annex]](1) for details.) -Urls to torrent files (including magnet links) will cause the content of -the torrent to be downloaded, using `aria2c`. +Special remotes can add other special handling of particular urls. For +example, the bittorrent special remotes makes urls to torrent files +(including magnet links) download the content of the torrent, +using `aria2c`. Normally the filename is based on the full url, so will look like "www.example.com_dir_subdir_bigfile". In some cases, addurl is able to diff --git a/doc/git-annex-benchmark.mdwn b/doc/git-annex-benchmark.mdwn index 66f9c65258..c81839f5de 100644 --- a/doc/git-annex-benchmark.mdwn +++ b/doc/git-annex-benchmark.mdwn @@ -4,7 +4,7 @@ git-annex benchmark - benchmark git-annex commands # SYNOPSIS -git annex benchmark [criterionopts] ( -- commmand [; command] | --databases ) +git annex benchmark [criterionopts] ( -- commmand [; command] | --databases=N ) # DESCRIPTION @@ -39,8 +39,8 @@ used. Any options that git-annex usually accepts can be included after the command to benchmark. -The --databases option benchmark's git-annex's use of sqlite databases, -instead of a command. +The --databases=N option benchmark's git-annex's use of sqlite databases, +instead of a command. N is the number of items to benchmark. # OUTPUT diff --git a/doc/git-annex-import.mdwn b/doc/git-annex-import.mdwn index 89a30e82c5..1b7239f09b 100644 --- a/doc/git-annex-import.mdwn +++ b/doc/git-annex-import.mdwn @@ -1,16 +1,16 @@ # NAME -git-annex import - add files from a non-versioned directory or a special remote +git-annex import - add a tree of files to the repository # SYNOPSIS -git annex import `[path ...]` | git annex import --from remote branch[:subdir] +git annex import --from remote branch[:subdir] | `[path ...]` # DESCRIPTION -This command is a way to import files from elsewhere into your git-annex -repository. It can import files from a directory into your repository, -or it can import files from a git-annex special remote. +This command is a way to import a tree of files from elsewhere into your +git-annex repository. It can import files from a git-annex special remote, +or from a directory. # IMPORTING FROM A SPECIAL REMOTE @@ -30,7 +30,8 @@ remote. You can only import from special remotes that were configured with `importtree=yes` when set up with [[git-annex-initremote]](1). Only some -kinds of special remotes will let you configure them this way. +kinds of special remotes will let you configure them this way. A perhaps +non-exhastive list is the directory, s3, and adb special remotes. To import from a special remote, you must specify the name of a branch. A corresponding remote tracking branch will be updated by `git annex @@ -88,6 +89,10 @@ things that depend on the key. Preferred content expressions containing When run with a path, `git annex import` moves files from somewhere outside the git working copy, and adds them to the annex. +This is a legacy interface. It is still supported, but please consider +switching to importing from a directory special remote instead, using the +interface documented above. + Individual files to import can be specified. If a directory is specified, the entire directory is imported. diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index fedc6b8425..b191e3a5b0 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -146,10 +146,9 @@ subdirectories). See [[git-annex-rmurl]](1) for details. -* `import [path ...]` +* `import --from remote branch[:subdir] | [path ...]` - Add files from a non-version-controlled directory or a - special remote into the annex. + Add a tree of files to the repository. See [[git-annex-import]](1) for details. @@ -1028,6 +1027,19 @@ Like other git commands, git-annex is configured via `.git/config`. This works well in combination with annex.alwayscommit=false, to gather up a set of changes and commit them with a message you specify. +* `annex.allowsign` + + By default git-annex avoids gpg signing commits that it makes when + they're not the purpose of a command, but only a side effect. + That default avoids lots of gpg password prompts when + commit.gpgSign is set. A command like `git annex sync` or `git annex merge` + will gpg sign its commit, but a command like `git annex get`, + that updates the git-annex branch, will not. The assistant also avoids + signing commits. + + Setting annex.allowsign to true lets all commits be signed, as + controlled by commit.gpgSign and other git configuration. + * `annex.merge-annex-branches` By default, git-annex branches that have been pulled from remotes diff --git a/doc/news/version_7.20191009.mdwn b/doc/news/version_7.20191009.mdwn deleted file mode 100644 index c9647bf87b..0000000000 --- a/doc/news/version_7.20191009.mdwn +++ /dev/null @@ -1,29 +0,0 @@ -git-annex 7.20191009 released with [[!toggle text="these changes"]] -[[!toggleable text=""" - * Fix bug in handling of annex.largefiles that use largerthan/smallerthan. - When adding a modified file, it incorrectly used the file size of the - old version of the file, not the current size. - * Added --mimetype and --mimeencoding file matching options. - * Added --unlocked and --locked file matching options. - * Added adjust --lock, to enter an adjusted branch where files are locked. - * git-lfs: Added support for http basic auth. - * git-lfs: Only do endpoint discovery once when concurrency is enabled. - * fsck --incremental/--more: Fix bug that prevented the incremental fsck - information from being updated every 5 minutes as it was supposed to be; - it was only updated after 1000 files were checked, which may be more - files that are possible to fsck in a given fsck time window. - Thanks to Peter Simons for help with analysis of this bug. - * Test: Use more robust directory removal when built with directory-1.2.7. - * Close sqlite databases more robustly. - * remotedaemon: Don't list --stop in help since it's not supported. - * enable-tor: Run kdesu with -c option. - * enable-tor: Use pkexec to run command as root when gksu and kdesu are not - available. - * When dropping an unlocked file, preserve its mtime, which avoids - git status unncessarily running the clean filter on the file. - * uninit: Remove several git hooks that git-annex init sets up. - * uninit: Remove the smudge and clean filters that git-annex init sets up. - * Work around git cat-file --batch's odd stripping of carriage return - from the end of the line (some windows infection), avoiding crashing - when the repo contains a filename ending in a carriage return. - * git-annex-standalone.rpm: Fix the git-annex-shell symlink."""]] \ No newline at end of file diff --git a/doc/news/version_7.20191114.mdwn b/doc/news/version_7.20191114.mdwn new file mode 100644 index 0000000000..6dfb3401e3 --- /dev/null +++ b/doc/news/version_7.20191114.mdwn @@ -0,0 +1,10 @@ +git-annex 7.20191114 released with [[!toggle text="these changes"]] +[[!toggleable text=""" + * Added annex.allowsign option. + * Make --json-error-messages capture more errors, + particularly url download errors. + * Fix a crash (STM deadlock) when -J is used with multiple files + that point to the same key. + * linuxstandalone: Fix a regression that broke git-remote-https. + * OSX git-annex.app: Fix a problem that prevented using the bundled + git-remote-https, git-remote-http, and git-shell."""]] \ No newline at end of file diff --git a/doc/security/comment_3_0cc44e032fc94f4d374fb297ca9a8f2d._comment b/doc/security/comment_3_0cc44e032fc94f4d374fb297ca9a8f2d._comment new file mode 100644 index 0000000000..c94ab38953 --- /dev/null +++ b/doc/security/comment_3_0cc44e032fc94f4d374fb297ca9a8f2d._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="Another use case -- http_proxy" + date="2019-11-12T17:51:25Z" + content=""" +On some institutional servers they mandate for all http traffic to go through proxy. In our case `http_proxy` looked like `http://ptn07-e0:3128`. +`datalad install` worked out but an attempt to `datalad get` a bunch of files resulted in massive list of errors, and `annex-ignore` being set for \"origin\" which is otherwise available. +We managed to fish out that warning about security schemes and http_proxy being ignored for that reason in one of the subsequent attempts (after unsetting annex-ignore). +Upon attempts to set that variable we realized that we had to provide IP instead of `http_proxy` full value or just a name (`ptn07-e0`), netmasked address (e.g. 10.0.0.1/24) also didn't work. That makes it really inflexible. Actual IP could change, without http_proxy being changed. Even the value of http_proxy could change system wide. It would be painful to require our users to adjust for that every time, and it is infeasible to demand sysadmins to somehow tune up their configuration across HPC (we have no direct connection to them). If we could at least whitelist private network -- that would provide some remedy. Regular expression, though indeed not a really security-friendly solution, could have also provided remedy. +Have we missed some already existing way to make our lives easy on that system? +"""]] diff --git a/doc/special_remotes/git-lfs.mdwn b/doc/special_remotes/git-lfs.mdwn index e48a76cf4f..00e0d29405 100644 --- a/doc/special_remotes/git-lfs.mdwn +++ b/doc/special_remotes/git-lfs.mdwn @@ -10,13 +10,6 @@ the git-lfs special remote: * `url` - Required. The url to the git-lfs repository to use. Can be either a ssh url (scp-style is also accepted) or a http url. - But currently, a http url accesses the git-lfs repository without - authentication. To authenticate, you will need to use a ssh url. - - This parameter needs to be specified in the initial `git annex - initremote` but also each time you `git annex enableremote` - an existing git-lfs special remote. It's fine to use different urls - at different times as long as they point to the same git-lfs repository. * `encryption` - One of "none", "hybrid", "shared", or "pubkey". Required. See [[encryption]]. Also see the encryption notes below. diff --git a/doc/tips/storing_data_in_git-lfs.mdwn b/doc/tips/storing_data_in_git-lfs.mdwn index 65dc26cf94..a5b504d994 100644 --- a/doc/tips/storing_data_in_git-lfs.mdwn +++ b/doc/tips/storing_data_in_git-lfs.mdwn @@ -4,9 +4,11 @@ repositories, using the [[git-lfs special remote|special_remotes/git-lfs]]. You do not need the git-lfs program installed to use it, just a recent enough version of git-annex. +## getting started + Here's how to initialize a git-lfs special remote on Github. - git annex initremote lfs type=git-lfs encryption=none url=git@github.com:yourname/yourrepo.git + git annex initremote lfs type=git-lfs encryption=none url=https://github.com/yourname/yourrepo In this example, the remote will not be encrypted, so anyone who can access it can see its contents. It is possible to encrypt everything stored in a @@ -24,8 +26,44 @@ because the protocol does not support deletion. A git-lfs special remote also functions as a regular git remote. You can use things like `git push` and `git pull` with it. -To enable an existing git-lfs remote in another clone of the repository, -you'll need to provide an url to it again. It's ok to provide a different -url as long as it points to the same git-lfs repository. +## enabling existing git-lfs special remotes - git annex enableremote lfs url=https://github.com/yourname/yourrepo.git +There are two different ways to enable a git-lfs special +remote in another clone of the repository. + +Of course, you can use `git annex enableremote` to enable a git-lfs special +remote, the same as you would enable any other special remote. +Eg, for the "lfs" remote initialized above: + + git annex enableremote lfs + +But perhaps more simply, if git-annex sees a git remote that matches +the url that was provided to initremote earlier, it will *automatically* +enable that git remote as a git-lfs special remote. + +So you can just git clone from the url, and the "origin" remote will be +automatically used as a git-lfs special remote. + + git clone https://github.com/yourname/yourrepo + cd yourrepo + git-annex get --from origin + +Nice and simple, and much the same as git-annex handles its regular +remotes. + +(Note that git-annex versions 7.20191115 and older didn't remember the url +privided to initremote, so you'll need to pass the url= parameter +to enableremote in that case. Newer versions of git-annex will then +remember the url.) + +## multiple urls + +Often there are multiple urls that can access the same git repository. +You can set up git-lfs remotes for each url. For example, +to add a remote accessing the github repository over ssh: + + git annex initremote lfs-http --sameas=lfs url=git@github.com:yourname/yourrepo.git + +The `--sameas` parameter tells git-annex that this is the same as the "lfs" +repository, so it will understand that anything it stores in one remote can +be accessed also with the other remote. diff --git a/doc/todo/add_import_--to_command/comment_4_4a30627ac78b32911604c3377b958cd0._comment b/doc/todo/add_import_--to_command/comment_4_4a30627ac78b32911604c3377b958cd0._comment new file mode 100644 index 0000000000..20d03ae2c1 --- /dev/null +++ b/doc/todo/add_import_--to_command/comment_4_4a30627ac78b32911604c3377b958cd0._comment @@ -0,0 +1,40 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2019-11-19T16:58:06Z" + content=""" +Since git-annex does now support importtree from directory special remotes, +you can almost get what you said you want by: + + git annex initremote usb-drive type=directory directory=/mnt/usb-drive/myfiles \ + exporttree=yes importtree=yes encryption=none + +Then `git annex import master --from usb-drive` will import the files +into a usb-drive/master branch that you can merge. And you can run it +repeatedly to import new and changed files from the directory. + +So then you have the files sitting in a special remote like you wanted. +Namely the directory special remote on the USB drive. Only problem is that +importing the files does also copy them into the git-annex repo. So you'd +have to drop the files again, assuming you had disk space for them all +to begin with. + +I wonder, if it were possible to import the files without add their content +to the repo you ran the import from, leaving them on the special remote, +would that meet your use case? That seems like something it would be +possible to add. + +It would still probably have to copy the file into the local repo, in order +to hash it, and then just delete the content from the local repo. Of course +when the file is in a directory on the local system, that's not strictly +necessary; it could do the hashing of the file in place. But that would +need an extension to the special remote API to hash a file. + +But like I said in my other comment, I'd just clone my git-annex repo onto the +drive and add the files to the repo there. Avoids all this complication. +You'd need to provide a good justification for why you can't do that for +me to pursue this any further. + +(As far as adding a --to switch to import, [[transitive_transfers]] +discusses this kind of thing, and some issues with implementing that.) +"""]] diff --git a/doc/todo/change_git-annex-import_not_to_delete_original_files_by_default.mdwn b/doc/todo/change_git-annex-import_not_to_delete_original_files_by_default.mdwn new file mode 100644 index 0000000000..4fc5706e21 --- /dev/null +++ b/doc/todo/change_git-annex-import_not_to_delete_original_files_by_default.mdwn @@ -0,0 +1,5 @@ +[[git-annex-import]] by default deletes the original files. Keeping them by default would be better. "import" in many other tools (e.g. the bioinformatics tool [Geneious](https://www.geneious.com/)) means a non-destructive import. The short description of `git-annex-import` on its man page says it "adds" files to the repo, which does not suggest erasure. When I first used `git-annex-import`, I was surprised by the default behavior, and others may be too. Also, the command has now been "overloaded" for importing from a special remote, and in that mode the originals are not erased; giving the import-from-dir mode the same default would be more consistent. In general, erasing data by default seems dangerous: what if it was being imported into a temporary or untrusted repo? + +Changing the default would also let one [[repeatedly re-import a directory while keeping original files in place|bugs/impossible__40____63____41___to_continuously_re-import_a_directory_while_keeping_original_files_in_place]]. + +I realize this would be a breaking change for some workflows; warning of it [[like git does|todo/warn_of_breaking_changes_same_way_git_does]] would mitigate the breakage. diff --git a/doc/todo/change_git-annex-import_not_to_delete_original_files_by_default/comment_1_4eb794daaeef843b104bd480e11f7b42._comment b/doc/todo/change_git-annex-import_not_to_delete_original_files_by_default/comment_1_4eb794daaeef843b104bd480e11f7b42._comment new file mode 100644 index 0000000000..7e10050360 --- /dev/null +++ b/doc/todo/change_git-annex-import_not_to_delete_original_files_by_default/comment_1_4eb794daaeef843b104bd480e11f7b42._comment @@ -0,0 +1,22 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2019-11-19T17:09:00Z" + content=""" +My general feeling about git-annex import is that everything not involving +importing from a special remote should be deprecated and eventually +removed. + +The --duplicate option probably does what you want, but if the interface is +going to be changed, such as making that the default, I'd rather the +interface change move toward the goal of deprecating the old mode. + +The fundamental mistake that the legacy interface made is it conflated +copying content into the repository, dropping content from the directory, +and updating the working tree. The new interface decouples all 3, +only doing the first, and updating a tracking branch, which the user is then +free to merge as-is, or otherwise modify before merging. Dropping requires +an export of a new tree, which is the main pain point in emulating +the old interface, but you happen to not want to drop the content from the +directory, so that pain point shouldn't affect you. +"""]] diff --git a/doc/todo/change_git-annex-import_not_to_delete_original_files_by_default/comment_2_f2d436822490e74544bf58a4f1c9ee79._comment b/doc/todo/change_git-annex-import_not_to_delete_original_files_by_default/comment_2_f2d436822490e74544bf58a4f1c9ee79._comment new file mode 100644 index 0000000000..c4c86551d8 --- /dev/null +++ b/doc/todo/change_git-annex-import_not_to_delete_original_files_by_default/comment_2_f2d436822490e74544bf58a4f1c9ee79._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="import/export" + date="2019-11-19T17:39:47Z" + content=""" +Makes sense. It's certainly better if import/export did complementary things. Maybe, move the old `git-annex-import` functionality to a new command called `git-annex-ingest`? + +\"everything not involving importing from a special remote should be deprecated\" -- i.e. to ingest a directory you'd first create a directory special remote for it, and then `git-annex-import` from that? +"""]] diff --git a/doc/todo/documenting_sqlite_database_schemas.mdwn b/doc/todo/documenting_sqlite_database_schemas.mdwn new file mode 100644 index 0000000000..5a67c19212 --- /dev/null +++ b/doc/todo/documenting_sqlite_database_schemas.mdwn @@ -0,0 +1,3 @@ +If a spec of the [[sqlite database schemas|todo/sqlite_database_improvements]] could be added to the [[internals]] docs, this would open some possibilities for third-party tools based on this info. E.g. one could write some sqlite3 queries to get aggregate info on the number (and total size?) of keys present in specific combinations of repos. It would of course be understood that this is internal info subject to frequent change. + +Also, if [[Sometimes the databases are used for data that has not yet been committed to git|devblog/day_607__v8_is_done]], this would improve [[future_proofing]]. diff --git a/doc/todo/git-annex-export_--from_option.mdwn b/doc/todo/git-annex-export_--from_option.mdwn new file mode 100644 index 0000000000..c9d07ee9d9 --- /dev/null +++ b/doc/todo/git-annex-export_--from_option.mdwn @@ -0,0 +1,7 @@ +I just wanted to have a way to manage data copying / syncing between a fileserver and my android phone. So I pushed some files on my fileserver into a git remote and added the files with the annex subcommands then cloned the git tree from my workstation which is connected to my smartphone. + +Now I followed the documentation about the special remote adb and created that remote with the initremote command. When I then export I get (not available) failed errors. + +Which is caused by the fact that I didn't have checked out the files on my workstation. I don't need the files on this pc so it would be stupid to checkout partially huge files there or in other words I don't need the files at that place, I don't get why the export command not has a --from option where it can get the files? + +Is there a reason that does not exist and if so what would be a way to do sending files to the android device without ssh-ing into my server? diff --git a/doc/todo/git-annex-export_--from_option/comment_1_10f107aa0094d5ee4886878f5b1aaf06._comment b/doc/todo/git-annex-export_--from_option/comment_1_10f107aa0094d5ee4886878f5b1aaf06._comment new file mode 100644 index 0000000000..2f536eacde --- /dev/null +++ b/doc/todo/git-annex-export_--from_option/comment_1_10f107aa0094d5ee4886878f5b1aaf06._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2019-11-19T16:52:00Z" + content=""" +`git annex export --from` would be basically the same as +[[transitive_transfers]] and the comments there detail +the problems with trying to support that. + +What you can do is download the files onto the computer that is connected +to the phone, export them to the phone, and then drop the files from the +computer. +"""]] diff --git a/doc/todo/git-lfs_special_remote_simpler_setup.mdwn b/doc/todo/git-lfs_special_remote_simpler_setup.mdwn index ee0a55cdff..ebaf25bf21 100644 --- a/doc/todo/git-lfs_special_remote_simpler_setup.mdwn +++ b/doc/todo/git-lfs_special_remote_simpler_setup.mdwn @@ -20,5 +20,8 @@ obvious two or three. Now that `initremote --sameas` special remotes can be initialized for all the urls. The user would need to do that themselves probably. -[[!tag projects/dandi]] +> [[done]], the url is stored, and when there's a remote that has an url +> that's known to be to a git-lfs repo, that remote is automatically +> enabled to be used as a git-lfs special remote. --[[Joey]] +[[!tag projects/dandi]] diff --git a/doc/todo/globus_special_remote_as_a___34__transport__34___layer/comment_3_43107f1bec1ad141af2e97f715a70fe9._comment b/doc/todo/globus_special_remote_as_a___34__transport__34___layer/comment_3_43107f1bec1ad141af2e97f715a70fe9._comment new file mode 100644 index 0000000000..a9b7c3ebb9 --- /dev/null +++ b/doc/todo/globus_special_remote_as_a___34__transport__34___layer/comment_3_43107f1bec1ad141af2e97f715a70fe9._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="RFC: how would it work for regular git remote + special remote" + date="2019-11-08T19:19:18Z" + content=""" +Added recently `--sameas` functionality provides support at the \"UUID logistics\" level, and examples in the comments exercise it for two external remotes (rsync + directory) with the same layout of annex objects. +The original use case I am pursuing is for a regular git repository (e.g. non-bare) with \"git repository\" layout of the store (i.e. under `.git/annex/objects/`) use a special remote primarily as a transport mechanism. In our case it will be `globus`. I really doubt it would work \"out of the box\" since AFAIK any special remote has only two possible ideas about layout of objects: its regular \"special remote layout\" (e.g. a flat list of keys or with some hash directories) or exported (such as a file tree). Only in case of `git` special remote layout would be the same, but otherwise special remote layout would be different, and \"export\" wouldn't really be the one desired (especially for placing files to the remote). +So it seems that the only way to accomplish my mission would be to implement in the `globus` custom special remote the support of additional layout by parametrizing special remote upon initremote with e.g. `layout=local`, which would lookup location for the key in the local repository (under `.git/annex/objects`), and use it as the path for the key on the remote. + +Is that a correct idea Joey? or you see a better way? +"""]] diff --git a/doc/todo/globus_special_remote_as_a___34__transport__34___layer/comment_4_3bea0473d4805b1ef56b955ba30166e8._comment b/doc/todo/globus_special_remote_as_a___34__transport__34___layer/comment_4_3bea0473d4805b1ef56b955ba30166e8._comment new file mode 100644 index 0000000000..63877c943e --- /dev/null +++ b/doc/todo/globus_special_remote_as_a___34__transport__34___layer/comment_4_3bea0473d4805b1ef56b955ba30166e8._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2019-11-12T20:34:23Z" + content=""" +If the repository being accessed over globus uses .git/annex/objects/ +locations, it sounds to me like it's a git-annex repo, being accessed over +a protocol other than ssh. A special remote that accesses remote annex +objects could be created, and --sameas used to make the special remote have +the same uuid as the (remote) git-annex repo. +"""]] diff --git a/doc/todo/globus_special_remote_as_a___34__transport__34___layer/comment_5_28aabb525a1a487eaecdfe591cc7108b._comment b/doc/todo/globus_special_remote_as_a___34__transport__34___layer/comment_5_28aabb525a1a487eaecdfe591cc7108b._comment new file mode 100644 index 0000000000..329cd3c40e --- /dev/null +++ b/doc/todo/globus_special_remote_as_a___34__transport__34___layer/comment_5_28aabb525a1a487eaecdfe591cc7108b._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 5" + date="2019-11-13T04:41:56Z" + content=""" +> If the repository being accessed over globus uses .git/annex/objects/ locations, it sounds to me like it's a git-annex repo, being accessed over a protocol other than ssh. + +That is correct. + +> A special remote that accesses remote annex objects could be created, and --sameas used to make the special remote have the same uuid as the (remote) git-annex repo. + +That is correct too. The question is either it should be a dedicated `git-annex-remote-globus-gitannex` special remote which would need to probably use the same functionality of a `git-annex-remote-globus` for actual authentication and interaction with globus (with difference largely in paths to assume) or just an option to the `git-annex-remote-globus`...? +"""]] diff --git a/doc/todo/remove_legacy_import_directory_interface.mdwn b/doc/todo/remove_legacy_import_directory_interface.mdwn new file mode 100644 index 0000000000..d8f8a316c5 --- /dev/null +++ b/doc/todo/remove_legacy_import_directory_interface.mdwn @@ -0,0 +1,47 @@ +The old `git annex import /dir` interface should be removed, in favor of +the new import from special remote interface, which can be used with a +directory special remote to do the same kind of operation. + +There have always been complaints about the old interface being surprising +and/or not doing quite what some users want. +Tried to find a principled way to address some of that with the "duplicate" +options, but users just complain they're confusing (which they certianly +are) and don't quite do what they want. + +The fundamental mistake that the old interface made is it conflated +copying content into the repository, deleting content from the directory, +and updating the working tree. The new interface decouples all 3, +only doing the first, and updating a tracking branch. The user is then free +to merge the tracking branch as-is, or otherwise modify before merging. +There are some options to manipulate the tracking branch in commonly +wanted ways, which just boil down do git branch manipulation. Less common +desires can be handled using all of git's facilities. As for deleting from +the directory, that's an export of a branch, which can just be an empty +branch if they want to delete everything, or again they can use all of git +to construct the branch with the changes they desire. + +So while it's not been used as much as the old interface, I think the new +interface will be much more flexible to the varied needs of users. What's +less clear is if it can well support every way that the old interface can +be used. + +Of course the first pain point is that the user has to set up a directory +special remote. Which may be annoying if they are importing from a variety +of different directories ad-hoc. + +Another likely pain point is ad-hoc importing of individual files or +files matched by wildcard. The new interface is much more about importing +whole trees, perhaps configured by preferred content settings + +One approach would be to make the old interface be implemented using the +new interface, and paper over the cracks, by eg setting up a directory +special remote automatically. + +Or, the old interface could warn when used, linking to some documentation +about how to accomplish the same tasks with the new interface. + +Either could be done incrementally, eg start with the most common import +cases, convert to the new interface, and keep others using the old +interface. + +--[[Joey]] diff --git a/doc/todo/split_off_clean__47__smudge_filter__63__/comment_7_9817fd2cbb46c97124cd35a7a2a9c4bd._comment b/doc/todo/split_off_clean__47__smudge_filter__63__/comment_7_9817fd2cbb46c97124cd35a7a2a9c4bd._comment new file mode 100644 index 0000000000..1b05c62bac --- /dev/null +++ b/doc/todo/split_off_clean__47__smudge_filter__63__/comment_7_9817fd2cbb46c97124cd35a7a2a9c4bd._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="limiting clean/smudge filter to unlocked files" + date="2019-11-08T19:48:11Z" + content=""" +\"Each new invokation of git-annex has to re-open databases, start up git cat-file to query from, link the executable, read git config, etc. That takes a few hundred milliseconds.\" -- this is somewhat more of an issue now that all `git add/checkout` operations call the clean/smudge filter, even when there are no unlocked files. One option is to [[only configure the filters for unlocked files|todo/only_pass_unlocked_files_through_the_clean__47__smudge_filter]] when only a few files are unlocked. +"""]] diff --git a/doc/todo/sync_git-lfs_special_remote_should_sync_git_too.mdwn b/doc/todo/sync_git-lfs_special_remote_should_sync_git_too.mdwn new file mode 100644 index 0000000000..ebefc2c59f --- /dev/null +++ b/doc/todo/sync_git-lfs_special_remote_should_sync_git_too.mdwn @@ -0,0 +1,5 @@ +git annex sync with a git-lfs special remote does not pull or push. +It should. +--[[Joey]] + +> [[fixed|done]] --[[Joey]] diff --git a/git-annex.cabal b/git-annex.cabal index 89d3ab7b20..01527c1c66 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -1,5 +1,5 @@ Name: git-annex -Version: 8.20191107 +Version: 8.20191121 Cabal-Version: >= 1.8 License: AGPL-3 Maintainer: Joey Hess diff --git a/standalone/osx/git-annex.app/Contents/MacOS/runshell b/standalone/osx/git-annex.app/Contents/MacOS/runshell index 56d8349f28..557c59e224 100755 --- a/standalone/osx/git-annex.app/Contents/MacOS/runshell +++ b/standalone/osx/git-annex.app/Contents/MacOS/runshell @@ -69,7 +69,7 @@ export PATH ORIG_GIT_EXEC_PATH="$GIT_EXEC_PATH" export ORIG_GIT_EXEC_PATH -GIT_EXEC_PATH="$bundle" +GIT_EXEC_PATH="$bundle/git-core" export GIT_EXEC_PATH ORIG_GIT_TEMPLATE_DIR="$GIT_TEMPLATE_DIR"